1. Monitoring a single file

When new content is written to the monitored file, it is uploaded to HDFS.
bin/flume-ng agent --conf conf/ --name a1 --conf-file job/flume-file-hdfs.conf

  # name the agent's source, sink, and channel
  a1.sources = r1
  a1.sinks = k1
  a1.channels = c1

  # source: run tail -F to follow the Hive log
  a1.sources.r1.type = exec
  a1.sources.r1.command = tail -F /tmp/root/hive.log

  # channel: buffer events in memory
  a1.channels.c1.type = memory
  a1.channels.c1.capacity = 1000
  a1.channels.c1.transactionCapacity = 100

  # sink: write to HDFS, partitioned by date and hour
  a1.sinks.k1.type = hdfs
  a1.sinks.k1.hdfs.path = hdfs://hd01:8020/flume/%Y%m%d/%H
  a1.sinks.k1.hdfs.filePrefix = logs-
  # round the timestamp down to the hour for the %Y%m%d/%H escapes
  a1.sinks.k1.hdfs.round = true
  a1.sinks.k1.hdfs.roundValue = 1
  a1.sinks.k1.hdfs.roundUnit = hour
  # use the agent's local time instead of a timestamp header
  a1.sinks.k1.hdfs.useLocalTimeStamp = true
  a1.sinks.k1.hdfs.batchSize = 100
  # plain text output instead of SequenceFile
  a1.sinks.k1.hdfs.fileType = DataStream
  # roll a new file every 60 s or just under the 128 MB block size, never by event count
  a1.sinks.k1.hdfs.rollInterval = 60
  a1.sinks.k1.hdfs.rollSize = 134217700
  a1.sinks.k1.hdfs.rollCount = 0

  # bind the source and sink to the channel
  a1.sources.r1.channels = c1
  a1.sinks.k1.channel = c1
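
To verify the flow, append a line to the tailed log and then list the hour-partitioned HDFS path. A minimal sketch, assuming the paths from the config above and that the agent is already running:

  echo "test event" >> /tmp/root/hive.log
  # after the 60-second roll interval, a logs-* file should appear under the current hour
  hdfs dfs -ls /flume/$(date +%Y%m%d)/$(date +%H)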

2. Monitoring a directory for new files

When a new file is created in the monitored directory, it is uploaded to HDFS.
bin/flume-ng agent --conf conf/ --name a1 --conf-file job/flume-dir-hdfs.conf

  # name the agent's source, sink, and channel
  a1.sources = r1
  a1.sinks = k1
  a1.channels = c1

  # source: watch the spooling directory for new files
  a1.sources.r1.type = spooldir
  a1.sources.r1.spoolDir = /usr/local/flume-1.9.0/dokshare/upload
  # rename ingested files with this suffix instead of deleting them
  a1.sources.r1.fileSuffix = .COMPLETED
  # add a header with the absolute path of the source file
  a1.sources.r1.fileHeader = true
  # skip files that are still being written (.tmp)
  a1.sources.r1.ignorePattern = ([^ ]*\.tmp)

  # channel: buffer events in memory
  a1.channels.c1.type = memory
  a1.channels.c1.capacity = 1000
  a1.channels.c1.transactionCapacity = 100

  # sink: write to HDFS, partitioned by date and hour
  a1.sinks.k1.type = hdfs
  a1.sinks.k1.hdfs.path = hdfs://hd01:8020/flume/upload/%Y%m%d/%H
  a1.sinks.k1.hdfs.filePrefix = upload-
  a1.sinks.k1.hdfs.round = true
  a1.sinks.k1.hdfs.roundValue = 1
  a1.sinks.k1.hdfs.roundUnit = hour
  a1.sinks.k1.hdfs.useLocalTimeStamp = true
  a1.sinks.k1.hdfs.batchSize = 100
  a1.sinks.k1.hdfs.fileType = DataStream
  # roll every 60 s or just under the 128 MB block size, never by event count
  a1.sinks.k1.hdfs.rollInterval = 60
  a1.sinks.k1.hdfs.rollSize = 134217700
  a1.sinks.k1.hdfs.rollCount = 0

  # bind the source and sink to the channel
  a1.sources.r1.channels = c1
  a1.sinks.k1.channel = c1
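
To test the spooling directory source, drop a finished file into the watched directory; the sample file name below is made up. Note that spooldir expects files to be complete and immutable once they land in the directory, so write them elsewhere first and copy or move them in:

  cp /tmp/data.txt /usr/local/flume-1.9.0/dokshare/upload/
  # once ingested, the file is renamed with the .COMPLETED suffix;
  # files matching the ignorePattern (*.tmp) are skipped
  ls /usr/local/flume-1.9.0/dokshare/upload/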

3. Monitoring multiple appended files in a directory

Listens to multiple files being appended to in real time and uploads them to HDFS; unlike the exec source above, TAILDIR records its read positions, so it can resume where it left off after a restart.
bin/flume-ng agent -c conf/ -n a1 -f job/flume-taildir-hdfs.conf

  # name the agent's source, sink, and channel
  a1.sources = r1
  a1.sinks = k1
  a1.channels = c1

  # source: TAILDIR tails multiple files and records offsets for resume
  a1.sources.r1.type = TAILDIR
  # JSON file storing the read position of each tailed file
  a1.sources.r1.positionFile = /usr/local/flume-1.9.0/dokshare/taildir/tail_dir.json
  # two file groups, each matched by a regex
  a1.sources.r1.filegroups = f1 f2
  a1.sources.r1.filegroups.f1 = /usr/local/flume-1.9.0/taildir/files/.*file.*
  a1.sources.r1.filegroups.f2 = /usr/local/flume-1.9.0/taildir/files2/.*log.*

  # channel: buffer events in memory
  a1.channels.c1.type = memory
  a1.channels.c1.capacity = 1000
  a1.channels.c1.transactionCapacity = 100

  # sink: write to HDFS, partitioned by date and hour
  a1.sinks.k1.type = hdfs
  a1.sinks.k1.hdfs.path = hdfs://hd01:8020/flume/taildir/%Y%m%d/%H
  a1.sinks.k1.hdfs.filePrefix = upload-
  a1.sinks.k1.hdfs.round = true
  a1.sinks.k1.hdfs.roundValue = 1
  a1.sinks.k1.hdfs.roundUnit = hour
  a1.sinks.k1.hdfs.useLocalTimeStamp = true
  a1.sinks.k1.hdfs.batchSize = 100
  a1.sinks.k1.hdfs.fileType = DataStream
  a1.sinks.k1.hdfs.rollInterval = 60
  a1.sinks.k1.hdfs.rollSize = 134217700
  a1.sinks.k1.hdfs.rollCount = 0

  # bind the source and sink to the channel
  a1.sources.r1.channels = c1
  a1.sinks.k1.channel = c1
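
To see the checkpointing at work, append to a file matched by one of the filegroups (the file name below is illustrative), then inspect the position file:

  mkdir -p /usr/local/flume-1.9.0/taildir/files
  echo "hello taildir" >> /usr/local/flume-1.9.0/taildir/files/file1.txt
  # each entry records the inode, byte offset, and path of a tailed file;
  # after a restart the agent resumes from these offsets instead of re-reading
  cat /usr/local/flume-1.9.0/dokshare/taildir/tail_dir.json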