1. 监控单文件
当被监控的文件有新内容写入,就会上传到 hdfsbin/flume-ng agent --conf conf/ --name a1 --conf-file job/flume-file-hdfs.conf
a1.sources = r1
a1.sinks = k1
a1.channels = c1
a1.sources.r1.type = exec
a1.sources.r1.command = tail -F /tmp/root/hive.log
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
a1.sinks.k1.type = hdfs
a1.sinks.k1.hdfs.path = hdfs://hd01:8020/flume/%Y%m%d/%H
a1.sinks.k1.hdfs.filePrefix = logs-
a1.sinks.k1.hdfs.round = true
a1.sinks.k1.hdfs.roundValue = 1
a1.sinks.k1.hdfs.roundUnit = hour
a1.sinks.k1.hdfs.useLocalTimeStamp = true
a1.sinks.k1.hdfs.batchSize = 100
a1.sinks.k1.hdfs.fileType = DataStream
a1.sinks.k1.hdfs.rollInterval = 60
a1.sinks.k1.hdfs.rollSize = 134217700
a1.sinks.k1.hdfs.rollCount = 0
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
2. 监控目录新文件
当被监控的目录有新文件的创建,就会上传到 hdfsbin/flume-ng agent --conf conf/ --name a1 --conf-file job/flume-dir-hdfs.conf
a1.sources = r1
a1.sinks = k1
a1.channels = c1
a1.sources.r1.type = spooldir
a1.sources.r1.spoolDir = /usr/local/flume-1.9.0/dokshare/upload
a1.sources.r1.fileSuffix = .COMPLETED
a1.sources.r1.fileHeader = true
a1.sources.r1.ignorePattern = ([^ ]*\.tmp)
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
a1.sinks.k1.type = hdfs
a1.sinks.k1.hdfs.path = hdfs://hd01:8020/flume/upload/%Y%m%d/%H
a1.sinks.k1.hdfs.filePrefix = upload-
a1.sinks.k1.hdfs.round = true
a1.sinks.k1.hdfs.roundValue = 1
a1.sinks.k1.hdfs.roundUnit = hour
a1.sinks.k1.hdfs.useLocalTimeStamp = true
a1.sinks.k1.hdfs.batchSize = 100
a1.sinks.k1.hdfs.fileType = DataStream
a1.sinks.k1.hdfs.rollInterval = 60
a1.sinks.k1.hdfs.rollSize = 134217700
a1.sinks.k1.hdfs.rollCount = 0
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
3. 监控目录下多个追加吸入文件
监听多个实时追加的文件(可以实现断点续传),并上传到 hdfsbin/flume-ng agent -c conf/ -n a1 -f job/flume-taildir-hdfs.conf
a1.sources = r1
a1.sinks = k1
a1.channels = c1
a1.sources.r1.type = TAILDIR
a1.sources.r1.positionFile = /usr/local/flume-1.9.0/dokshare/taildir/tail_dir.json
a1.sources.r1.filegroups = f1 f2
a1.sources.r1.filegroups.f1 = /usr/local/flume-1.9.0/taildir/files/.*file.*
a1.sources.r1.filegroups.f2 = /usr/local/flume-1.9.0/taildir/files2/.*log.*
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
a1.sinks.k1.type = hdfs
a1.sinks.k1.hdfs.path = hdfs://hd01:8020/flume/taildir/%Y%m%d/%H
a1.sinks.k1.hdfs.filePrefix = upload-
a1.sinks.k1.hdfs.round = true
a1.sinks.k1.hdfs.roundValue = 1
a1.sinks.k1.hdfs.roundUnit = hour
a1.sinks.k1.hdfs.useLocalTimeStamp = true
a1.sinks.k1.hdfs.batchSize = 100
a1.sinks.k1.hdfs.fileType = DataStream
a1.sinks.k1.hdfs.rollInterval = 60
a1.sinks.k1.hdfs.rollSize = 134217700
a1.sinks.k1.hdfs.rollCount = 0