
Flume-1 monitors a file for changes and passes every change to Flume-2, which stores it in HDFS. At the same time, Flume-1 passes the same change to Flume-3, which writes it to the local file system.

First create the local output directory for Flume-3 (the file_roll sink will not create it):

mkdir -p /opt/egg/apache-flume-1.7.0-bin/group1

Flume-1 (agent a1) is configured with one source that reads the log file, two channels, and two sinks, feeding flume-flume-hdfs and flume-flume-dir respectively. Create the configuration under job/group1/ (matching the paths used in the start commands below):

vim flume-file-flume.conf

# Name the components on this agent
a1.sources = r1
a1.sinks = k1 k2
a1.channels = c1 c2

# Replicate the data flow to all channels
a1.sources.r1.selector.type = replicating

# Describe/configure the source
a1.sources.r1.type = exec
a1.sources.r1.command = tail -F /opt/ha/hadoop-2.7.2/logs/hadoop-root-namenode-hadoop1.log
a1.sources.r1.shell = /bin/bash -c

# Describe the sinks
# An avro sink acts as a data sender
a1.sinks.k1.type = avro
a1.sinks.k1.hostname = hadoop1
a1.sinks.k1.port = 4141

a1.sinks.k2.type = avro
a1.sinks.k2.hostname = hadoop1
a1.sinks.k2.port = 4142

# Describe the channels
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100

a1.channels.c2.type = memory
a1.channels.c2.capacity = 1000
a1.channels.c2.transactionCapacity = 100

# Bind the source and sinks to the channels
a1.sources.r1.channels = c1 c2
a1.sinks.k1.channel = c1
a1.sinks.k2.channel = c2

Flume-2 (agent a2) receives from a1 over avro and writes to HDFS:

vim flume-flume-hdfs.conf

# Name the components on this agent
a2.sources = r1
a2.sinks = k1
a2.channels = c1

# Describe/configure the source
# An avro source acts as a data receiving service
a2.sources.r1.type = avro
a2.sources.r1.bind = hadoop1
a2.sources.r1.port = 4141

# Describe the sink
a2.sinks.k1.type = hdfs
a2.sinks.k1.hdfs.path = hdfs://hadoop1:9000/flume2/%Y%m%d/%H
# Prefix of the uploaded files
a2.sinks.k1.hdfs.filePrefix = flume2-
# Whether to roll directories based on time
a2.sinks.k1.hdfs.round = true
# Number of time units before creating a new directory
a2.sinks.k1.hdfs.roundValue = 1
# The time unit used for rounding
a2.sinks.k1.hdfs.roundUnit = hour
# Whether to use the local timestamp
a2.sinks.k1.hdfs.useLocalTimeStamp = true
# Number of events to accumulate before flushing to HDFS
a2.sinks.k1.hdfs.batchSize = 100
# File type; compression is supported
a2.sinks.k1.hdfs.fileType = DataStream
# Roll to a new file every 600 seconds
a2.sinks.k1.hdfs.rollInterval = 600
# Roll the file at roughly 128 MB
a2.sinks.k1.hdfs.rollSize = 134217700
# Rolling is independent of the number of events
a2.sinks.k1.hdfs.rollCount = 0

# Describe the channel
a2.channels.c1.type = memory
a2.channels.c1.capacity = 1000
a2.channels.c1.transactionCapacity = 100

# Bind the source and sink to the channel
a2.sources.r1.channels = c1
a2.sinks.k1.channel = c1

Flume-3 (agent a3) receives from a1 over avro and writes to a local directory. The output directory must already exist; if it does not, Flume will not create it (hence the mkdir -p above):

vim flume-flume-dir.conf

# Name the components on this agent
a3.sources = r1
a3.sinks = k1
a3.channels = c2

# Describe/configure the source
a3.sources.r1.type = avro
a3.sources.r1.bind = hadoop1
a3.sources.r1.port = 4142

# Describe the sink
a3.sinks.k1.type = file_roll
a3.sinks.k1.sink.directory = /opt/egg/apache-flume-1.7.0-bin/group1

# Describe the channel
a3.channels.c2.type = memory
a3.channels.c2.capacity = 1000
a3.channels.c2.transactionCapacity = 100

# Bind the source and sink to the channel
a3.sources.r1.channels = c2
a3.sinks.k1.channel = c2

Start the Flume agents. The downstream agents a3 and a2 are started first so that their avro sources are listening before a1's avro sinks try to connect:

bin/flume-ng agent --conf conf/ --name a3 --conf-file job/group1/flume-flume-dir.conf
bin/flume-ng agent --conf conf/ --name a2 --conf-file job/group1/flume-flume-hdfs.conf
bin/flume-ng agent --conf conf/ --name a1 --conf-file job/group1/flume-file-flume.conf

Generate some log activity for a1 to pick up (starting HDFS appends new entries to the NameNode log that the exec source is tailing):

sbin/start-dfs.sh
sbin/start-yarn.sh
bin/hive
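To confirm that both branches of the fan-out are receiving data, a minimal check is sketched below. It assumes the HDFS path and local directory from the configurations above; the exact date/hour subdirectory under /flume2 depends on when the events arrived.

# List the HDFS branch output (subdirectories follow the %Y%m%d/%H pattern of the sink path)
hdfs dfs -ls -R /flume2

# List the local file_roll branch output; the sink rolls new files periodically,
# naming them with a timestamp-based prefix
ls -l /opt/egg/apache-flume-1.7.0-bin/group1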