image.png

    1. Taildir Source维护了一个JSON格式的position file,它会定期地把每个被监控文件读取到的最新位置更新到position file中,因此能够实现断点续传
    2. vim flume-taildir-hdfs.conf
    3. a3.sources = r3
    4. a3.sinks = k3
    5. a3.channels = c3
    6. # Describe/configure the source
    7. a3.sources.r3.type = TAILDIR
    8. #指定position_file位置
    9. a3.sources.r3.positionFile = /opt/egg/apache-flume-1.7.0-bin/upload/tail_dir.json
    10. #定义监控目录文件
    11. a3.sources.r3.filegroups = f1 f2
    12. a3.sources.r3.filegroups.f1 = /opt/egg/apache-flume-1.7.0-bin/upload/.*file.*
    13. a3.sources.r3.filegroups.f2 = /opt/module/flume/files/.*log.*
    14. # Describe the sink
    15. a3.sinks.k3.type = hdfs
    16. a3.sinks.k3.hdfs.path = hdfs://hadoop1:9000/flume/upload/%Y%m%d/%H
    17. #上传文件的前缀
    18. a3.sinks.k3.hdfs.filePrefix = tail-
    19. #是否按照时间滚动文件夹
    20. a3.sinks.k3.hdfs.round = true
    21. #多少时间单位创建一个新的文件夹
    22. a3.sinks.k3.hdfs.roundValue = 1
    23. #重新定义时间单位
    24. a3.sinks.k3.hdfs.roundUnit = hour
    25. #是否使用本地时间戳
    26. a3.sinks.k3.hdfs.useLocalTimeStamp = true
    27. #积攒多少个Event才flush到HDFS一次
    28. a3.sinks.k3.hdfs.batchSize = 100
    29. #设置文件类型:DataStream为不压缩的普通数据流;如需压缩,应使用CompressedStream并配置hdfs.codeC
    30. a3.sinks.k3.hdfs.fileType = DataStream
    31. #多久生成一个新的文件
    32. a3.sinks.k3.hdfs.rollInterval = 60
    33. #设置每个文件的滚动大小(单位:字节),这里略小于128MB
    34. a3.sinks.k3.hdfs.rollSize = 134217700
    35. #文件的滚动与Event数量无关
    36. a3.sinks.k3.hdfs.rollCount = 0
    37. # Use a channel which buffers events in memory
    38. a3.channels.c3.type = memory
    39. a3.channels.c3.capacity = 1000
    40. a3.channels.c3.transactionCapacity = 100
    41. # Bind the source and sink to the channel
    42. a3.sources.r3.channels = c3
    43. a3.sinks.k3.channel = c3
    44. 启动flume
    45. bin/flume-ng agent --conf conf/ --name a3 --conf-file job/flume-taildir-hdfs.conf
    46. 进入被监控的目录:cd /opt/egg/apache-flume-1.7.0-bin/upload
    47. echo hello >> file1.txt
    48. echo atguigu >> file2.txt
    49. 查看HDFS,会看到两个Linux文件的内容被写入了同一个HDFS文件中
    50. -rw-r--r-- 3 root supergroup 28 2019-09-01 16:29 /flume/upload/20190901/16/tail-.1567326496953