## 0. Goal

Building on the Hadoop Docker cluster setup, install the Flume service on hd01.

File preparation:

1. Update dockerfileHDFS to add Flume.
2. Update docker-compose.yml to add Flume.
3. Add a flume-1.9.0/ directory under element/configure/, as sketched below:
   - configure/
     - flume-1.9.0/
       - dokshare/
       - job/
       - lib/
   - images/
     - apache-flume-1.9.0-bin.tar.gz
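A minimal sketch of the host-side preparation, assuming the project root already contains element/ (downloading the tarball itself is covered in section 1.3.2):

```bash
# Create the host directories that will be bind-mounted into hd01
mkdir -p element/configure/flume-1.9.0/{dokshare,job,lib}
```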

## 1.1 dockerfileHDFS

1. Install the nc command.
2. Add Flume.

```dockerfile
FROM centos:7

# SSH + net-tools + snappy
RUN yum install -y openssh-server sudo net-tools snappy.x86_64 nc
RUN sed -i 's/UsePAM yes/UsePAM no/g' /etc/ssh/sshd_config
RUN echo "PermitRootLogin yes" >> /etc/ssh/sshd_config
RUN yum install -y openssh-clients

RUN echo "root:123456" | chpasswd
RUN echo "root ALL=(ALL) ALL" >> /etc/sudoers
RUN ssh-keygen -t dsa -f /etc/ssh/ssh_host_dsa_key
RUN ssh-keygen -t rsa -f /etc/ssh/ssh_host_rsa_key

RUN mkdir /var/run/sshd

# JAVA
ADD element/images/jdk-8u141-linux-x64.tar.gz /usr/local/
RUN mv /usr/local/jdk1.8.0_141 /usr/local/jdk1.8
ENV JAVA_HOME /usr/local/jdk1.8
ENV PATH $JAVA_HOME/bin:$PATH

# HADOOP
ADD element/images/hadoop-3.3.0.tar.gz /usr/local
RUN mv /usr/local/hadoop-3.3.0 /usr/local/hadoop
ENV HADOOP_HOME /usr/local/hadoop
ENV PATH $HADOOP_HOME/bin:$PATH

# HIVE
ADD element/images/apache-hive-3.1.2-bin.tar.gz /usr/local
RUN mv /usr/local/apache-hive-3.1.2-bin /usr/local/hive-3.1.2
ENV HIVE_HOME /usr/local/hive-3.1.2
ENV PATH $HIVE_HOME/bin:$PATH

# FLUME
ADD element/images/apache-flume-1.9.0-bin.tar.gz /usr/local
RUN mv /usr/local/apache-flume-1.9.0-bin /usr/local/flume-1.9.0

RUN yum install -y which sudo

RUN mkdir /mysh

EXPOSE 22
CMD ["/usr/sbin/sshd", "-D"]
```
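With the Dockerfile updated, the image needs to be rebuilt before Compose can use it. A minimal sketch, assuming the file above is saved as dockerfileHDFS in the project root (the tag matches the image name referenced in docker-compose.yml):

```bash
# Rebuild the cluster image so the Flume tarball and the nc package are baked in
docker build -f dockerfileHDFS -t my-hadoop:3.3.0 .
```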

## 1.2 docker-compose.yml

1. Add the Flume directory mappings.

```yaml
version: '3.5'
services:
  hd01:
    image: my-hadoop:3.3.0
    container_name: hd01
    hostname: hd01
    extra_hosts:
      - "hd-mysql57-01:172.24.0.6"
      - "hd02:172.24.0.12"
      - "hd03:172.24.0.13"
    networks:
      hd-network:
        ipv4_address: 172.24.0.11
    volumes:
      - ${PWD}/element/configure/hadoop/etc-hd01:/usr/local/hadoop/etc
      - ${PWD}/element/configure/hadoop/dokshare:/usr/local/hadoop/dokshare
      - ${PWD}/element/configure/hive-3.1.2/conf:/usr/local/hive-3.1.2/conf
      - ${PWD}/element/configure/hive-3.1.2/lib:/usr/local/hive-3.1.2/lib
      - ${PWD}/element/configure/hive-3.1.2/dokshare:/usr/local/hive-3.1.2/dokshare
      - ${PWD}/element/mysh:/mysh
      - ${PWD}/element/configure/flume-1.9.0/job:/usr/local/flume-1.9.0/job
      - ${PWD}/element/configure/flume-1.9.0/lib:/usr/local/flume-1.9.0/lib
      - ${PWD}/element/configure/flume-1.9.0/dokshare:/usr/local/flume-1.9.0/dokshare
    environment:
      - HDFS_NAMENODE_USER=root
      - HDFS_DATANODE_USER=root
      - HDFS_SECONDARYNAMENODE_USER=root
      - YARN_RESOURCEMANAGER_USER=root
      - YARN_NODEMANAGER_USER=root
  hd02:
    image: my-hadoop:3.3.0
    container_name: hd02
    hostname: hd02
    extra_hosts:
      - "hd-mysql57-01:172.24.0.6"
      - "hd01:172.24.0.11"
      - "hd03:172.24.0.13"
    networks:
      hd-network:
        ipv4_address: 172.24.0.12
    volumes:
      - ${PWD}/element/configure/hadoop/etc-hd02:/usr/local/hadoop/etc
      - ${PWD}/element/configure/hadoop/dokshare:/usr/local/hadoop/dokshare
      - ${PWD}/element/mysh:/mysh
    environment:
      - HDFS_NAMENODE_USER=root
      - HDFS_DATANODE_USER=root
      - HDFS_SECONDARYNAMENODE_USER=root
      - YARN_RESOURCEMANAGER_USER=root
      - YARN_NODEMANAGER_USER=root
  hd03:
    image: my-hadoop:3.3.0
    container_name: hd03
    hostname: hd03
    extra_hosts:
      - "hd-mysql57-01:172.24.0.6"
      - "hd01:172.24.0.11"
      - "hd02:172.24.0.12"
    networks:
      hd-network:
        ipv4_address: 172.24.0.13
    volumes:
      - ${PWD}/element/configure/hadoop/etc-hd03:/usr/local/hadoop/etc
      - ${PWD}/element/configure/hadoop/dokshare:/usr/local/hadoop/dokshare
      - ${PWD}/element/mysh:/mysh
    environment:
      - HDFS_NAMENODE_USER=root
      - HDFS_DATANODE_USER=root
      - HDFS_SECONDARYNAMENODE_USER=root
      - YARN_RESOURCEMANAGER_USER=root
      - YARN_NODEMANAGER_USER=root
  hd-mysql57-01:
    image: mysql:5.7
    container_name: hd-mysql57-01
    hostname: hd-mysql57-01
    networks:
      hd-network:
        ipv4_address: 172.24.0.6
    environment:
      # Prefer setting the time zone this way; other images can use it too
      - TZ=CST-8
      - MYSQL_ROOT_PASSWORD=123456
      - MYSQL_DATABASE=hive
      - MYSQL_USER=my_user
      - MYSQL_PASSWORD=my_pw
    # --default-time-zone='+8:00' can also be appended to set the time zone
    command: --character-set-server=utf8mb4 --collation-server=utf8mb4_unicode_ci
    volumes:
      # Local data directory
      - ${PWD}/element/mysql57/data/:/var/lib/mysql
networks:
  hd-network:
    name: hd-network
    ipam:
      config:
        - subnet: 172.24.0.0/24
```
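Once the compose file is updated, recreating the containers applies the new mounts. A minimal sketch:

```bash
# Recreate the containers so the flume-1.9.0 volume mappings take effect
docker-compose up -d

# Verify the mounts are visible inside hd01
docker exec hd01 ls /usr/local/flume-1.9.0
```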

## 1.3 The element/ directory

### 1.3.1 element/configure/flume-1.9.0/

1. dokshare/ directory

   This directory is mainly for sharing files: materials needed by later steps that work with Hive.

2. job/ directory

   Shares the configuration file for each agent; a sample job file appears in section 2 below.

3. lib/ directory

   Flume and Hadoop bundle different versions of some libraries, which causes conflicts, so the lib/ directory is mapped through Docker and the file guava-11.0.2.jar is deleted from it, as sketched after this list.
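A minimal sketch of the Guava fix, assuming the mapped lib/ directory is first populated from the image so the bind mount does not shadow Flume's jars (the docker run copy step is one way to do this, not from the original):

```bash
# Copy Flume's bundled jars from a temporary container into the mapped host directory
docker run --rm -v "$PWD/element/configure/flume-1.9.0/lib:/mnt" my-hadoop:3.3.0 \
  cp -a /usr/local/flume-1.9.0/lib/. /mnt/

# Delete the old Guava that conflicts with the newer Guava shipped by Hadoop 3.3.0
rm element/configure/flume-1.9.0/lib/guava-11.0.2.jar
```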

### 1.3.2 element/images/

Download file: apache-flume-1.9.0-bin.tar.gz
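A minimal sketch of fetching the tarball, assuming the standard Apache archive layout:

```bash
# Download the Flume 1.9.0 binary release into the directory the Dockerfile ADDs from
wget -P element/images/ \
  https://archive.apache.org/dist/flume/1.9.0/apache-flume-1.9.0-bin.tar.gz
```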

## 2. Startup

Example: https://www.yuque.com/weghp/bigdata/rwnd0i
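The linked example walks through a full case. As a quick smoke test, here is a minimal sketch of a netcat-to-logger agent; the file name nc-logger.conf, agent name a1, and port 44444 are illustrative choices, not from the original:

```bash
# Write a minimal agent config into the mapped job/ directory:
# netcat source on port 44444 -> memory channel -> logger sink
cat > element/configure/flume-1.9.0/job/nc-logger.conf <<'EOF'
a1.sources = r1
a1.channels = c1
a1.sinks = k1

a1.sources.r1.type = netcat
a1.sources.r1.bind = localhost
a1.sources.r1.port = 44444

a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100

a1.sinks.k1.type = logger

a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
EOF

# Start the agent inside hd01; in a second shell,
# `docker exec -it hd01 nc localhost 44444` sends test lines
docker exec -it hd01 /usr/local/flume-1.9.0/bin/flume-ng agent \
  --conf /usr/local/flume-1.9.0/conf \
  --conf-file /usr/local/flume-1.9.0/job/nc-logger.conf \
  --name a1 -Dflume.root.logger=INFO,console
```

The logger sink prints each received line to the console, which confirms both the mounted job/ directory and the nc package installed in section 1.1.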