## 0. 目标
基于 hadoop docker 集群搭建,在 hd01 搭建安装 flume 服务端。
## 1. 文件准备

- 更新 dockerfileHDFS:增加 flume,并增加安装 nc 命令
- 更新 docker-compose.yml:增加 flume 文件目录映射
- element/configure/ 目录:增加 flume-1.9.0/ 目录

## 1.1 dockerfileHDFS
- 增加 flume ```dockerfile FROM centos:7
SSH + net-tools + snappy
RUN yum install -y openssh-server sudo net-tools snappy.x86_64 nc RUN sed -i ‘s/UsePAM yes/UsePAM no/g’ /etc/ssh/sshd_config RUN echo “PermitRootLogin yes” >> /etc/ssh/sshd_config RUN yum install -y openssh-clients
RUN echo “root:123456” | chpasswd RUN echo “root ALL=(ALL) ALL” >> /etc/sudoers RUN ssh-keygen -t dsa -f /etc/ssh/ssh_host_dsa_key RUN ssh-keygen -t rsa -f /etc/ssh/ssh_host_rsa_key
RUN mkdir /var/run/sshd
JAVA
ADD element/images/jdk-8u141-linux-x64.tar.gz /usr/local/ RUN mv /usr/local/jdk1.8.0_141 /usr/local/jdk1.8 ENV JAVA_HOME /usr/local/jdk1.8 ENV PATH $JAVA_HOME/bin:$PATH
HADOOP
ADD element/images/hadoop-3.3.0.tar.gz /usr/local RUN mv /usr/local/hadoop-3.3.0 /usr/local/hadoop ENV HADOOP_HOME /usr/local/hadoop ENV PATH $HADOOP_HOME/bin:$PATH
HIVE
ADD element/images/apache-hive-3.1.2-bin.tar.gz /usr/local RUN mv /usr/local/apache-hive-3.1.2-bin /usr/local/hive-3.1.2 ENV HIVE_HOME /usr/local/hive-3.1.2 ENV PATH $HIVE_HOME/bin:$PATH
FLUME
ADD element/images/apache-flume-1.9.0-bin.tar.gz /usr/local RUN mv /usr/local/apache-flume-1.9.0-bin /usr/local/flume-1.9.0
RUN yum install -y which sudo
RUN mkdir /mysh
EXPOSE 22 CMD [“/usr/sbin/sshd”, “-D”]
<a name="Mnd4i"></a>
## 1.2 docker-compose.yml
1. 增加 flume 目录映射
```yaml
version: '3.5'
services:
hd01:
image: my-hadoop:3.3.0
container_name: hd01
hostname: hd01
extra_hosts:
- "hd-mysql57-01:172.24.0.6"
- "hd02:172.24.0.12"
- "hd03:172.24.0.13"
networks:
hd-network:
ipv4_address: 172.24.0.11
volumes:
- ${PWD}/element/configure/hadoop/etc-hd01:/usr/local/hadoop/etc
- ${PWD}/element/configure/hadoop/dokshare:/usr/local/hadoop/dokshare
- ${PWD}/element/configure/hive-3.1.2/conf:/usr/local/hive-3.1.2/conf
- ${PWD}/element/configure/hive-3.1.2/lib:/usr/local/hive-3.1.2/lib
- ${PWD}/element/configure/hive-3.1.2/dokshare:/usr/local/hive-3.1.2/dokshare
- ${PWD}/element/mysh:/mysh
- ${PWD}/element/configure/flume-1.9.0/job:/usr/local/flume-1.9.0/job
- ${PWD}/element/configure/flume-1.9.0/lib:/usr/local/flume-1.9.0/lib
- ${PWD}/element/configure/flume-1.9.0/dokshare:/usr/local/flume-1.9.0/dokshare
environment:
- HDFS_NAMENODE_USER=root
- HDFS_DATANODE_USER=root
- HDFS_SECONDARYNAMENODE_USER=root
- YARN_RESOURCEMANAGER_USER=root
- YARN_NODEMANAGER_USER=root
hd02:
image: my-hadoop:3.3.0
container_name: hd02
hostname: hd02
extra_hosts:
- "hd-mysql57-01:172.24.0.6"
- "hd01:172.24.0.11"
- "hd03:172.24.0.13"
networks:
hd-network:
ipv4_address: 172.24.0.12
volumes:
- ${PWD}/element/configure/hadoop/etc-hd02:/usr/local/hadoop/etc
- ${PWD}/element/configure/hadoop/dokshare:/usr/local/hadoop/dokshare
- ${PWD}/element/mysh:/mysh
environment:
- HDFS_NAMENODE_USER=root
- HDFS_DATANODE_USER=root
- HDFS_SECONDARYNAMENODE_USER=root
- YARN_RESOURCEMANAGER_USER=root
- YARN_NODEMANAGER_USER=root
hd03:
image: my-hadoop:3.3.0
container_name: hd03
hostname: hd03
extra_hosts:
- "hd-mysql57-01:172.24.0.6"
- "hd01:172.24.0.11"
- "hd02:172.24.0.12"
networks:
hd-network:
ipv4_address: 172.24.0.13
volumes:
- ${PWD}/element/configure/hadoop/etc-hd03:/usr/local/hadoop/etc
- ${PWD}/element/configure/hadoop/dokshare:/usr/local/hadoop/dokshare
- ${PWD}/element/mysh:/mysh
environment:
- HDFS_NAMENODE_USER=root
- HDFS_DATANODE_USER=root
- HDFS_SECONDARYNAMENODE_USER=root
- YARN_RESOURCEMANAGER_USER=root
- YARN_NODEMANAGER_USER=root
hd-mysql57-01:
image: mysql:5.7
container_name: hd-mysql57-01
hostname: hd-mysql57-01
networks:
hd-network:
ipv4_address: 172.24.0.6
environment:
#最好使用此设定时区,其它静像也可以使用
- TZ=CST-8
- MYSQL_ROOT_PASSWORD=123456
- MYSQL_DATABASE=hive
- MYSQL_USER=my_user
- MYSQL_PASSWORD=my_pw
#可以加--default-time-zone='+8:00'设定时区
command: --character-set-server=utf8mb4 --collation-server=utf8mb4_unicode_ci
volumes:
#本地文件目录
- ${PWD}/element/mysql57/data/:/var/lib/mysql
networks:
hd-network:
name: hd-network
ipam:
config:
- subnet: 172.24.0.0/24
## 1.3 element/ 目录
### 1.3.1 element/configure/flume-1.9.0/ 目录
- dokshare/ 目录
这个目录主要作用是共享文件,共享后续操作使用 flume 时需要用到的资料。
- job/ 目录
共享每个 agent 配置文件
- lib/ 目录
- 由于 flume 和 hadoop 使用的 lib 不同,会出现问题,所以通过 docker 映射 lib/ 目录,并删除 lib/ 目录下的文件 guava-11.0.2.jar
### 1.3.2 element/images/ 目录
下载文件:apache-flume-1.9.0-bin.tar.gz
## 2. 启动
案例:https://www.yuque.com/weghp/bigdata/rwnd0i
- 由于 flume 和 hadoop 使用的 lib 不同,会出现问题,所以通过 docker 映射 lib/ 目录,并删除 lib/ 目录下的文件 guava-11.0.2.jar