0. Goal
- Build a Hadoop cluster of 3 nodes (NN + 2NN + history server).
hd01, hd02, hd03 are assigned roles as follows (matching the configs later in this document; DataNode and NodeManager are assumed to run on all three nodes):
- hd01: NameNode, DataNode, NodeManager, JobHistoryServer
- hd02: ResourceManager, DataNode, NodeManager
- hd03: SecondaryNameNode, DataNode, NodeManager
The project consists of:
- Dockerfile
- Makefile
- docker-compose.yml
- element/ directory
1. Dockerfile
FROM centos:7

# SSH: server + client, root password login
RUN yum install -y openssh-server openssh-clients which sudo
RUN sed -i 's/UsePAM yes/UsePAM no/g' /etc/ssh/sshd_config
RUN echo "PermitRootLogin yes" >> /etc/ssh/sshd_config
RUN echo "root:123456" | chpasswd
RUN echo "root ALL=(ALL) ALL" >> /etc/sudoers
# host keys, generated non-interactively (-N '' avoids the passphrase prompt during build)
RUN ssh-keygen -t dsa -f /etc/ssh/ssh_host_dsa_key -N ''
RUN ssh-keygen -t rsa -f /etc/ssh/ssh_host_rsa_key -N ''
RUN mkdir /var/run/sshd

# JAVA: ADD auto-extracts the tarball into /usr/local/
ADD element/images/jdk-8u141-linux-x64.tar.gz /usr/local/
RUN mv /usr/local/jdk1.8.0_141 /usr/local/jdk1.8
ENV JAVA_HOME /usr/local/jdk1.8
ENV PATH $JAVA_HOME/bin:$PATH

# HADOOP
ADD element/images/hadoop-3.3.0.tar.gz /usr/local
RUN mv /usr/local/hadoop-3.3.0 /usr/local/hadoop
ENV HADOOP_HOME /usr/local/hadoop
ENV PATH $HADOOP_HOME/bin:$PATH

# mount point for the ops scripts in element/mysh/
RUN mkdir /mysh

EXPOSE 22
CMD ["/usr/sbin/sshd", "-D"]
2. Makefile
.PHONY: config
config:
	sudo rm -rf element/configure/hadoop/etc-hd01 element/configure/hadoop/etc-hd02 element/configure/hadoop/etc-hd03
	sudo mkdir -p element/configure/hadoop/etc-hd01 element/configure/hadoop/etc-hd02 element/configure/hadoop/etc-hd03
	sudo cp -r element/configure/hadoop/etc/* element/configure/hadoop/etc-hd01/
	sudo cp -r element/configure/hadoop/etc/* element/configure/hadoop/etc-hd02/
	sudo cp -r element/configure/hadoop/etc/* element/configure/hadoop/etc-hd03/

.PHONY: up
up:
	docker-compose up -d
	docker exec -it hd01 sh /mysh/syncSshKeygen.sh
	docker exec -it hd02 sh /mysh/syncSshKeygen.sh
	docker exec -it hd03 sh /mysh/syncSshKeygen.sh

.PHONY: start
start:
	docker-compose start

.PHONY: down
down:
	docker-compose down
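The Makefile has no target that builds the image itself; a sketch of one (my addition, assuming the Dockerfile sits in the project root next to the Makefile):

.PHONY: build
build:
	docker build -t my-hadoop:3.3.0 .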
3. docker-compose.yml
version: '3.5'
services:
  hd01:
    image: my-hadoop:3.3.0
    container_name: hd01
    hostname: hd01
    extra_hosts:
      - "hd02:172.24.0.12"
      - "hd03:172.24.0.13"
    networks:
      hd-network:
        ipv4_address: 172.24.0.11
    volumes:
      - ${PWD}/element/configure/hadoop/etc-hd01:/usr/local/hadoop/etc
      - ${PWD}/element/mysh:/mysh
    environment:
      - HDFS_NAMENODE_USER=root
      - HDFS_DATANODE_USER=root
      - HDFS_SECONDARYNAMENODE_USER=root
      - YARN_RESOURCEMANAGER_USER=root
      - YARN_NODEMANAGER_USER=root
  hd02:
    image: my-hadoop:3.3.0
    container_name: hd02
    hostname: hd02
    extra_hosts:
      - "hd01:172.24.0.11"
      - "hd03:172.24.0.13"
    networks:
      hd-network:
        ipv4_address: 172.24.0.12
    volumes:
      - ${PWD}/element/configure/hadoop/etc-hd02:/usr/local/hadoop/etc
      - ${PWD}/element/mysh:/mysh
    environment:
      - HDFS_NAMENODE_USER=root
      - HDFS_DATANODE_USER=root
      - HDFS_SECONDARYNAMENODE_USER=root
      - YARN_RESOURCEMANAGER_USER=root
      - YARN_NODEMANAGER_USER=root
  hd03:
    image: my-hadoop:3.3.0
    container_name: hd03
    hostname: hd03
    extra_hosts:
      - "hd01:172.24.0.11"
      - "hd02:172.24.0.12"
    networks:
      hd-network:
        ipv4_address: 172.24.0.13
    volumes:
      - ${PWD}/element/configure/hadoop/etc-hd03:/usr/local/hadoop/etc
      - ${PWD}/element/mysh:/mysh
    environment:
      - HDFS_NAMENODE_USER=root
      - HDFS_DATANODE_USER=root
      - HDFS_SECONDARYNAMENODE_USER=root
      - YARN_RESOURCEMANAGER_USER=root
      - YARN_NODEMANAGER_USER=root

networks:
  hd-network:
    name: hd-network
    ipam:
      config:
        - subnet: 172.24.0.0/24
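Once the stack is up, the fixed addressing can be verified from the host:

docker network inspect hd-network          # lists the three containers and their IPs
docker exec hd01 getent hosts hd02 hd03    # extra_hosts entries as seen inside hd01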
4. element/ directory
- element/configure/ directory
This directory is bind-mounted into the containers so the configuration can be edited on the host and shared conveniently.
Its most important files are core-site.xml, hdfs-site.xml, and yarn-site.xml. (hadoop-env.sh should also export JAVA_HOME: daemons launched over SSH by the start scripts do not inherit the image's ENV.)
core-site.xml:
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<!-- 指定NameNode的地址 -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://hd01:8020</value>
</property>
<!-- 指定hadoop数据的存储目录 -->
<property>
<name>hadoop.tmp.dir</name>
<value>/usr/local/hadoop/data</value>
</property>
<!-- 代理 -->
<property>
<name>hadoop.proxyuser.root.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.root.groups</name>
<value>*</value>
</property>
</configuration>
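The effective value can be checked from inside any container with Hadoop's own tooling:

docker exec hd01 hdfs getconf -confKey fs.defaultFS    # hdfs://hd01:8020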
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<!-- nn web端访问地址 -->
<property>
<name>dfs.namenode.http-address</name>
<value>hd01:9870</value>
</property>
<!-- 2nn web端访问地址 -->
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>hd03:9868</value>
</property>
</configuration>
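start-dfs.sh and start-yarn.sh read etc/hadoop/workers to decide where to launch DataNodes and NodeManagers. That file is not shown in the original tree; for this cluster it would presumably list all three hosts:

hd01
hd02
hd03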
<?xml version="1.0"?>
<configuration>
<!-- 指定MR走shuffle -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<!-- 指定ResourceManager的地址 -->
<property>
<name>yarn.resourcemanager.hostname</name>
<value>hd02</value>
</property>
<!-- 环境变量的继承 -->
<property>
<name>yarn.nodemanager.env-whitelist</name>
<value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
</property>
<!-- 日志聚集 -->
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<!-- 设置日志聚集服务器地址 -->
<property>
<name>yarn.log.server.url</name>
<value>http://hd01:19888/jobhistory/logs</value>
</property>
<!-- 设置日志保留时间 -->
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>604800</value>
</property>
</configuration>
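mapred-site.xml is not listed above, but running MapReduce jobs on YARN together with the history server normally requires one. A minimal sketch matching the hostnames used here (an assumption on my part, not confirmed by the original tree):

<?xml version="1.0"?>
<configuration>
  <!-- run MapReduce on YARN instead of the default local runner -->
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <!-- JobHistory server RPC and web addresses, on hd01 -->
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>hd01:10020</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>hd01:19888</value>
  </property>
</configuration>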
- element/images/ directory
Hadoop depends on Java; everything here currently uses JDK 1.8. This directory holds the jdk-8u141-linux-x64.tar.gz and hadoop-3.3.0.tar.gz tarballs that the Dockerfile ADDs.
- element/mysh/ directory
The .sh scripts here are for cluster operations. There are quite a few; they will be written up as this document is updated.
syncSshKeygen.sh configures passwordless login, which a Hadoop cluster needs so its servers can SSH to one another:
NETS=("hd01" "hd02" "hd03")
USER="root"
PASSWORD="123456"
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa &> /dev/null
sed -i '/StrictHostKeyChecking/c StrictHostKeyChecking no' /etc/ssh/ssh_config
rpm -q sshpass &> /dev/null || yum -y install sshpass &> /dev/null
for NET in ${NETS[@]}
do
sshpass -p $PASSWORD ssh-copy-id -i ${USER}@${NET}
done
wait
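Whether the key exchange worked can be checked from the host, e.g.:

docker exec hd01 ssh hd02 hostname    # prints "hd02" without asking for a password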
5. Start
- On the host machine
[kali] make config up
- Initialize HDFS (first start only; the sbin/ and bin/ paths below are relative to /usr/local/hadoop)
[hd01] hdfs namenode -format
- Start DFS
[hd01] sbin/start-dfs.sh
- Start YARN
[hd02] sbin/start-yarn.sh
- Start the history server
[hd01] bin/mapred --daemon start historyserver
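To confirm every daemon came up, run jps on each node (the expected lists assume DataNode/NodeManager on all three nodes, per the goal section):

docker exec hd01 jps    # NameNode, DataNode, NodeManager, JobHistoryServer
docker exec hd02 jps    # ResourceManager, DataNode, NodeManager
docker exec hd03 jps    # SecondaryNameNode, DataNode, NodeManager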
6. Open the monitoring pages
Browse Directory: http://hd01:9870/explorer.html#
All Applications: http://hd02:8088/cluster
JobHistory: http://hd01:19888/jobhistory
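docker-compose.yml publishes no ports, so these URLs work only if the machine running the browser can reach the container IPs and resolve the hostnames. On a Linux host, one option (an assumption about the environment, not part of the original setup) is to map the fixed IPs in the host's /etc/hosts:

# append to /etc/hosts on the host machine
172.24.0.11 hd01
172.24.0.12 hd02
172.24.0.13 hd03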