Method 1: Run directly from a prebuilt image
1. Pull the image
# Search for available stable versions
docker search hadoop
# Hadoop 2.7.0 (image size: 1.77 GB)
docker pull sequenceiq/hadoop-docker:2.7.0
docker image ls | grep hadoop
2. Start the container
# -i: keep STDIN open (interactive mode); -t: allocate a pseudo-TTY; -d: run in the background and print the container ID
docker run -itd --name hadoop_docker -p 50070:50070 -p 9000:9000 -p 8088:8088 -p 8040:8040 -p 8042:8042 -p 49707:49707 -p 50010:50010 -p 50075:50075 -p 50090:50090 sequenceiq/hadoop-docker:2.7.0 /etc/bootstrap.sh -bash
docker ps | grep hadoop_docker
# Lifecycle commands for later use:
docker start hadoop_docker
docker restart hadoop_docker
docker stop hadoop_docker
docker rm hadoop_docker
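Before running jobs, it is worth confirming the daemons actually came up. A quick check with plain Docker commands (jps may need its full path if the image does not put it on the PATH):
# Review the bootstrap output (sshd startup, daemon startup)
docker logs hadoop_docker
# List the Java daemons inside the container; expect NameNode, DataNode,
# SecondaryNameNode, ResourceManager, and NodeManager
docker exec hadoop_docker jps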
3. Verify
# Open a shell in the container
docker exec -it hadoop_docker /bin/bash
# $HADOOP_PREFIX is preset by the image
cd $HADOOP_PREFIX
# MR job 1: grep
bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.0.jar grep input output 'dfs[a-z.]+'
# Inspect the MR output
bin/hdfs dfs -cat output/*
# MR job 2: estimate Pi (arguments: number of map tasks, samples per map)
bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.0.jar pi 11 24
# MR job 3: wordcount
# Prepare the input data
cat > word.txt <<EOF
hadoop java flink
mysql hadoop hive
spark hive hadoop
flink hadoop
EOF
# Create the HDFS input directory
bin/hadoop fs -mkdir -p /wordcount/input
# Upload the data
bin/hadoop fs -put word.txt /wordcount/input
bin/hadoop fs -cat /wordcount/input/word.txt
# Run the job
bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.0.jar wordcount /wordcount/input /wordcount/output
# View the output
bin/hadoop fs -cat /wordcount/output/part-r-00000
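Given the four lines of word.txt above, the result is deterministic; wordcount prints each word, a tab, and its count, sorted by word, so the output should be:
flink	2
hadoop	4
hive	2
java	1
mysql	1
spark	1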
Web UI (replace 192.168.216.100 with your Docker host's IP)
- NameNode web UI: http://192.168.216.100:50070
- ResourceManager web UI: http://192.168.216.100:8088
4. Back up the image
# Export the image (the tar is about 1.68 GB); the two forms below are equivalent
docker save -o docker-hadoop-2.7.0-solo.save.tar sequenceiq/hadoop-docker:2.7.0
docker save > docker-hadoop-2.7.0-solo.save.tar sequenceiq/hadoop-docker:2.7.0
# Load the image (the two forms below are equivalent)
docker load -i docker-hadoop-2.7.0-solo.save.tar
docker load < docker-hadoop-2.7.0-solo.save.tar
# Run the container
docker run -itd --name hadoop_docker -p 50070:50070 -p 9000:9000 -p 8088:8088 -p 8040:8040 -p 8042:8042 -p 49707:49707 -p 50010:50010 -p 50075:50075 -p 50090:50090 sequenceiq/hadoop-docker:2.7.0 /etc/bootstrap.sh -bash
Method 2: Build from a Dockerfile
1. Prepare the packages
- JDK
Version: jdk-8u221-linux-x64.tar.gz
Download: https://www.oracle.com/technetwork/pt/java/javase/downloads/jdk8-downloads-2133151.html?printOnly=1
- Hadoop
Version: hadoop-2.7.2.tar.gz
Download: http://archive.apache.org/dist/hadoop/core
2. Pull the base image (optional)
# Search for available stable versions
docker search centos
docker pull centos:centos7.7.1908
docker image ls |grep centos
3. Write the Dockerfile
vim /share/Hadoop-Single-Dockerfile
Contents:
FROM centos:centos7.7.1908
MAINTAINER "polaris<450733605@qq.com>"
LABEL name="Hadoop-Single" \
build_date="2020-06-21 11:24:12" \
wechat="polarisaza" \
personal_site="https://www.yuque.com/polaris-docs"
# Commands run at image build time
RUN yum -y install openssh-server openssh-clients sudo vim net-tools
# Generate the SSH host key files
RUN ssh-keygen -t rsa -f /etc/ssh/ssh_host_rsa_key
RUN ssh-keygen -t ecdsa -f /etc/ssh/ssh_host_ecdsa_key
RUN ssh-keygen -t ed25519 -f /etc/ssh/ssh_host_ed25519_key
# Create a custom group and user, set a password, and grant passwordless sudo
RUN groupadd -g 1124 bigdata && useradd -m -u 1124 -g bigdata hadoop
RUN echo "hadoop:hadoop" | chpasswd
RUN echo "hadoop ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers
# Create the module and software directories (ownership is fixed after the ADDs below)
RUN mkdir /opt/software && mkdir /opt/moudle
# Copy archives from the build context into the image (ADD auto-extracts tar archives)
ADD jdk-8u221-linux-x64.tar.gz /opt/moudle
ADD hadoop-2.7.2.tar.gz /opt/software
RUN chown -R hadoop:bigdata /opt/moudle && chown -R hadoop:bigdata /opt/software
# Set environment variables
ENV CENTOS_DEFAULT_HOME /root
ENV JAVA_HOME /opt/moudle/jdk1.8.0_221
ENV HADOOP_HOME /opt/software/hadoop-2.7.2
ENV JRE_HOME ${JAVA_HOME}/jre
ENV CLASSPATH ${JAVA_HOME}/lib:${JRE_HOME}/lib
ENV PATH ${JAVA_HOME}/bin:${HADOOP_HOME}/bin:${HADOOP_HOME}/sbin:$PATH
# Default working directory for login shells
WORKDIR $CENTOS_DEFAULT_HOME
# Run sshd in the foreground and expose port 22
EXPOSE 22
CMD ["/usr/sbin/sshd", "-D"]
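A side note on the Dockerfile above: every RUN instruction adds an image layer, so related commands can be chained to keep the image smaller. A sketch of the user-setup step rewritten this way (same commands, just combined; behavior is unchanged):
RUN groupadd -g 1124 bigdata && useradd -m -u 1124 -g bigdata hadoop && \
    echo "hadoop:hadoop" | chpasswd && \
    echo "hadoop ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers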
4. Build the image
Before building, make sure the packages (jdk-8u221-linux-x64.tar.gz, hadoop-2.7.2.tar.gz) are in the build context; the ADD instructions above expect them in the same directory as the Dockerfile.
cd /share
docker build -f Hadoop-Single-Dockerfile -t registry.cn-qingdao.aliyuncs.com/polaris-docker/hadoop-single:2.7.2 .
docker images | grep hadoop
5. Create a custom network
By default, Docker containers attach to the bridge network created when Docker is installed, and IP addresses are handed out in the order containers start, so a container's IP can change after a restart. To get a stable address, we create a user-defined network.
docker network ls
# Inspect a network by its ID (ad9f70638206 here comes from the docker network ls output)
docker network inspect ad9f70638206
docker network create --subnet=172.24.0.0/24 hadoop-network
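A quick sanity check that the network exists with the intended address range (plain Docker CLI):
# Expect "Subnet": "172.24.0.0/24"
docker network inspect hadoop-network | grep -i subnet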
6. Configure the container
1. Enter the container
docker run -d --name hadoop --hostname hadoop --net hadoop-network --ip 172.24.0.2 -P -p 50070:50070 -p 8088:8088 -p 19888:19888 registry.cn-qingdao.aliyuncs.com/polaris-docker/hadoop-single:2.7.2
docker ps | grep hadoop
# Check the container's IP
docker inspect hadoop | grep -i ip
docker exec -it --privileged=true hadoop /bin/bash
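The image's CMD launched sshd, which the key setup below depends on. Since net-tools was installed in the Dockerfile, a quick port check inside the container confirms it is listening:
# sshd should be listening on port 22
netstat -tlnp | grep :22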
2. Passwordless SSH
# Switch to the hadoop user
su hadoop
# Generate a key pair
ssh-keygen -t rsa -C "450733605@qq.com"
# Authorize the key on this host (hostname: hadoop)
ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop
# Log in without a password
ssh hadoop
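For a scripted check, OpenSSH's BatchMode option turns any password prompt into a hard failure, so success proves the key is in place:
# Prints the hostname only if key-based login works; errors out otherwise
ssh -o BatchMode=yes hadoop hostname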
3. Configure Hadoop
Create the required directories
mkdir /opt/software/hadoop-2.7.2/tmp
mkdir -p /opt/software/hadoop-2.7.2/dfs/namenode_data
mkdir -p /opt/software/hadoop-2.7.2/dfs/datanode_data
1. hadoop-env.sh
vi ${HADOOP_HOME}/etc/hadoop/hadoop-env.sh
Configuration:
export JAVA_HOME=/opt/moudle/jdk1.8.0_221
export HADOOP_CONF_DIR=/opt/software/hadoop-2.7.2/etc/hadoop
2. core-site.xml
vi ${HADOOP_HOME}/etc/hadoop/core-site.xml
Configuration:
<configuration>
    <!-- Name of the default file system -->
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://hadoop:9000</value>
    </property>
    <!-- Temporary directory used by HDFS at runtime -->
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/opt/software/hadoop-2.7.2/tmp</value>
    </property>
</configuration>
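Once saved, Hadoop's getconf tool (part of the standard 2.x distribution) confirms the setting is picked up:
hdfs getconf -confKey fs.defaultFS
# Expected output: hdfs://hadoop:9000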
3. hdfs-site.xml
Sets the replication factor and HDFS permission checking.
vi ${HADOOP_HOME}/etc/hadoop/hdfs-site.xml
Configuration:
<configuration>
    <!-- Number of copies HDFS keeps of each block (including the original); default is 3 -->
    <!-- In pseudo-distributed mode this must be 1 -->
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
    <!-- Where the NameNode stores its metadata; multiple comma-separated directories can be given for redundancy -->
    <property>
        <name>dfs.name.dir</name>
        <value>/opt/software/hadoop-2.7.2/dfs/namenode_data</value>
    </property>
    <!-- Where the DataNode stores its data blocks -->
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>/opt/software/hadoop-2.7.2/dfs/datanode_data</value>
    </property>
    <!-- HDFS permission checking; false lets any user operate on HDFS files -->
    <property>
        <name>dfs.permissions</name>
        <value>false</value>
    </property>
</configuration>
4. mapred-site.xml
cp ${HADOOP_HOME}/etc/hadoop/mapred-site.xml.template ${HADOOP_HOME}/etc/hadoop/mapred-site.xml
vi ${HADOOP_HOME}/etc/hadoop/mapred-site.xml
Configuration:
<configuration>
    <!-- Run MapReduce on YARN -->
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <!-- Job history server address -->
    <property>
        <name>mapreduce.jobhistory.address</name>
        <value>hadoop:10020</value>
    </property>
    <!-- Job history server web UI address -->
    <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>hadoop:19888</value>
    </property>
</configuration>
5. yarn-site.xml
vi ${HADOOP_HOME}/etc/hadoop/yarn-site.xml
Configuration:
<configuration>
    <!-- Hostname of the ResourceManager, YARN's master daemon -->
    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>hadoop</value>
    </property>
    <!-- How the NodeManager passes map output to reducers -->
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <!-- Enable log aggregation -->
    <property>
        <name>yarn.log-aggregation-enable</name>
        <value>true</value>
    </property>
    <!-- Retain aggregated logs for 7 days (604800 seconds) -->
    <property>
        <name>yarn.log-aggregation.retain-seconds</name>
        <value>604800</value>
    </property>
</configuration>
6. slaves
vi ${HADOOP_HOME}/etc/hadoop/slaves
Configuration (one worker hostname per line; here, just the single container):
hadoop
7. Start Hadoop
1. Initialize HDFS
hdfs namenode -format
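Format exactly once: every format writes a fresh clusterID into the NameNode's VERSION file, and DataNode data initialized under an older ID will no longer register. Given the dfs.name.dir configured above, the ID lives here:
cat /opt/software/hadoop-2.7.2/dfs/namenode_data/current/VERSION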
2. Start HDFS and YARN
start-dfs.sh
start-yarn.sh
mr-jobhistory-daemon.sh start historyserver
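Before verifying through the web UIs, the standard HDFS admin report should already show one live DataNode:
hdfs dfsadmin -report | grep -i "live datanodes"
# Expected: Live datanodes (1):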
8. Verify
1. Turn off the firewall
# Run these commands on the Docker host
# Check the firewall status
systemctl status firewalld
# Stop the firewall
systemctl stop firewalld.service
2. Check the processes
jps
# Expected processes:
<PID> NameNode
<PID> SecondaryNameNode
<PID> DataNode
<PID> ResourceManager
<PID> NodeManager
<PID> JobHistoryServer
3. Web UI (192.168.0.99 is the Docker host's IP here)
- NameNode web UI: http://192.168.0.99:50070
- ResourceManager web UI: http://192.168.0.99:8088
- HistoryServer web UI: http://192.168.0.99:19888
9. Back up and restore the image
# First, commit the configured container as an image (-a: author, -m: message)
# Make sure the container is running: docker start hadoop
docker commit -a "polaris<450733605@qq.com>" -m "This is backup for hadoop-2.7.2 standalone" hadoop registry.cn-qingdao.aliyuncs.com/polaris-docker/hadoop-commit:2.7.2
# Export the image
docker save -o /share/docker-hadoop-2.7.2-standalone.tar.gz registry.cn-qingdao.aliyuncs.com/polaris-docker/hadoop-commit:2.7.2
# Restore the image
docker load -i /share/docker-hadoop-2.7.2-standalone.tar.gz
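Note that docker commit captures the filesystem, not running processes, so a container started from the restored image comes up with only sshd; the Hadoop daemons must be started again by hand. A minimal sketch (the container name hadoop2 and IP 172.24.0.3 are placeholders chosen here, not from the original setup):
docker run -d --name hadoop2 --hostname hadoop --net hadoop-network --ip 172.24.0.3 -p 50070:50070 -p 8088:8088 -p 19888:19888 registry.cn-qingdao.aliyuncs.com/polaris-docker/hadoop-commit:2.7.2
docker exec -it hadoop2 /bin/bash
su hadoop
start-dfs.sh && start-yarn.sh && mr-jobhistory-daemon.sh start historyserver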
Push the image to a public registry
# Log in to the registry
docker login --username=alypolarisbigdata registry.cn-qingdao.aliyuncs.com
# Commit the image, tagged for the registry
docker commit -a "polaris<450733605@qq.com>" -m "This is backup for hadoop-2.7.2 standalone" hadoop registry.cn-qingdao.aliyuncs.com/polaris-docker/hadoop-commit:2.7.2
docker images | grep hadoop-commit
# Push the image
docker push registry.cn-qingdao.aliyuncs.com/polaris-docker/hadoop-commit:2.7.2
# Pull the image
docker pull registry.cn-qingdao.aliyuncs.com/polaris-docker/hadoop-commit:2.7.2