创建虚拟网络

docker network create --driver=bridge hadoop

拉取centos镜像

docker pull centos

创建一个centos容器

  1. docker run -itd --privileged --name Centos8 centos /usr/sbin/init
  2. docker exec -it Centos8 /bin/bash

安装 jdk1.8、Scala

  1. yum install -y vim bash-completion net-tools wget // 安装小工具
  2. cd /etc/yum.repos.d/
  3. mkdir bak
  4. mv * bak/
  5. wget https://mirrors.aliyun.com/repo/Centos-8.repo
  6. yum clean all
  7. yum makecache
  8. yum install java-1.8.0-openjdk* -y
  9. java -version
  10. yum install scala -y
  11. scala //测试

配置SSH

  • 集群间需要使用ssh互相通信

    1. yum install openssh-server openssh-clients -y //安装ssh
    2. cd
    3. ssh-keygen -t rsa -P "" //生成密钥,免密登录
    4. cat .ssh/id_rsa.pub >> .ssh/authorized_keys
    5. systemctl start sshd
    6. ssh 127.0.0.1 //测试免密登录
    7. exit // 记得退出
    8. vim ~/.bashrc //配置ssh服务开机自启动
    9. // 在文件最下面添加一行
    10. systemctl start sshd

    安装Hadoop

    1. wget https://mirrors.tuna.tsinghua.edu.cn/apache/hadoop/common/hadoop-3.2.1/hadoop-3.2.1.tar.gz
    2. tar -zxvf hadoop-3.2.1.tar.gz -C /usr/local/
    3. cd /usr/local/
    4. mv hadoop-3.2.1 hadoop

    配置hadoop

  • 编辑/etc/profile,在文件最后加入以下内容

  • JAVA_HOME 为 JDK 安装路径,用 update-alternatives --config java 可查看

    1. #java
    2. export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.272.b10-1.el8_2.x86_64
    3. export JRE_HOME=${JAVA_HOME}/jre
    4. export CLASSPATH=.:${JAVA_HOME}/lib:${JRE_HOME}/lib
    5. export PATH=${JAVA_HOME}/bin:$PATH
    6. #hadoop
    7. export HADOOP_HOME=/usr/local/hadoop
    8. export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
    9. export HADOOP_COMMON_HOME=$HADOOP_HOME
    10. export HADOOP_HDFS_HOME=$HADOOP_HOME
    11. export HADOOP_MAPRED_HOME=$HADOOP_HOME
    12. export HADOOP_YARN_HOME=$HADOOP_HOME
    13. export HADOOP_INSTALL=$HADOOP_HOME
    14. export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
    15. export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
    16. export HADOOP_LIBEXEC_DIR=$HADOOP_HOME/libexec
    17. export JAVA_LIBRARY_PATH=$HADOOP_HOME/lib/native:$JAVA_LIBRARY_PATH
    18. export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
    19. export HDFS_DATANODE_USER=root
    20. export HDFS_DATANODE_SECURE_USER=root
    21. export HDFS_SECONDARYNAMENODE_USER=root
    22. export HDFS_NAMENODE_USER=root
    23. export YARN_RESOURCEMANAGER_USER=root
    24. export YARN_NODEMANAGER_USER=root
  • 生效 source /etc/profile

    修改 hadoop-env.sh

  • /usr/local/hadoop/etc/hadoop/hadoop-env.sh

  • 在文件最后追加以下内容

    1. export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.272.b10-1.el8_2.x86_64
    2. export HDFS_NAMENODE_USER=root
    3. export HDFS_DATANODE_USER=root
    4. export HDFS_SECONDARYNAMENODE_USER=root
    5. export YARN_RESOURCEMANAGER_USER=root
    6. export YARN_NODEMANAGER_USER=root

    修改 core-site.xml

  • /usr/local/hadoop/etc/hadoop/core-site.xml

  • 将文件内容修改为以下内容,其中h01为主机名字,可在/etc/hosts中添加

    1. <configuration>
    2. <property>
    3. <name>fs.defaultFS</name>
    4. <value>hdfs://h01:9000</value>
    5. </property>
    6. <property>
    7. <name>hadoop.tmp.dir</name>
    8. <value>/home/hadoop3/hadoop/tmp</value>
    9. </property>
    10. </configuration>

    修改 hdfs-site.xml

  • /usr/local/hadoop/etc/hadoop/hdfs-site.xml

    1. <configuration>
    2. <property>
    3. <name>dfs.replication</name>
    4. <value>2</value>
    5. </property>
    6. <property>
    7. <name>dfs.namenode.name.dir</name>
    8. <value>/home/hadoop3/hadoop/hdfs/name</value>
    9. </property>
    10. <property>
    11. <name>dfs.datanode.data.dir</name>
    12. <value>/home/hadoop3/hadoop/hdfs/data</value>
    13. </property>
    14. </configuration>

    修改 mapred-site.xml

  • /usr/local/hadoop/etc/hadoop/mapred-site.xml

    1. <configuration>
    2. <property>
    3. <name>mapreduce.framework.name</name>
    4. <value>yarn</value>
    5. </property>
    6. <property>
    7. <name>mapreduce.application.classpath</name>
    8. <value>
    9. /usr/local/hadoop/etc/hadoop,
    10. /usr/local/hadoop/share/hadoop/common/*,
    11. /usr/local/hadoop/share/hadoop/common/lib/*,
    12. /usr/local/hadoop/share/hadoop/hdfs/*,
    13. /usr/local/hadoop/share/hadoop/hdfs/lib/*,
    14. /usr/local/hadoop/share/hadoop/mapreduce/*,
    15. /usr/local/hadoop/share/hadoop/mapreduce/lib/*,
    16. /usr/local/hadoop/share/hadoop/yarn/*,
    17. /usr/local/hadoop/share/hadoop/yarn/lib/*
    18. </value>
    19. </property>
    20. </configuration>

    修改 yarn-site.xml

  • /usr/local/hadoop/etc/hadoop/yarn-site.xml

    1. <configuration>
    2. <property>
    3. <name>yarn.resourcemanager.hostname</name>
    4. <value>h01</value>
    5. </property>
    6. <property>
    7. <name>yarn.nodemanager.aux-services</name>
    8. <value>mapreduce_shuffle</value>
    9. </property>
    10. </configuration>

    修改hadoop/workers

  • /usr/local/hadoop/etc/hadoop/workers

  • 用于群起脚本
    1. h01
    2. h02
    3. h03

    创建3个一样的容器

    docker ps -a //查看所有容器,确认待提交容器的 CONTAINER ID

    制作镜像

    docker commit -m "Alitahadoop" -a "Alita" 73a701cae68b alitahadoop
    docker images

//启动的 h01 是做 master 节点的,所以暴露了端口,以供访问 web 页面
docker run -itd --privileged --network hadoop -h "h01" --name "h01" -p 9870:9870 -p 8088:8088 alitahadoop /usr/sbin/init
docker run -itd --privileged --network hadoop -h "h02" --name "h02" alitahadoop /usr/sbin/init
docker run -itd --privileged --network hadoop -h "h03" --name "h03" alitahadoop /usr/sbin/init

docker start h01 h02 h03

配置各个容器对应的/etc/hosts文件

  1. # 查看ip:
  2. docker inspect h01
  3. # 在每个容器的 /etc/hosts 中添加以下映射
  4. 172.18.0.2 h01
  5. 172.18.0.3 h02
  6. 172.18.0.4 h03

在h01中启动hadoop

  1. docker exec -it h01 /bin/bash
  2. cd /usr/local/hadoop/bin
  3. hdfs namenode -format //先进行格式化操作
  4. cd /usr/local/hadoop/sbin/ //进入 hadoop sbin 目录
  5. ./start-all.sh //启动
  6. //关闭集群执行脚本 ./stop-all.sh
  7. cd /usr/local/hadoop/bin
  8. ./hdfs dfsadmin -report //查看分布式文件系统的状态

http://localhost:8088