1. 安装 JDK
- 安装 Java
Hadoop Java 版本支持
# centos
yum install -y java-1.8.0-openjdk java-1.8.0-openjdk-devel
java -version
- 配置环境变量
cat <<EOF | sudo tee /etc/profile.d/java8.sh
export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk
export PATH=\$PATH:\$JAVA_HOME/bin
export CLASSPATH=.:\$JAVA_HOME/jre/lib:\$JAVA_HOME/lib:\$JAVA_HOME/lib/tools.jar
EOF
source /etc/profile.d/java8.sh
- 查看 Java 安装路径
update-alternatives --display java
2. 配置 SSH 免密登录
- 安装 SSH
yum install -y openssh openssh-server openssh-clients
- 安装 rsync
yum install -y rsync
- 生成 SSH Key
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
- 测试
第一次会提示输入密码,第二次则无需密码即可登录
ssh root@localhost
3. Hadoop 安装
- 下载 Hadoop
wget https://archive.apache.org/dist/hadoop/common/hadoop-3.3.1/hadoop-3.3.1.tar.gz
- 安装 Hadoop
tar -zxvf hadoop-3.3.1.tar.gz
sudo mv hadoop-3.3.1 /usr/local/hadoop
- 设置环境变量
vim ~/.bashrc
# HADOOP 路径
export HADOOP_HOME=/usr/local/hadoop
# 设置 PATH
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
# 设置 HADOOP 其他环境变量
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
# 链接库相关设置
export YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib"
export JAVA_LIBRARY_PATH=$HADOOP_HOME/lib/native:$JAVA_LIBRARY_PATH
source ~/.bashrc
4. 修改 Hadoop 配置文件
- 编辑 hadoop-env.sh
sudo vim /usr/local/hadoop/etc/hadoop/hadoop-env.sh
export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk
- 编辑 core-site.xml
sudo vim /usr/local/hadoop/etc/hadoop/core-site.xml
<configuration>
<!-- 设置 HDFS 默认名称-->
<property>
<name>fs.defaultFS</name>
<value>hdfs://localhost:9000</value>
</property>
</configuration>
- 设置 yarn-site.xml
sudo vim /usr/local/hadoop/etc/hadoop/yarn-site.xml
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
</configuration>
- 设置 mapred-site.xml
sudo vim /usr/local/hadoop/etc/hadoop/mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>
- 设置 hdfs-site.xml
sudo vim /usr/local/hadoop/etc/hadoop/hdfs-site.xml
<configuration>
<!-- 设置 blocks 副本备份数量(单节点只有一个 DataNode,副本数需设为 1) -->
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<!-- 设置 NameNode 数据存储目录 -->
<property>
<name>dfs.namenode.name.dir</name>
<value>file:/usr/local/hadoop/hadoop_data/hdfs/namenode</value>
</property>
<!-- 设置 DataNode 数据存储目录 -->
<property>
<name>dfs.datanode.data.dir</name>
<value>file:/usr/local/hadoop/hadoop_data/hdfs/datanode</value>
</property>
<!-- 任意网卡均可访问 -->
<property>
<name>dfs.namenode.http-address</name>
<value>0.0.0.0:50070</value>
</property>
</configuration>
5. 格式化 HDFS 目录
- 创建 namenode datanode 存储目录
mkdir -p /usr/local/hadoop/hadoop_data/hdfs/namenode
mkdir -p /usr/local/hadoop/hadoop_data/hdfs/datanode
- 修改目录所有者为 hduser(单节点使用 root用户则无需该步骤)
chown -R hduser:hduser /usr/local/hadoop
- 格式化 HDFS
hdfs namenode -format
6. 启动 Hadoop
start-dfs.sh
start-yarn.sh
或者直接执行 start-all.sh(等价于依次执行上面两个脚本,二者选其一即可):
start-all.sh
使用 jps
命令查看运行进程:
> jps
#HDFS
1683 DataNode
1877 SecondaryNameNode
1545 NameNode
# YARN
2121 ResourceManager
2271 NodeManager
7. 测试
- Hadoop ResourceManager Web:本机访问 localhost:8088 或 服务器IP:8088 ;
- HDFS Web: 本机访问 localhost:50070 或 服务器IP:50070 ;
8. 注意事项
- 使用 start-all.sh 启动时,若出现如下错误信息:
there is no HDFS_NAMENODE_USER defined
解决方法:
在 hadoop/sbin 目录下的 start-dfs.sh 和 stop-dfs.sh 文件顶部添加参数:
HDFS_DATANODE_USER=root
HDFS_DATANODE_SECURE_USER=hdfs
HDFS_NAMENODE_USER=root
HDFS_SECONDARYNAMENODE_USER=root
- 在 start-yarn.sh 和 stop-yarn.sh 文件顶部添加参数:
YARN_RESOURCEMANAGER_USER=root
HADOOP_SECURE_DN_USER=yarn
YARN_NODEMANAGER_USER=root
- 若本机无法访问,可检查防火墙是否关闭或将8088 50070端口加入白名单;
- 端口转发,若不能访问,可开启端口转发,设置8088和50070端口;