Resource Planning
Component | bigdata-node1 | bigdata-node2 | bigdata-node3 |
---|---|---|---|
OS | CentOS 7.6 | CentOS 7.6 | CentOS 7.6 |
JDK | JVM | JVM | JVM |
HDFS | NameNode/SecondaryNameNode/DataNode/JobHistoryServer/ApplicationHistoryServer | DataNode | DataNode |
YARN | ResourceManager/NodeManager | NodeManager | NodeManager |
Hive | HiveServer2/Metastore/CLI/Beeline | CLI/Beeline | CLI/Beeline |
MySQL | N/A | N/A | MySQL Server |
Installation Media
Version: apache-hive-2.3.4-bin.tar.gz
Download: http://archive.apache.org/dist/hive
Environment Preparation
Install Hadoop
Install MySQL
Hive Server Installation
Extract the Archive
# Log in to the bigdata-node1 node
cd /share
wget http://archive.apache.org/dist/hive/hive-2.3.4/apache-hive-2.3.4-bin.tar.gz
tar -zxvf apache-hive-2.3.4-bin.tar.gz -C ~/modules/
rm apache-hive-2.3.4-bin.tar.gz
Create Required Directories
cd ~/modules/apache-hive-2.3.4-bin/conf
cp hive-env.sh.template hive-env.sh
cp hive-default.xml.template hive-site.xml
cp hive-log4j2.properties.template hive-log4j2.properties
mkdir ~/modules/apache-hive-2.3.4-bin/logs
mkdir ~/modules/apache-hive-2.3.4-bin/tmpdir
Configure Hive
1. Configure hive-log4j2.properties.
vi ~/modules/apache-hive-2.3.4-bin/conf/hive-log4j2.properties
Configuration:
# The log directory must exist in advance (created above)
property.hive.log.dir=/home/vagrant/modules/apache-hive-2.3.4-bin/logs
2. Configure hive-env.sh.
vi ~/modules/apache-hive-2.3.4-bin/conf/hive-env.sh
Configuration:
# Append at the end of the file
export HADOOP_HOME=/home/vagrant/modules/hadoop-2.7.2
export HIVE_CONF_DIR=/home/vagrant/modules/apache-hive-2.3.4-bin/conf
export HIVE_AUX_JARS_PATH=/home/vagrant/modules/apache-hive-2.3.4-bin/lib
3. Configure hive-site.xml.
rm -rf ~/modules/apache-hive-2.3.4-bin/conf/hive-site.xml
vi ~/modules/apache-hive-2.3.4-bin/conf/hive-site.xml
Server-side configuration:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>system:java.io.tmpdir</name>
<value>/home/vagrant/modules/apache-hive-2.3.4-bin/tmpdir</value>
</property>
<property>
<name>system:user.name</name>
<value>vagrant</value>
</property>
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/user/hive/warehouse</value>
</property>
<!-- Hive server-side settings -->
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://bigdata-node3:3306/hive2_metadata?createDatabaseIfNotExist=true&amp;useSSL=false</value>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>hive2</value>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>hive2</value>
</property>
</configuration>
Client-side configuration:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>system:java.io.tmpdir</name>
<value>/home/vagrant/modules/apache-hive-2.3.4-bin/tmpdir</value>
</property>
<property>
<name>system:user.name</name>
<value>vagrant</value>
</property>
<!-- Hive client-side settings -->
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/user/hive/warehouse</value>
</property>
<property>
<name>hive.metastore.uris</name>
<value>thrift://bigdata-node1:9083</value>
</property>
<property>
<name>hive.metastore.local</name>
<value>false</value>
</property>
</configuration>
Configure Hadoop
1. Configure core-site.xml.
vi ~/modules/hadoop-2.7.2/etc/hadoop/core-site.xml
Add the following:
<!-- HiveServer2 enforces impersonation, so Hadoop must trust the proxy user -->
<property>
<name>hadoop.proxyuser.vagrant.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.vagrant.groups</name>
<value>*</value>
</property>
2. Configure hdfs-site.xml.
vi ~/modules/hadoop-2.7.2/etc/hadoop/hdfs-site.xml
Add the following:
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
After modifying the configuration files, distribute them to the other cluster nodes:
scp -r ~/modules/hadoop-2.7.2/etc/hadoop/core-site.xml vagrant@bigdata-node2:~/modules/hadoop-2.7.2/etc/hadoop/
scp -r ~/modules/hadoop-2.7.2/etc/hadoop/core-site.xml vagrant@bigdata-node3:~/modules/hadoop-2.7.2/etc/hadoop/
scp -r ~/modules/hadoop-2.7.2/etc/hadoop/hdfs-site.xml vagrant@bigdata-node2:~/modules/hadoop-2.7.2/etc/hadoop/
scp -r ~/modules/hadoop-2.7.2/etc/hadoop/hdfs-site.xml vagrant@bigdata-node3:~/modules/hadoop-2.7.2/etc/hadoop/
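The proxy-user settings in core-site.xml only take effect once the NameNode and ResourceManager re-read them. A minimal sketch of the two options, assuming the Hadoop daemons run from ~/modules/hadoop-2.7.2:
cd ~/modules/hadoop-2.7.2
# Option 1: restart HDFS and YARN
sbin/stop-yarn.sh && sbin/stop-dfs.sh
sbin/start-dfs.sh && sbin/start-yarn.sh
# Option 2: refresh the proxy-user settings in place, without a restart
bin/hdfs dfsadmin -refreshSuperUserGroupsConfiguration
bin/yarn rmadmin -refreshSuperUserGroupsConfiguration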
Then create the Hive warehouse directories on HDFS.
# Start HDFS first, then create the Hive directories
cd ~/modules/hadoop-2.7.2/
# Create the Hive paths and grant permissions
bin/hdfs dfs -mkdir -p /user/hive/warehouse
bin/hdfs dfs -mkdir -p /user/hive/tmp
bin/hdfs dfs -mkdir -p /user/hive/log
bin/hdfs dfs -chmod -R 777 /user/hive/warehouse
bin/hdfs dfs -chmod -R 777 /user/hive/tmp
bin/hdfs dfs -chmod -R 777 /user/hive/log
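To confirm the directories and permissions, list them; each path should show drwxrwxrwx:
bin/hdfs dfs -ls -R /user/hive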
Environment Variables
vi ~/.bashrc # in vi, ':$' jumps to the last line; append there
Append:
export HIVE_HOME=/home/vagrant/modules/apache-hive-2.3.4-bin
export PATH=$HIVE_HOME/bin:$PATH
Apply the changes:
source ~/.bashrc
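As a quick sanity check that the variables took effect, verify that hive now resolves from the new PATH:
which hive
hive --version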
Integrate with MySQL
1. Upload the MySQL driver JAR to ${HIVE_HOME}/lib; mysql-connector-java-5.1.40.jar or later is recommended. (Download: https://mvnrepository.com/artifact/mysql/mysql-connector-java)
cp /share/mysql-connector-java-5.1.47.jar /home/vagrant/modules/apache-hive-2.3.4-bin/lib/
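A quick check that the driver landed in the right place (the JAR name will differ if you downloaded another release):
ls /home/vagrant/modules/apache-hive-2.3.4-bin/lib/ | grep mysql-connector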
2. Create the metastore database user.
# On bigdata-node3 (the MySQL node), as root
source /etc/profile
mysql -uroot -p123456
CREATE USER 'hive2'@'%' IDENTIFIED BY 'hive2';
# CREATE user 'hive2'@'localhost' IDENTIFIED BY 'hive2';
GRANT ALL PRIVILEGES ON *.* TO 'hive2'@'%' WITH GRANT OPTION;
GRANT ALL PRIVILEGES ON *.* TO 'hive2'@'localhost' with grant option;
flush privileges;
quit;
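Before initializing the schema, it is worth confirming that bigdata-node1 can actually reach MySQL as the new user. This sketch assumes a mysql client is installed on bigdata-node1 and uses the hive2/hive2 credentials created above:
mysql -h bigdata-node3 -P 3306 -u hive2 -phive2 -e "SELECT 1;"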
3. Initialize the metastore schema. When you see "schemaTool completed", initialization succeeded.
cd /home/vagrant/modules/apache-hive-2.3.4-bin
schematool -initSchema -dbType mysql -verbose
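If initialization succeeded, hive2_metadata now contains the metastore tables (DBS, TBLS, and so on). One way to confirm, again assuming the hive2 credentials:
mysql -h bigdata-node3 -u hive2 -phive2 -e "SHOW TABLES IN hive2_metadata;"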
4. Grant privileges on the metastore database.
# On bigdata-node3 (the MySQL node), as root
source /etc/profile
mysql -uroot -p123456
use mysql;
select User, Host from user;
update user set host='%' where host='localhost';
-- delete from user where host='localhost' and User='hive2';
-- Optionally remove the root user's entries for hosts other than '%'
use hive2_metadata;
grant all on hive2_metadata.* to hive2@'%' identified by 'hive2';
grant all on hive2_metadata.* to hive2@localhost identified by 'hive2';
ALTER DATABASE hive2_metadata CHARACTER SET latin1;
flush privileges;
quit;
5. Configure hive-site.xml (already done in the sections above).
6. Passwordless SSH: make sure this node can log in to every other cluster node without a password.
7. Start and test Hive.
# HDFS and YARN must be running first
# Create a data file
mkdir -p ~/datas
vi ~/datas/stu.txt
Contents (note: the column delimiter must be a tab, \t):
00001 zhangsan
00002 lisi
00003 wangwu
00004 zhaoliu
Create a table and load the data into it:
cd ~/modules/apache-hive-2.3.4-bin/bin
./hive
# Optionally start with DEBUG logging instead
./hive -hiveconf hive.root.logger=DEBUG,console
# Create the table
hive> CREATE TABLE stu(id INT, name STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';
# Load the data
hive> load data local inpath '/home/vagrant/datas/stu.txt' into table stu;
# Query the table
hive> select * from stu;
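A select * reads the table files directly; to verify that Hive can also submit jobs to YARN, a simple aggregate at the same hive> prompt (which should launch a MapReduce job) is a useful extra check:
hive> select count(*) from stu;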
Server-Side Verification
Connect to Hive Locally with the CLI
cd ~/modules/apache-hive-2.3.4-bin
# Start the CLI; an embedded Metastore is started automatically
bin/hive
# List databases
hive> show databases;
# Switch to the default database
hive> use default;
# List tables
hive> show tables;
Connect to Hive Locally with Beeline
cd ~/modules/apache-hive-2.3.4-bin
# 1) Start HiveServer2; an embedded Metastore is started automatically
bin/hiveserver2 >/dev/null 2>&1 &
# Check that HiveServer2 started
ps aux | grep hiveserver2
# Check the HiveServer2 port
# sudo yum install net-tools # provides netstat
netstat -nl | grep 10000
# Check that the Metastore started
ps aux | grep metastore
# 2) Start Beeline.
# Option 1
bin/beeline
beeline> !connect jdbc:hive2://localhost:10000 hive2 hive2
# Option 2
bin/beeline -u jdbc:hive2://localhost:10000
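For scripted checks, Beeline can also run a single statement and exit (-n passes the username, -e the statement; this assumes the hive2 user created above):
bin/beeline -u jdbc:hive2://localhost:10000 -n hive2 -e "show databases;"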
Hive Client Installation
# Distribute Hive to the client nodes
scp -r ~/modules/apache-hive-2.3.4-bin vagrant@bigdata-node2:~/modules/
scp -r ~/modules/apache-hive-2.3.4-bin vagrant@bigdata-node3:~/modules/
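Note that this also copies the server's hive-site.xml. On bigdata-node2 and bigdata-node3, replace its contents with the client-side configuration shown earlier, so that hive.metastore.uris points the clients at thrift://bigdata-node1:9083:
# On each client node
vi ~/modules/apache-hive-2.3.4-bin/conf/hive-site.xml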
Client-Side Verification
Connect to Hive Remotely with the CLI
# The Metastore service must be running on the server
cd ~/modules/apache-hive-2.3.4-bin
bin/hive --service metastore >/dev/null 2>&1 &
# Start the CLI on the client
bin/hive
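Before starting clients, you can confirm on bigdata-node1 that the Metastore is listening (9083 is the Thrift port set in hive.metastore.uris):
netstat -nl | grep 9083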
Connect to Hive Remotely with Beeline
# HiveServer2 must be running on the server (it starts an embedded Metastore)
cd ~/modules/apache-hive-2.3.4-bin
# Start Beeline on the client
# Option 1
bin/beeline
beeline> !connect jdbc:hive2://bigdata-node1:10000 hive2 hive2
# Option 2
bin/beeline -u jdbc:hive2://bigdata-node1:10000
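As a final end-to-end check (assuming the stu table created earlier), a one-shot query from a client node should return the four rows loaded above:
bin/beeline -u jdbc:hive2://bigdata-node1:10000 -n hive2 -p hive2 -e "select * from stu;"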