Hive工具-数据库操作 - 《拉钩→学习笔记→大数据课程》

建库
1. create database mydb;
2. create database if not exists mydb;
查看数据库
1. show databases; 查看所有的数据库
2. desc database mydb; 查看单独的数据库
使用数据库
1. use mydb;
删除数据库
1. drop database mydb; 删除数据库

建表（默认内部表）
1. create table if not exists table_name (内部表)
2. create external table if not exists table_name （外部表）

create external table t2(
id int,
name string,
hobby array<string>,
addr map<string, string>
)
row format delimited
fields terminated by ";"
collection items terminated by ","
map keys terminated by ":";

内外部表相互转换
1. alter table t1 set tblproperties('EXTERNAL'='TRUE'); （内部表 → 外部表）
2. alter table t1 set tblproperties('EXTERNAL'='FALSE'); （外部表 → 内部表）
分区表

create table if not exists t3(
id int
,name string
,hobby array<string>
,addr map<string, string>
)
partitioned by (dt string)
row format delimited
fields terminated by ';'
collection items terminated by ','
map keys terminated by ':';

查看分区
1. show partitions t3;
添加分区
1. alter table t3 add partition(dt='2020-06-03');

分桶表

create table course(
id int,
name string,
score int
)
clustered by (id) into 3 buckets
row format delimited
fields terminated by "\t";

修改表、删除表

-- 修改表名。rename
alter table course_common rename to course_common1;
-- 修改列名。change column
alter table course_common1 change column id cid int;
-- 修改字段类型。change column
alter table course_common1 change column cid cid string;
-- 注意：修改字段数据类型时，要满足数据类型转换的要求。如 int 可以转为 string，但是 string 不能转为 int
-- 增加字段。add columns
alter table course_common1 add columns (common string);

-- 删除字段：replace columns
-- 这里仅仅只是在元数据中删除了字段，并没有改动 hdfs 上的数据文件
alter table course_common1 replace columns( id string, cname string, score int);
-- 删除表
drop table course_common1;

数据导入
1. 装载数据（load）

-- 加载本地文件到 hive（tabA 表中）
LOAD DATA LOCAL INPATH '/home/hadoop/data/sourceA.txt' INTO TABLE tabA;
-- 检查本地文件还在

-- 加载 hdfs 文件到 hive（tabA 表中）
LOAD DATA INPATH 'data/sourceA.txt' INTO TABLE tabA;
-- 检查 HDFS 文件，已经被转移

-- 加载数据覆盖表中已有数据
LOAD DATA INPATH 'data/sourceA.txt' OVERWRITE INTO TABLE tabA;

-- 创建表时加载数据
hdfs dfs -mkdir /user/hive/tabB
hdfs dfs -put sourceA.txt /user/hive/tabB
CREATE TABLE tabB (
id INT
,name string
,area string
) ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
Location '/user/hive/tabB';

插入数据（insert）
-- 插入数据
insert into table tabC partition(month='202001')
values (5, 'wangwu', 'BJ'), (4, 'lishi', 'SH'), (3, 'zhangsan', 'TJ');
-- 插入查询的结果数据
insert into table tabC partition(month='202002')
select id, name, area from tabC where month='202001';
-- 多表（多分区）插入模式
from tabC
insert overwrite table tabC partition(month='202003')
select id, name, area where month='202002'
insert overwrite table tabC partition(month='202004')
select id, name, area where month='202002';

创建表并插入数据(as select)
-- 根据查询结果创建表
create table if not exists tabD as select * from tabC;

2.数据导出
数据导出
-- 将查询结果导出到本地
insert overwrite local directory '/home/hadoop/data/tabC' select * from tabC;
-- 将查询结果格式化输出到本地
insert overwrite local directory '/home/hadoop/data/tabC2' row format delimited fields terminated by ' ' select * from tabC;
-- 将查询结果导出到 HDFS
insert overwrite directory '/user/hadoop/data/tabC3' row format delimited fields terminated by ' ' select * from tabC;
-- dfs 命令导出数据到本地。本质是执行数据文件的拷贝
dfs -get /user/hive/warehouse/mydb.db/tabc/month=202001 /home/hadoop/data/tabC4
-- hive 命令导出数据到本地。执行查询将查询结果重定向到文件
hive -e "select * from tabC" > a.log
-- export 导出数据到 HDFS。使用 export 导出数据时，不仅有数据还有表的元数据信息
export table tabC to '/user/hadoop/data/tabC4';
-- export 导出的数据，可以使用 import 命令导入到 Hive 表中
-- 使用 like tname 创建的表结构与原表一致；create ... as select ... 结构可能不一致
create table tabE like tabc;
import table tabE from '/user/hadoop/data/tabC4';
-- 截断表，清空数据。（注意：仅能操作内部表）
truncate table tabE;
-- 以下语句报错，外部表不能执行 truncate 操作
alter table tabC set tblproperties("EXTERNAL"="TRUE");
truncate table tabC;

小结：
数据导入：load data / insert / create table ... as select ... / import table
数据导出：insert overwrite ... directory ... / hdfs dfs -get / hive -e "select ..." > a.log / export table ...
Hive的数据导入与导出还可以使用其他工具：Sqoop、DataX等

HQL操作之——DQL命令