建表
CREATE[EXTERNAL] TABLE [IF NOT EXISTS] table_name [(col_name data_type [COMMENT col_comment], ...)] [COMMENT table_comment] [PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)] [CLUSTERED BY (col_name, col_name, ...) [SORTED BY (col_name [ASC|DESC], ...)] INTO num_buckets BUCKETS] [ROW FORMAT row_format] [STORED AS file_format] [LOCATION hdfs_path] CREATE TABLE `mydb.dept`( `dept_no` int, `addr` string, `tel` string)partitioned by(date string ) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' ;
ROW FORMAT
CREATE TABLE mingxing
(
id int
) ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
COLLECTION ITEMS TERMINATED BY ','
MAP KEYS TERMINATED BY ':';
;
- ROW FORMAT DELIMITED是指明后面的关键词是列和元素分隔符的
- FIELDS TERMINATED BY 是字段分隔符
- COLLECTION ITEMS TERMINATED BY是元素分隔符(Array中的各元素、Struct中的各元素、Map中 的key-value对之间)
- MAP KEYS TERMINATED BY是Map中key与value的分隔符
- LINES TERMINATED BY是行之间的分隔符
删除
DROP DATABASE mydb; --删除库
DROP DATABASE IF EXISTS mydb CASCADE; -- 删除库和里面的表
DROP TABLE IF EXISTS userdb; --删除表
TRUNCATE TABLE mingxing;-- 清空数据
-- 删除分区
ALTER TABLE mingxing DROP IF EXISTS PARTITION (city = 'beijing');
ALTER TABLE mingxing DROP IF EXISTS PARTITION (city = 'beijing',street='jiangtai');
-- 删除字段:
drop(不支持),替代方案可以使用replace
CASCADE 会去修改历史分区数据
ALTER TABLE test_dev.trxa_odr_all_pay_da CHANGE COLUMN avg_pay avg_pay string CASCADE;
ALTER TABLE test_dev.trxa_odr_all_pay_da ADD COLUMNS (province string);
ALTER TABLE test_dev.trxa_odr_all_pay_da REPLACE COLUMNS (user_type string, user_cnt bigint,avg_pay double);
修改
ALTER TABLE table_name [PARTITION partition_spec] CHANGE [COLUMN] col_old_name col_new_name column_type
[COMMENT col_comment] [FIRST|AFTER column_name] [CASCADE|RESTRICT];
-- 重命名表
ALTER TABLE mingxing RENAME TO student;ç
-- 增加字段
ALTER TABLE mingxing ADD COLUMNS (province string);
ALTER TABLE mingxing ADD COLUMNS (province string, xxx bigint);
-- 增加字段并修改旧分区数据
ALTER TABLE mingxing ADD COLUMNS (province string) CASCADE;
-- 修改字段:
ALTER TABLE mingxing change age newage string;
ALTER TABLE mingxing change age newage string FIRST | AFTER id;
-- 修改字段并修改旧分区数据
ALTER TABLE mingxing change age newage string CASCADE;
-- 替换字段
ALTER TABLE mingxing replace columns(id INT, NAME string, sex string);
-- 增加分区
ALTER TABLE mingxing ADD PARTITION (city = 'beijing');
ALTER TABLE mingxing ADD PARTITION (city = 'beijing') PARTITION (city = 'tianjin');
-- 修改分区数据路径:
ALTER TABLE mingxing PARTITION (city = 'beijing') SET LOCATION '/home/hadoop/data/beijing';
查看
DESC mingxing; -- 查看表的字段:
DESC EXTENDED mingxing; --查看表的详细信息
DESC FORMATTED mingxing; --查看表的格式化了之后的详细信息
load
LOAD DATA LOCAL INPATH './student.txt' INTO TABLE mingxing;
LOAD DATA LOCAL INPATH './student.txt' OVERWRITE INTO TABLE mingxing;
LOAD DATA LOCAL INPATH '/root/hivedata/student.txt' INTO TABLE mingxing;
LOAD DATA INPATH '/root/hivedata/student.txt' INTO TABLE mingxing;
insert
-- 单条数据插入:
INSERT INTO TABLE mingxing
VALUES (001, 'huangbo', 'male', 50, 'MA');
-- 单重插入模式:
INSERT INTO TABLE student
SELECT id, name, sex, age, department
FROM mingxing;
-- 多重插入模式:
FROM mingxing
INSERT
INTO TABLE student1
SELECT id, name, sex, age
INSERT
INTO TABLE student2
SELECT id, department;
-- 静态分区插入
LOAD DATA LOCAL INPATH '/root/hivedata/student.txt' INTO TABLE student PARTITION (city = 'henan');
-- 动态分区插入
INSERT INTO TABLE student PARTITION (id)
SELECT name, department, id
FROM mingxing2;
-- 直接把查询出来的结果存储到新建的一张表里
CREATE TABLE student AS
SELECT id, name, age, department
FROM mingxing;
-- 复制表
CREATE TABLE student LIKE mingxing;
-- 导出数据到本地:
INSERT OVERWRITE LOCAL DIRECTORY '/root/outputdata'
SELECT id, name, sex, age, department
FROM mingxing;
-- 导出数据到HDFS:
INSERT OVERWRITE DIRECTORY 'hdfs://hadoop01:9000/root/outputdata1'
SELECT id, name, sex, age, department
FROM mingxing;
-- 修复分区
MSCK REPAIR TABLE
LEFT SEMI JOIN/IN/EXISTS
SELECT student.*, mingxing.*
FROM student
LEFT SEMI
JOIN mingxing ON student.id = mingxing.id;
-- 等价于
SELECT student.*
FROM student
WHERE student.id IN (SELECT DISTINCT id FROM mingxing);
- EXPLAIN EXTENDED SQL 查看执行计划
- set hive.exec.mode.local.auto=true 本地执行