hive - hive 常见命令* - 《Hadoop》

1.基本查询
2.函数

1.基本查询

创建数据库
create database  if not exists heima  ;
show databases;
desc database heima
drop  database  myhive2  cascade; 
表操作
create table if not exists stu (id int ,name string) row format delimited fields terminated by "\t";
#修改表名
alter table stu2 rename to st22;
#修改字段
alter table stu22 change column id id_string string;
#清空数据
truncate table table_name
#导出数据
insert overwrite local directory  "/export/data/hive/download" select * from scroe;
#导出指定格式 没有local 则导出之hdfs
insert overwrite local directory  "/export/data/hive/download" row format  delimited fields terminated by "\t" select * from scroe ;
hive -e "select * from myhive.score;" > /export/data/exporthive/score.txt
将结果作为一张新的表
create table stu_copy as select * from stu;
#copy包结构（只创建表）
create table stu2 like stu;
#展示表结构
desc stu2 || desc formatted stu2;
#根据hdfs创建内部表
create external table if not exists sut_dfs_path (id int ,name string) 
row format  delimited fields terminated by  "\t"
location "/export/data/hive/stu"
#加载数据 注意local 模式为文件copy, hdfs 也就是不写local 则执行移动
load data [local] inpath "/export/data/hive/stu" [overwrite] into table stu; 
#分区模式 注意   partitioned
create table scroe (sId string,cId string ,score int) partitioned  by (month string) row format  delimited  fields terminated by "\t";
#加载数据
load data local inpath '/export/data/hivedatas/score.csv' into table score partition (month='202006');
#复杂类型
create external  table  if not exists hive_array (name string,location_array array<string>)
row format  delimited fields terminated by "\t"
collection items terminated by ","
#查看数据长度
select name,size(location_array) from hive_array;
select name,location_array[0] from hive_array;
select name,array_contains(location_array,"changchun") from hive_array;
#分区
create table  if not exists score_partition(s_id string ,c_id string ,score int )
partitioned by (year string, month string ,day string) 
row format  delimited fields terminated by "\t";
#加载数据
load data local inpath "/export/data/hive/score" overwrite into table stu_partition partition(year="2021", month="01", day="21");
#查询所有的分区
show partitions stu_partition;
#分桶表 注意创建分区表后数据只能通过insert into 插入，不能通过hdfs ddfs 或者load 方式导入
create table course_buk  (c_id string,c_name string,t_id string) clustered by (c_id) into 3 buckets 
row format delimited fields terminated by "\t";
#导入数据
set hive.enforce.bucketing=true;
set mapreduce.job.reduces=3;  
insert overwrite  table  course_buk select * from course cluster by(c_id);
#排序
order by 只会产生一个mapReduce 慎用！慎用！！慎用！！！
sort by maptask 每个mr内部排序（局部有序）
distribute by 类似MR中的Partitioner 通常和sort by 一起使用
当 distribute by 和sort by 同时使用并且字段相同时可以用cluster by 代替

2.函数

#四舍五入
select round(3.6415);
#2代表精度位数
select round(3.1415,2) ;
#向下取整
select floor(3.6415);
#向上取整函数
select ceil(3.1415926) ;
#随机数
select rand();
#幂运算函数: pow
select pow(2,4) ; #16
#绝对值
select abs(-3.9);
#字符串长度
select length('aa');
#字符串反转
select reverse("abcd");
#字符串拼接
select concat("hello","world");
#指定分隔符 拼接
select concat_ws(":","key","value");
#字符串截取函数 substr(string A, int start),substring(string A, int start) 
#               substr(string A, int start, int len),substring(string A, intstart, int len)
#字符串转大写函数：upper,ucase  upper(string A) ucase(string A) 
       转小写 lower(string A) lcase(string A)
#去空格函数：trim 左边去空格函数：ltrim 右边去空格函数：rtrim
#正则表达式替换函数：regexp_replace regexp_replace(string A, string B, string C)
select regexp_replace('foobar', 'oo|ar', '');
#URL解析函数：parse_url   parse_url(string urlString, string partToExtract [, stringkeyToExtract])
说明：返回URL中指定的部分。partToExtract的有效值为：HOST, PATH, QUERY, REF, PROTOCOL, AUTHORITY, FILE, and USERINFO.
select parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'HOST');
#分割字符串函数: split  按照pat字符串分割str，会返回分割后的array
select split("ab,cd,ef",',');
日期函数
#获取当前UNIX时间戳函数:unix_timestamp
select unix_timestamp(); unix_timestamp(string date)
# unix_timestamp(string date, string pattern) from_unixtime(bigint unixtime,[string format])
select from_unixtime(1598079966,'yyyy-MM-dd HH:mm:ss'); 
#返回日期时间字段中的日期部分 to_date(string timestamp) 返回日期中的年： year(string date)
#返回日期中的月份:month (string date) day (string date) 返回日期在当前的周数:weekofyear (string date)
#返回结束日期减去开始日期的天数 datediff(string enddate, string startdate)
#返回开始日期startdate增加days天后的日期 date_add(string startdate, int days)
#返回开始日期startdate减少days天后的日期。 date_sub (string startdate, int days)
逻辑判断
select if(1==2,100,200);
#case 判断
select case when 1=2 then 'tom' when 2=2 then 'mary' else'tim' end ;
#强转cast(表达式 as 数据类型)
select cast(12.35 as int);
select cast('20190607' as int)
select cast('2020-12-05' as date);

DDL:https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL

第五章-Hive讲义.docx