1.基本查询
创建数据库create database if not exists heima ;show databases;desc database heimadrop database myhive2 cascade;表操作create table if not exists stu (id int ,name string) row format delimited fields terminated by "\t";#修改表名alter table stu2 rename to st22;#修改字段alter table stu22 change column id id_string string;#清空数据truncate table table_name#导出数据insert overwrite local directory "/export/data/hive/download" select * from scroe;#导出指定格式 没有local 则导出之hdfsinsert overwrite local directory "/export/data/hive/download" row format delimited fields terminated by "\t" select * from scroe ;hive -e "select * from myhive.score;" > /export/data/exporthive/score.txt将结果作为一张新的表create table stu_copy as select * from stu;#copy包结构(只创建表)create table stu2 like stu;#展示表结构desc stu2 || desc formatted stu2;#根据hdfs创建内部表create external table if not exists sut_dfs_path (id int ,name string)row format delimited fields terminated by "\t"location "/export/data/hive/stu"#加载数据 注意local 模式为文件copy, hdfs 也就是不写local 则执行移动load data [local] inpath "/export/data/hive/stu" [overwrite] into table stu;#分区模式 注意 partitionedcreate table scroe (sId string,cId string ,score int) partitioned by (month string) row format delimited fields terminated by "\t";#加载数据load data local inpath '/export/data/hivedatas/score.csv' into table score partition (month='202006');#复杂类型create external table if not exists hive_array (name string,location_array array<string>)row format delimited fields terminated by "\t"collection items terminated by ","#查看数据长度select name,size(location_array) from hive_array;select name,location_array[0] from hive_array;select name,array_contains(location_array,"changchun") from hive_array;#分区create table if not exists score_partition(s_id string ,c_id string ,score int )partitioned by (year string, month string ,day string)row format delimited fields terminated by "\t";#加载数据load data local inpath "/export/data/hive/score" overwrite into table stu_partition partition(year="2021", month="01", day="21");#查询所有的分区show partitions stu_partition;#分桶表 注意创建分区表后数据只能通过insert into 插入,不能通过hdfs ddfs 或者load 方式导入create table course_buk (c_id string,c_name string,t_id string) clustered by (c_id) into 3 bucketsrow format delimited fields terminated by "\t";#导入数据set hive.enforce.bucketing=true;set mapreduce.job.reduces=3;insert overwrite table course_buk select * from course cluster by(c_id);#排序order by 只会产生一个mapReduce 慎用!慎用!!慎用!!!sort by maptask 每个mr内部排序(局部有序)distribute by 类似MR中的Partitioner 通常和sort by 一起使用当 distribute by 和sort by 同时使用并且字段相同时可以用cluster by 代替
2.函数
#四舍五入select round(3.6415);#2代表精度位数select round(3.1415,2) ;#向下取整select floor(3.6415);#向上取整函数select ceil(3.1415926) ;#随机数select rand();#幂运算函数: powselect pow(2,4) ; #16#绝对值select abs(-3.9);#字符串长度select length('aa');#字符串反转select reverse("abcd");#字符串拼接select concat("hello","world");#指定分隔符 拼接select concat_ws(":","key","value");#字符串截取函数 substr(string A, int start),substring(string A, int start)# substr(string A, int start, int len),substring(string A, intstart, int len)#字符串转大写函数:upper,ucase upper(string A) ucase(string A)转小写 lower(string A) lcase(string A)#去空格函数:trim 左边去空格函数:ltrim 右边去空格函数:rtrim#正则表达式替换函数:regexp_replace regexp_replace(string A, string B, string C)select regexp_replace('foobar', 'oo|ar', '');#URL解析函数:parse_url parse_url(string urlString, string partToExtract [, stringkeyToExtract])说明:返回URL中指定的部分。partToExtract的有效值为:HOST, PATH, QUERY, REF, PROTOCOL, AUTHORITY, FILE, and USERINFO.select parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'HOST');#分割字符串函数: split 按照pat字符串分割str,会返回分割后的arrayselect split("ab,cd,ef",',');日期函数#获取当前UNIX时间戳函数:unix_timestampselect unix_timestamp(); unix_timestamp(string date)# unix_timestamp(string date, string pattern) from_unixtime(bigint unixtime,[string format])select from_unixtime(1598079966,'yyyy-MM-dd HH:mm:ss');#返回日期时间字段中的日期部分 to_date(string timestamp) 返回日期中的年: year(string date)#返回日期中的月份:month (string date) day (string date) 返回日期在当前的周数:weekofyear (string date)#返回结束日期减去开始日期的天数 datediff(string enddate, string startdate)#返回开始日期startdate增加days天后的日期 date_add(string startdate, int days)#返回开始日期startdate减少days天后的日期。 date_sub (string startdate, int days)逻辑判断select if(1==2,100,200);#case 判断select case when 1=2 then 'tom' when 2=2 then 'mary' else'tim' end ;#强转cast(表达式 as 数据类型)select cast(12.35 as int);select cast('20190607' as int)select cast('2020-12-05' as date);
DDL:https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL
