1.基本查询
创建数据库
create database if not exists heima ;
show databases;
desc database heima
drop database myhive2 cascade;
表操作
create table if not exists stu (id int ,name string) row format delimited fields terminated by "\t";
#修改表名
alter table stu2 rename to st22;
#修改字段
alter table stu22 change column id id_string string;
#清空数据
truncate table table_name
#导出数据
insert overwrite local directory "/export/data/hive/download" select * from scroe;
#导出指定格式 没有local 则导出之hdfs
insert overwrite local directory "/export/data/hive/download" row format delimited fields terminated by "\t" select * from scroe ;
hive -e "select * from myhive.score;" > /export/data/exporthive/score.txt
将结果作为一张新的表
create table stu_copy as select * from stu;
#copy包结构(只创建表)
create table stu2 like stu;
#展示表结构
desc stu2 || desc formatted stu2;
#根据hdfs创建内部表
create external table if not exists sut_dfs_path (id int ,name string)
row format delimited fields terminated by "\t"
location "/export/data/hive/stu"
#加载数据 注意local 模式为文件copy, hdfs 也就是不写local 则执行移动
load data [local] inpath "/export/data/hive/stu" [overwrite] into table stu;
#分区模式 注意 partitioned
create table scroe (sId string,cId string ,score int) partitioned by (month string) row format delimited fields terminated by "\t";
#加载数据
load data local inpath '/export/data/hivedatas/score.csv' into table score partition (month='202006');
#复杂类型
create external table if not exists hive_array (name string,location_array array<string>)
row format delimited fields terminated by "\t"
collection items terminated by ","
#查看数据长度
select name,size(location_array) from hive_array;
select name,location_array[0] from hive_array;
select name,array_contains(location_array,"changchun") from hive_array;
#分区
create table if not exists score_partition(s_id string ,c_id string ,score int )
partitioned by (year string, month string ,day string)
row format delimited fields terminated by "\t";
#加载数据
load data local inpath "/export/data/hive/score" overwrite into table stu_partition partition(year="2021", month="01", day="21");
#查询所有的分区
show partitions stu_partition;
#分桶表 注意创建分区表后数据只能通过insert into 插入,不能通过hdfs ddfs 或者load 方式导入
create table course_buk (c_id string,c_name string,t_id string) clustered by (c_id) into 3 buckets
row format delimited fields terminated by "\t";
#导入数据
set hive.enforce.bucketing=true;
set mapreduce.job.reduces=3;
insert overwrite table course_buk select * from course cluster by(c_id);
#排序
order by 只会产生一个mapReduce 慎用!慎用!!慎用!!!
sort by maptask 每个mr内部排序(局部有序)
distribute by 类似MR中的Partitioner 通常和sort by 一起使用
当 distribute by 和sort by 同时使用并且字段相同时可以用cluster by 代替
2.函数
#四舍五入
select round(3.6415);
#2代表精度位数
select round(3.1415,2) ;
#向下取整
select floor(3.6415);
#向上取整函数
select ceil(3.1415926) ;
#随机数
select rand();
#幂运算函数: pow
select pow(2,4) ; #16
#绝对值
select abs(-3.9);
#字符串长度
select length('aa');
#字符串反转
select reverse("abcd");
#字符串拼接
select concat("hello","world");
#指定分隔符 拼接
select concat_ws(":","key","value");
#字符串截取函数 substr(string A, int start),substring(string A, int start)
# substr(string A, int start, int len),substring(string A, intstart, int len)
#字符串转大写函数:upper,ucase upper(string A) ucase(string A)
转小写 lower(string A) lcase(string A)
#去空格函数:trim 左边去空格函数:ltrim 右边去空格函数:rtrim
#正则表达式替换函数:regexp_replace regexp_replace(string A, string B, string C)
select regexp_replace('foobar', 'oo|ar', '');
#URL解析函数:parse_url parse_url(string urlString, string partToExtract [, stringkeyToExtract])
说明:返回URL中指定的部分。partToExtract的有效值为:HOST, PATH, QUERY, REF, PROTOCOL, AUTHORITY, FILE, and USERINFO.
select parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'HOST');
#分割字符串函数: split 按照pat字符串分割str,会返回分割后的array
select split("ab,cd,ef",',');
日期函数
#获取当前UNIX时间戳函数:unix_timestamp
select unix_timestamp(); unix_timestamp(string date)
# unix_timestamp(string date, string pattern) from_unixtime(bigint unixtime,[string format])
select from_unixtime(1598079966,'yyyy-MM-dd HH:mm:ss');
#返回日期时间字段中的日期部分 to_date(string timestamp) 返回日期中的年: year(string date)
#返回日期中的月份:month (string date) day (string date) 返回日期在当前的周数:weekofyear (string date)
#返回结束日期减去开始日期的天数 datediff(string enddate, string startdate)
#返回开始日期startdate增加days天后的日期 date_add(string startdate, int days)
#返回开始日期startdate减少days天后的日期。 date_sub (string startdate, int days)
逻辑判断
select if(1==2,100,200);
#case 判断
select case when 1=2 then 'tom' when 2=2 then 'mary' else'tim' end ;
#强转cast(表达式 as 数据类型)
select cast(12.35 as int);
select cast('20190607' as int)
select cast('2020-12-05' as date);
DDL:https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL