1.基本查询

  1. 创建数据库
  2. create database if not exists heima ;
  3. show databases;
  4. desc database heima;
  5. drop database myhive2 cascade;
  6. 表操作
  7. create table if not exists stu (id int ,name string) row format delimited fields terminated by "\t";
  8. #修改表名
  9. alter table stu2 rename to stu22;
  10. #修改字段
  11. alter table stu22 change column id id_string string;
  12. #清空数据
  13. truncate table table_name
  14. #导出数据
  15. insert overwrite local directory "/export/data/hive/download" select * from score;
  16. #导出指定格式 没有local 则导出至hdfs
  17. insert overwrite local directory "/export/data/hive/download" row format delimited fields terminated by "\t" select * from score ;
  18. hive -e "select * from myhive.score;" > /export/data/exporthive/score.txt
  19. 将结果作为一张新的表
  20. create table stu_copy as select * from stu;
  21. #copy表结构(只创建表)
  22. create table stu2 like stu;
  23. #展示表结构
  24. desc stu2 || desc formatted stu2;
  25. #根据hdfs数据位置创建外部表
  26. create external table if not exists sut_dfs_path (id int ,name string)
  27. row format delimited fields terminated by "\t"
  28. location "/export/data/hive/stu"
  29. #加载数据 注意local 模式为文件copy, hdfs 也就是不写local 则执行移动
  30. load data [local] inpath "/export/data/hive/stu" [overwrite] into table stu;
  31. #分区模式 注意 partitioned
  32. create table score (sId string,cId string ,score int) partitioned by (month string) row format delimited fields terminated by "\t";
  33. #加载数据
  34. load data local inpath '/export/data/hivedatas/score.csv' into table score partition (month='202006');
  35. #复杂类型
  36. create external table if not exists hive_array (name string,location_array array<string>)
  37. row format delimited fields terminated by "\t"
  38. collection items terminated by ","
  39. #查看数据长度
  40. select name,size(location_array) from hive_array;
  41. select name,location_array[0] from hive_array;
  42. select name,array_contains(location_array,"changchun") from hive_array;
  43. #分区
  44. create table if not exists score_partition(s_id string ,c_id string ,score int )
  45. partitioned by (year string, month string ,day string)
  46. row format delimited fields terminated by "\t";
  47. #加载数据
  48. load data local inpath "/export/data/hive/score" overwrite into table score_partition partition(year="2021", month="01", day="21");
  49. #查询所有的分区
  50. show partitions score_partition;
  51. #分桶表 注意创建分桶表后数据只能通过insert into 插入,不能通过hdfs dfs 或者load 方式导入
  52. create table course_buk (c_id string,c_name string,t_id string) clustered by (c_id) into 3 buckets
  53. row format delimited fields terminated by "\t";
  54. #导入数据
  55. set hive.enforce.bucketing=true;
  56. set mapreduce.job.reduces=3;
  57. insert overwrite table course_buk select * from course cluster by(c_id);
  58. #排序
  59. order by 只会产生一个mapReduce 慎用!慎用!!慎用!!!
  60. sort by maptask 每个mr内部排序(局部有序)
  61. distribute by 类似MR中的Partitioner 通常和sort by 一起使用
  62. distribute by sort by 同时使用并且字段相同时可以用cluster by 代替

2.函数

  1. #四舍五入
  2. select round(3.6415);
  3. #2代表精度位数
  4. select round(3.1415,2) ;
  5. #向下取整
  6. select floor(3.6415);
  7. #向上取整函数
  8. select ceil(3.1415926) ;
  9. #随机数
  10. select rand();
  11. #幂运算函数: pow
  12. select pow(2,4) ; #16
  13. #绝对值
  14. select abs(-3.9);
  15. #字符串长度
  16. select length('aa');
  17. #字符串反转
  18. select reverse("abcd");
  19. #字符串拼接
  20. select concat("hello","world");
  21. #指定分隔符 拼接
  22. select concat_ws(":","key","value");
  23. #字符串截取函数 substr(string A, int start),substring(string A, int start)
  24. # substr(string A, int start, int len),substring(string A, int start, int len)
  25. #字符串转大写函数:upper,ucase upper(string A) ucase(string A)
  26. 转小写 lower(string A) lcase(string A)
  27. #去空格函数:trim 左边去空格函数:ltrim 右边去空格函数:rtrim
  28. #正则表达式替换函数:regexp_replace regexp_replace(string A, string B, string C)
  29. select regexp_replace('foobar', 'oo|ar', '');
  30. #URL解析函数:parse_url parse_url(string urlString, string partToExtract [, string keyToExtract])
  31. 说明:返回URL中指定的部分。partToExtract的有效值为:HOST, PATH, QUERY, REF, PROTOCOL, AUTHORITY, FILE, and USERINFO.
  32. select parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'HOST');
  33. #分割字符串函数: split 按照pat字符串分割str,会返回分割后的array
  34. select split("ab,cd,ef",',');
  35. 日期函数
  36. #获取当前UNIX时间戳函数:unix_timestamp
  37. select unix_timestamp(); unix_timestamp(string date)
  38. # unix_timestamp(string date, string pattern) from_unixtime(bigint unixtime,[string format])
  39. select from_unixtime(1598079966,'yyyy-MM-dd HH:mm:ss');
  40. #返回日期时间字段中的日期部分 to_date(string timestamp) 返回日期中的年: year(string date)
  41. #返回日期中的月份:month (string date) day (string date) 返回日期在当前的周数:weekofyear (string date)
  42. #返回结束日期减去开始日期的天数 datediff(string enddate, string startdate)
  43. #返回开始日期startdate增加days天后的日期 date_add(string startdate, int days)
  44. #返回开始日期startdate减少days天后的日期。 date_sub (string startdate, int days)
  45. 逻辑判断
  46. select if(1==2,100,200);
  47. #case 判断
  48. select case when 1=2 then 'tom' when 2=2 then 'mary' else 'tim' end ;
  49. #强转cast(表达式 as 数据类型)
  50. select cast(12.35 as int);
  51. select cast('20190607' as int)
  52. select cast('2020-12-05' as date);

DDL:https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL

第五章-Hive讲义.docx