1、连续7天登录的用户

  1. -- 数据。uid dt status(1 正常登录,0 异常)
  2. 1 2019-07-11 1
  3. 1 2019-07-12 1
  4. 1 2019-07-13 1
  5. 1 2019-07-14 1
  6. 1 2019-07-15 1
  7. 1 2019-07-16 1
  8. 1 2019-07-17 1
  9. 1 2019-07-18 1
  10. 2 2019-07-11 1
  11. 2 2019-07-12 1
  12. 2 2019-07-13 0
  13. 2 2019-07-14 1
  14. 2 2019-07-15 1
  15. 2 2019-07-16 0
  16. 2 2019-07-17 1
  17. 2 2019-07-18 0
  18. 3 2019-07-11 1
  19. 3 2019-07-12 1
  20. 3 2019-07-13 1
  21. 3 2019-07-14 0
  22. 3 2019-07-15 1
  23. 3 2019-07-16 1
  24. 3 2019-07-17 1
  25. 3 2019-07-18 1
  26. -- 建表语句
  27. create table ulogin(
  28. uid int,
  29. dt date,
  30. status int
  31. )
  32. row format delimited fields terminated by ' ';
  33. -- 加载数据
  34. load data local inpath '/home/hadoop/data/ulogin.dat' into table ulogin;
  35. -- 连续值的求解,面试中常见的问题。这也是同一类,基本都可按照以下思路进行
  36. -- 1、使用 row_number 在组内给数据编号(rownum)
  37. -- 2、某个值 - rownum = gid,得到结果可以作为后面分组计算的依据
  38. -- 3、根据求得的gid,作为分组条件,求最终结果
  39. select uid, dt,
  40. date_sub(dt, row_number() over (partition by uid order by dt)) gid
  41. from ulogin
  42. where status=1;
  43. select uid, count(*) logincount
  44. from (select uid, dt,
  45. date_sub(dt, row_number() over (partition by uid order by dt)) gid
  46. from ulogin
  47. where status=1) t1
  48. group by uid, gid
  49. having logincount>=7;

2、编写sql语句实现每班前三名,分数一样并列,同时求出前三名按名次排序的分差

-- 数据。
sid class score 
1 1901 90
2 1901 90
3 1901 83
4 1901 60
5 1902 66
6 1902 23
7 1902 99
8 1902 67
9 1902 87

-- 待求结果数据如下:
class score rank lagscore 
1901 90 1 0
1901 90 1 0
1901 83 2 -7
1901 60 3 -23
1902 99 1 0
1902 87 2 -12
1902 67 3 -20

-- 建表语句
create table stu(
    sno int,
    class string,
    score int
)row format delimited fields terminated by ' ';
-- 加载数据
load data local inpath '/home/hadoop/data/stu.dat' into table stu;

-- 求解思路:
-- 1、上排名函数,分数一样并列,所以用dense_rank 
-- 2、将上一行数据下移,相减即得到分数差
-- 3、处理 NULL
with tmp as (
  select sno, class, score,
       dense_rank() over (partition by class order by score desc) as rank
from stu)
select class, score, rank,
       nvl(score - lag(score) over (partition by class order by score desc), 0) lagscore
  from tmp
 where rank<=3;

3、行 <=> 列

-- 数据:id course 
1 java
1 hadoop
1 hive
1 hbase
2 java
2 hive
2 spark
2 flink
3 java
3 hadoop
3 hive
3 kafka

-- 建表加载数据
create table rowline1(
  id string,
  course string
)row format delimited fields terminated by ' ';

load data local inpath '/root/data/data1.dat' into table rowline1;

-- 编写sql,得到结果如下(1表示选修,0表示未选修)
id java hadoop hive hbase spark flink kafka 
1   1     1     1     1     0     0     0 
2   1     0     1     0     1     1     0 
3   1     1     1     0     0     0     1

-- 使用case when;group by + sum
select id,
  sum(case when course="java" then 1 else 0 end) as java, 
  sum(case when course="hadoop" then 1 else 0 end) as hadoop, 
  sum(case when course="hive" then 1 else 0 end) as hive, 
  sum(case when course="hbase" then 1 else 0 end) as hbase, 
  sum(case when course="spark" then 1 else 0 end) as spark, 
  sum(case when course="flink" then 1 else 0 end) as flink, 
  sum(case when course="kafka" then 1 else 0 end) as kafka
from rowline1
group by id;
-- 数据。
id1 id2 flag 
a b 2
a b 1
a b 3
c d 6 
c d 8 
c d 8

-- 编写sql实现如下结果 
id1 id2 flag
a   b   2|1|3
c   d   6|8
-- 创建表 & 加载数据 
create table rowline2(
    id1 string,
    id2 string,
    flag int
) row format delimited fields terminated by ' ';
load data local inpath '/root/data/data2.dat' into table rowline2;
-- 第一步 将元素聚拢
select id1, id2, collect_set(flag) flag from rowline2 group by id1, id2;
select id1, id2, collect_list(flag) flag from rowline2 group by id1, id2;
select id1, id2, sort_array(collect_set(flag)) flag from rowline2 group by id1, id2;
-- 第二步 将元素连接在一起
select id1, id2, concat_ws("|", collect_set(flag)) flag
  from rowline2
group by id1, id2;
-- 这里报错,CONCAT_WS must be "string or array<string>"。加一个类型 转换即可
select id1, id2, concat_ws("|", collect_set(cast (flag as string))) flag
  from rowline2
group by id1, id2;

-- 创建表 rowline3
create table rowline3 as
select id1, id2, concat_ws("|", collect_set(cast (flag as string))) flag
  from rowline2
group by id1, id2;
-- 第一步:将复杂的数据展开
select explode(split(flag, "\\|")) flat from rowline3;
-- 第二步:lateral view 后与其他字段关联 
select id1, id2, newflag
  from rowline3 lateral view explode(split(flag, "\\|")) t1 as newflag;

lateralView: LATERAL VIEW udtf(expression) tableAlias AS
columnAlias (',' columnAlias)*
fromClause: FROM baseTable (lateralView)*