1. 需求描述

针对销售数据,完成统计:
1. 按年统计销售额
2. 销售金额在 10W 以上的订单
3. 每年销售额的差值
4. 年度订单金额前10位(年度、订单号、订单金额、排名)
5. 季度订单金额前10位(年度、季度、订单id、订单金额、排名)
6. 求所有交易日中订单金额最高的前10位
7. 每年度销售额最大的交易日
8. 年度最畅销的商品(即每年销售金额最大的商品)

2. 数据说明

日期表(dimdate)
dt date 日期
yearmonth int 年月
year smallint
month tinyint
day tinyint
week tinyint 周几
weeks tinyint 第几周
quat tinyint 季度
tendays tinyint
halfmonth tinyint 半月
订单表(sale)
orderid string 订单号
locationid string 交易位置
dt date 交易日期
订单销售明细表(saledetail)
orderid string 订单号
rownum int 行号
itemid string 货品
num int 数量
price double 单价
amount double 金额

3. 实现

步骤一:创建表

将数据存放在ORC文件中

  1. -- createtable.hql
  2. drop database sale cascade;
  3. create database if not exists sale;
  4. create table sale.dimdate_ori(
  5. dt date,
  6. yearmonth int,
  7. year smallint,
  8. month tinyint,
  9. day tinyint,
  10. week tinyint,
  11. weeks tinyint,
  12. quat tinyint,
  13. tendays tinyint,
  14. halfmonth tinyint
  15. )
  16. row format delimited fields terminated by ",";
  17. create table sale.sale_ori(
  18. orderid string,
  19. locationid string,
  20. dt date
  21. )
  22. row format delimited fields terminated by ",";
  23. create table sale.saledetail_ori(
  24. orderid string,
  25. rownum int,
  26. goods string,
  27. num int,
  28. price double,
  29. amount double
  30. )
  31. row format delimited fields terminated by ",";
  32. create table sale.dimdate(
  33. dt date,
  34. yearmonth int,
  35. year smallint,
  36. month tinyint,
  37. day tinyint,
  38. week tinyint,
  39. weeks tinyint,
  40. quat tinyint,
  41. tendays tinyint,
  42. halfmonth tinyint
  43. )
  44. stored as orc;
  45. create table sale.sale(
  46. orderid string,
  47. locationid string,
  48. dt date
  49. )
  50. stored as orc;
  51. create table sale.saledetail(
  52. orderid string,
  53. rownum int,
  54. goods string,
  55. num int,
  56. price double,
  57. amount double
  58. )
  59. stored as orc;
  60. hive -f createtable.hql

步骤二:导入数据

-- 加载数据
use sale;
load data local inpath "/root/data/tbDate.dat"
overwrite into table dimdate_ori;
load data local inpath "/root/data/tbSale.dat"
overwrite into table sale_ori;
load data local inpath "/root/data/tbSaleDetail.dat"
overwrite into table saledetail_ori;

-- 导入数据
insert into table dimdate select * from dimdate_ori;
insert into table sale select * from sale_ori;
insert into table saledetail select * from saledetail_ori;

hive -f loaddata.hql

步骤三:SQL实现

1、按年统计销售额

SELECT year(B.dt) year, round(sum(A.amount)/10000, 2) amount
  FROM saledetail A 
    join sale B
    on A.orderid=B.orderid
group by year(B.dt);

2、销售金额在 10W 以上的订单

SELECT orderid, round(sum(amount), 2) amount
  FROM saledetail 
  group by orderid
having sum(amount) > 100000

3、每年销售额的差值

SELECT year, round(amount, 2) amount, 
       round(lag(amount) over (ORDER BY year), 2) prioramount ,
       round(amount - lag(amount) over (ORDER BY year), 2) diff
 from (SELECT year(B.dt) year, sum(A.amount) amount 
         from saledetail A
        join sale B
        on A.orderid=B.orderid
       group by year(B.dt) ) tmp;

4、年度订单金额前10位(年度、订单号、订单金额、排名)

-- 方法一 
SELECT dt, orderid, amount, rank 
  from (SELECT dt, orderid, amount,
               dense_rank() over(PARTITION BY dt ORDER BY amount desc) rank 
          from (SELECT year(B.dt) dt, A.orderid, sum(A.amount) amount
                  from saledetail A 
                 join sale B
                 on A.orderid=B.orderid
                GROUP BY year(B.dt), A.orderid ) tmp1
       ) tmp2
where rank <= 10;

-- 方法二
with tmp as (
  SELECT year(B.dt) dt, A.orderid, sum(A.amount) amount
    from saledetail A
   join sale B
   on A.orderid=B.orderid
  GROUP BY year(B.dt), A.orderid
)

SELECT dt, orderid, amount, rank
  from (SELECT dt, orderid, amount, 
          dense_rank() over(PARTITION BY dt ORDER BY amount desc) rank 
        from tmp 
       ) tmp2
where rank <= 10;

5、季度订单金额前10位(年度、季度、订单id、订单金额、排名)

-- 方法一
with tmp as (
  select C.year, C.quat, A.orderid, round(sum(B.amount), 2) amount
    from sale A
   join saledetail B
   on A.orderid=B.orderid
   join dimdate C
   on A.dt=C.dt
  group by C.year, C.quat, A.orderid
)

select year, quat, orderid, amount, rank
  from (select year, quat, orderid, amount,
               dense_rank() over (partition by year, quat order by amount desc) rank
         from tmp
       ) tmp1 
where rank <= 10;

-- 方法二
with tmp as(
  select year(A.dt) year,
         case when month(A.dt) <= 3 then 1
              when month(A.dt) <= 6 then 2
              when month(A.dt) <= 9 then 3
              else 4 end quat,
         A.orderid, 
         round(sum(B.amount), 2) amount 
  from sale A
    join saledetail B
    on A.orderid = B.orderid
  group by year(A.dt),
           case when month(A.dt) <= 3 then 1
                when month(A.dt) <= 6 then 2
                when month(A.dt) <= 9 then 3
                else 4 end,
           A.orderid
)

select year, quat, orderid, amount, rank
  from (select year, quat, orderid, amount,
               dense_rank() over (partition by year, quat order by amount desc) rank
         from tmp
       ) tmp1
where rank <= 10;

-- 方法三。求季度
select floor(month(dt/3.1)) + 1;

with tmp as (
  select year(A.dt) year,
         floor(month(A.dt)/3.1) + 1 quat,
         A.orderid,
         round(sum(B.amount), 2) amount
    from sale A 
     join saledetail B
     on A.orderid=B.orderid
  group by year(A.dt), floor(month(A.dt)/3.1) + 1, A.orderid
)

select year, quat, orderid, amount, rank
  from (select year, quat, orderid, amount,
               dense_rank() over (partition by year, quat order by amount desc) rank
          from tmp 
       ) tmp1
where rank <= 10;

6、求所有交易日中订单金额最高的前10位

topN问题:
1、基础数据
2、上排名函数
3、解决N的问题

with tmp as (
  select A.dt, A.orderid, round(sum(B.amount), 2) amount
    from sale A 
      join saledetail B
      on A.orderid=B.orderid
  group by A.dt, A.orderid
)

select dt, orderid, amount, rank
  from (select dt, orderid, amount,
               dense_rank() over(order by amount desc) rank from tmp
       ) tmp1
where rank <= 10;

7、每年度销售额最大的交易日

with tmp as (
  select A.dt, round(sum(B.amount), 2) amount
    from sale A
      join saledetail B
      on A.orderid=B.orderid
  group by A.dt
)

select year(dt) year, max(amount) dayamount 
  from tmp 
  group by year(dt);

8、年度最畅销的商品(即每年销售金额最大的商品)

with tmp as (
  select year(B.dt) year, goods, round(sum(amount),2) amount
    from saledetail A
   join sale B
   on A.orderid=B.orderid
  group by year(B.dt), goods
)

select year, goods, amount
  from (select year, goods, amount,
               dense_rank() over (partition by year order by amount desc) rank
          from tmp
       ) tmp1
where rank = 1;