1. 需求描述
针对销售数据,完成统计:
1. 按年统计销售额
2. 销售金额在 10W 以上的订单
3. 每年销售额的差值
4. 年度订单金额前10位(年度、订单号、订单金额、排名)
5. 季度订单金额前10位(年度、季度、订单id、订单金额、排名)
6. 求所有交易日中订单金额最高的前10位
7. 每年度销售额最大的交易日
8. 年度最畅销的商品(即每年销售金额最大的商品)
2. 数据说明
日期表(dimdate) | ||
---|---|---|
dt | date | 日期 |
yearmonth | int | 年月 |
year | smallint | 年 |
month | tinyint | 月 |
day | tinyint | 日 |
week | tinyint | 周几 |
weeks | tinyint | 第几周 |
quat | tinyint | 季度 |
tendays | tinyint | 旬 |
halfmonth | tinyint | 半月 |
订单表(sale) | ||
---|---|---|
orderid | string | 订单号 |
locationid | string | 交易位置 |
dt | date | 交易日期 |
订单销售明细表(saledetail) | ||
---|---|---|
orderid | string | 订单号 |
rownum | int | 行号 |
itemid | string | 货品 |
num | int | 数量 |
price | double | 单价 |
amount | double | 金额 |
3. 实现
步骤一:创建表
将数据存放在ORC文件中
-- createtable.hql
drop database sale cascade;
create database if not exists sale;
create table sale.dimdate_ori(
dt date,
yearmonth int,
year smallint,
month tinyint,
day tinyint,
week tinyint,
weeks tinyint,
quat tinyint,
tendays tinyint,
halfmonth tinyint
)
row format delimited fields terminated by ",";
create table sale.sale_ori(
orderid string,
locationid string,
dt date
)
row format delimited fields terminated by ",";
create table sale.saledetail_ori(
orderid string,
rownum int,
goods string,
num int,
price double,
amount double
)
row format delimited fields terminated by ",";
create table sale.dimdate(
dt date,
yearmonth int,
year smallint,
month tinyint,
day tinyint,
week tinyint,
weeks tinyint,
quat tinyint,
tendays tinyint,
halfmonth tinyint
)
stored as orc;
create table sale.sale(
orderid string,
locationid string,
dt date
)
stored as orc;
create table sale.saledetail(
orderid string,
rownum int,
goods string,
num int,
price double,
amount double
)
stored as orc;
hive -f createtable.hql
步骤二:导入数据
-- 加载数据
use sale;
load data local inpath "/root/data/tbDate.dat"
overwrite into table dimdate_ori;
load data local inpath "/root/data/tbSale.dat"
overwrite into table sale_ori;
load data local inpath "/root/data/tbSaleDetail.dat"
overwrite into table saledetail_ori;
-- 导入数据
insert into table dimdate select * from dimdate_ori;
insert into table sale select * from sale_ori;
insert into table saledetail select * from saledetail_ori;
hive -f loaddata.hql
步骤三:SQL实现
1、按年统计销售额
SELECT year(B.dt) year, round(sum(A.amount)/10000, 2) amount
FROM saledetail A
join sale B
on A.orderid=B.orderid
group by year(B.dt);
2、销售金额在 10W 以上的订单
SELECT orderid, round(sum(amount), 2) amount
FROM saledetail
group by orderid
having sum(amount) > 100000
3、每年销售额的差值
SELECT year, round(amount, 2) amount,
round(lag(amount) over (ORDER BY year), 2) prioramount ,
round(amount - lag(amount) over (ORDER BY year), 2) diff
from (SELECT year(B.dt) year, sum(A.amount) amount
from saledetail A
join sale B
on A.orderid=B.orderid
group by year(B.dt) ) tmp;
4、年度订单金额前10位(年度、订单号、订单金额、排名)
-- 方法一
SELECT dt, orderid, amount, rank
from (SELECT dt, orderid, amount,
dense_rank() over(PARTITION BY dt ORDER BY amount desc) rank
from (SELECT year(B.dt) dt, A.orderid, sum(A.amount) amount
from saledetail A
join sale B
on A.orderid=B.orderid
GROUP BY year(B.dt), A.orderid ) tmp1
) tmp2
where rank <= 10;
-- 方法二
with tmp as (
SELECT year(B.dt) dt, A.orderid, sum(A.amount) amount
from saledetail A
join sale B
on A.orderid=B.orderid
GROUP BY year(B.dt), A.orderid
)
SELECT dt, orderid, amount, rank
from (SELECT dt, orderid, amount,
dense_rank() over(PARTITION BY dt ORDER BY amount desc) rank
from tmp
) tmp2
where rank <= 10;
5、季度订单金额前10位(年度、季度、订单id、订单金额、排名)
-- 方法一
with tmp as (
select C.year, C.quat, A.orderid, round(sum(B.amount), 2) amount
from sale A
join saledetail B
on A.orderid=B.orderid
join dimdate C
on A.dt=C.dt
group by C.year, C.quat, A.orderid
)
select year, quat, orderid, amount, rank
from (select year, quat, orderid, amount,
dense_rank() over (partition by year, quat order by amount desc) rank
from tmp
) tmp1
where rank <= 10;
-- 方法二
with tmp as(
select year(A.dt) year,
case when month(A.dt) <= 3 then 1
when month(A.dt) <= 6 then 2
when month(A.dt) <= 9 then 3
else 4 end quat,
A.orderid,
round(sum(B.amount), 2) amount
from sale A
join saledetail B
on A.orderid = B.orderid
group by year(A.dt),
case when month(A.dt) <= 3 then 1
when month(A.dt) <= 6 then 2
when month(A.dt) <= 9 then 3
else 4 end,
A.orderid
)
select year, quat, orderid, amount, rank
from (select year, quat, orderid, amount,
dense_rank() over (partition by year, quat order by amount desc) rank
from tmp
) tmp1
where rank <= 10;
-- 方法三。求季度
select floor(month(dt/3.1)) + 1;
with tmp as (
select year(A.dt) year,
floor(month(A.dt)/3.1) + 1 quat,
A.orderid,
round(sum(B.amount), 2) amount
from sale A
join saledetail B
on A.orderid=B.orderid
group by year(A.dt), floor(month(A.dt)/3.1) + 1, A.orderid
)
select year, quat, orderid, amount, rank
from (select year, quat, orderid, amount,
dense_rank() over (partition by year, quat order by amount desc) rank
from tmp
) tmp1
where rank <= 10;
6、求所有交易日中订单金额最高的前10位
topN问题:
1、基础数据
2、上排名函数
3、解决N的问题
with tmp as (
select A.dt, A.orderid, round(sum(B.amount), 2) amount
from sale A
join saledetail B
on A.orderid=B.orderid
group by A.dt, A.orderid
)
select dt, orderid, amount, rank
from (select dt, orderid, amount,
dense_rank() over(order by amount desc) rank from tmp
) tmp1
where rank <= 10;
7、每年度销售额最大的交易日
with tmp as (
select A.dt, round(sum(B.amount), 2) amount
from sale A
join saledetail B
on A.orderid=B.orderid
group by A.dt
)
select year(dt) year, max(amount) dayamount
from tmp
group by year(dt);
8、年度最畅销的商品(即每年销售金额最大的商品)
with tmp as (
select year(B.dt) year, goods, round(sum(amount),2) amount
from saledetail A
join sale B
on A.orderid=B.orderid
group by year(B.dt), goods
)
select year, goods, amount
from (select year, goods, amount,
dense_rank() over (partition by year order by amount desc) rank
from tmp
) tmp1
where rank = 1;