— 建立数据表 create table user_trade ( uesr_name varchar ( 20 ),piece int , price double , pay_amount double ,goods_category varchar ( 20 ) , pay_time date );

— 累计计算函数 sum over

— 需求1)查询出2019年每月支付金额和当年累积支付总额

— 1\过滤出19年数据 select * from user_trade where year(pay_time)=2019; — 2\在1基础上group by 分组统计出月支付金额 select month(pay_time),sum(pay_amount) from user_trade where year(pay_time)=2019 group by month(pay_time); — 3\ 在2基础上增加窗口函数实现需求 SELECT a.MONTH, a.pay_amount, sum( pay_amount ) over ( ORDER BY a.MONTH )— 窗口函数,需要的数据就是2019所以不用分组,没有使用rows指定创库数据范围,默认当前行及其所有行 from ( SELECT MONTH ( pay_time ) MONTH, sum( pay_amount ) pay_amount FROM user_trade WHERE YEAR ( pay_time ) = 2019 GROUP BY MONTH ( pay_time ) ) a;— 作为子查询

— 需求2 查询出2018-2019年每月的支付总额和当年累积支付总额

SELECT a.YEAR, a.MONTH, a.pay_amount, sum( a.pay_amount ) over ( PARTITION BY a.YEAR ORDER BY a.MONTH ) from ( SELECT YEAR ( pay_time ) YEAR, MONTH ( pay_time ) MONTH, sum( pay_amount ) pay_amount FROM user_trade WHERE YEAR ( pay_time ) IN ( 2018, 2019 ) GROUP BY YEAR ( pay_time ), MONTH ( pay_time ) )a;

— 需求3: 查询出2019年每个月的近三月移动平均支付金额

— 操作rows窗口范围

select a.month, a.pay_amount,avg(a.pay_amount) over (order by a.month rows between 2 preceding and current row) from ( SELECT YEAR ( pay_time ) YEAR, MONTH ( pay_time ) MONTH, sum( pay_amount ) pay_amount FROM user_trade WHERE YEAR ( pay_time ) IN ( 2018, 2019 ) GROUP BY YEAR ( pay_time ), MONTH ( pay_time ) )a; — 窗口函数max min

— 需求4: 查询出每四个月的最大月总支付金额

— 1\把各月(yyyymm)月度支付总额算出来(sum) select a.month, a.pay_amount, max(a.pay_amount) over (order by a.month rows between 3 preceding and current row) FROM ( SELECT substring( pay_time, 1, 7 ) MONTH, sum( pay_amount ) pay_amount FROM user_trade GROUP BY substring( pay_time, 1, 7 ))a

排序函数

— 需求5: 2020年1月,购买商品品类数的用户排名

— 先统计出用户购买商品涉及到的品类数目 SELECT uesr_name, count( DISTINCT goods_category )category_count ,row_number() over() FROM user_trade WHERE substring( pay_time, 1, 7 ) = ‘2020-01’ GROUP BY uesr_name; — 在此基础上排名 SELECT uesr_name, count( DISTINCT goods_category ) category_count, row_number ( ) over ( ORDER BY count( DISTINCT goods_category ) ) order1, rank ( ) over ( ORDER BY count( DISTINCT goods_category ) ) order2, dense_rank ( ) over ( ORDER BY count( DISTINCT goods_category ) ) order3 — 三种排序方式 dense排序符合需求 FROM user_trade WHERE substring( pay_time, 1, 7 ) = ‘2020-01’ GROUP BY uesr_name;

— 需求6: 查询出将2020年2月的支付用户,按照支付金额分成5组后的结果

— 过滤基础数据 SELECT uesr_name, sum( pay_amount ), — 将支付总金额切成5份 ntile(5) over(order by sum( pay_amount ) desc) level FROM user_trade WHERE substring( pay_time, 1, 7 ) = ‘2020-02’ GROUP BY uesr_name;

— 需求7: 查询出2020年支付金额排名前30%的所有用户

SELECT a.uesr_name, a.pay_amount, a.LEVEL FROM ( SELECT uesr_name, sum( pay_amount ) pay_amount, ntile ( 10 ) over ( ORDER BY sum( pay_amount ) DESC ) LEVEL FROM user_trade WHERE YEAR ( pay_time ) = ‘2020’ GROUP BY uesr_name ) a WHERE a.LEVEL IN ( 1, 2, 3 );

— 偏移分析函数

SELECT user_name, pay_time, lag(pay_time,1,pay_time) over(partition by user_name order by pay_time) lag1, — 没有传入偏移量,那么默认就是1,找不到的话,此处也没有给默认值,为null lag(pay_time) over(partition by user_name order by pay_time) lag1_s, lag(pay_time,2,pay_time) over(partition by user_name order by pay_time) lag2, lag(pay_time,2) over(partition by user_name order by pay_time) lag2_s FROM user_trade WHERE user_name in (‘King’,’West’);

— 需求10: 查询出支付时间间隔超过100天的用户数

SELECT count( DISTINCT user_name ) FROM ( SELECT user_name, pay_time, lead ( pay_time, 1 ) over ( PARTITION BY user_name ORDER BY pay_time ) lead_time FROM user_trade ) a WHERE datediff( a.lead_time, a.pay_time ) > 100;

— 需求11: 查询出每年支付时间间隔最长的用户

select b.years,b.user_name,b.interal_days from ( select a.years, a.user_name, datediff(a.pay_time,a.lag_time) interal_days, — 年度内排名 rank()over(partition by a.years order by datediff(a.pay_time,a.lag_time) desc) rank1 from (select year( pay_time ) years, user_name, pay_time, lag ( pay_time ) over ( partition by user_name, year ( pay_time ) order by pay_time asc ) lag_time from user_trade)a)b WHERE b.rank1=1 ;