作业题
1、找出全部夺得3连贯的队伍
WITH tmp as (
SELECT team, (year - row_number() over (partition by team order by year)) num
FROM t1
)
SELECT team
FROM tmp
GROUP BY team, num
HAVING COUNT(*) = 3;
[
](https://blog.csdn.net/weixin_44847293/article/details/109692802)
2、找出每个id在在一天之内所有的波峰与波谷值
select id,time,price,case when price >p1 and price >p2 then ‘波峰’ when price
select id,time,price,LAG(price,1,price) over(partition by id order by id) as p1,
LEAD(price,1,price) over(partition by id order by id) as p2
from t2) t3
where (price >p1 and price >p2) or ( price <p1 and price <p2 );
3、写SQL
3.1、每个id浏览时长、步长
select id,sum(timeStep) sum_time,max(rank) sum_step
from (select id, dt,browseid,row_number() over (partition by id order by dt) rank,
(unix_timestamp(dt, ‘yyyy/MM/dd HH:mm’) -
unix_timestamp(nvl(lag(dt) over (partition by id order by dt),dt), ‘yyyy/MM/dd HH:mm’))/60 as timeStep from t3) tmp1
group by id;
3.2、如果两次浏览之间的间隔超过30分钟,认为是两个不同的浏览时间;再求每个id浏 览时长、步长
select firstType,id,(max(unix_timestamp(dt, ‘yyyy/MM/dd HH:mm’))
- min(unix_timestamp(dt, ‘yyyy/MM/dd HH:mm’)))/60 as period,count(id) step
from (select id, dt,browseid,rank,minuxBefore,type,
sum(type) over (partition by id order by dt rows
between unbounded preceding and current row) as firstType
from(select id, dt,browseid,rank,minuxBefore,type
from (select id, dt,browseid,
row_number() over (partition by id order by dt) rank,
(unix_timestamp(dt, ‘yyyy/MM/dd HH:mm’) - unix_timestamp(nvl(lag(dt) over (partition by id order by dt),dt), ‘yyyy/MM/dd HH:mm’))/60 minuxBefore,
case when (unix_timestamp(dt, ‘yyyy/MM/dd HH:mm’)
- unix_timestamp(nvl(lag(dt) over (partition by id order by dt),dt), ‘yyyy/MM/dd HH:mm’))/60 >=30 then 1
else 0
end type
from t3
)t4)t5)t6 group by id,firstType;
备注:请仔细阅读计算规则
id dt browseid id:唯一的用户id dt:用户在这个时间点点击进入了一个页面 browseid:用户浏览了哪个页面 简化数据(以下为某个用户,在一天内的浏览记录): 1 08:20 1.html 1 08:23 2.html 1 08:24 3.html 1 08:40 4.html 1 09:33 5.html 1 09:40 6.html 1 09:30 7.html 1 09:36 8.html 1 09:37 9.html 1 09:41 a.html 3.1、每个id浏览时长、步长 用户1的浏览时长 = 09:41 - 08:20 = 81分钟 用户1的浏览步长 = count数 = 10次 3.2、如果两次浏览之间的间隔超过30分钟,认为是两个不同的浏览时间;再求每个id浏览时长、步长 用户1在 8:40 - 09:30 的间隔超过了30。生产中认为: 用户1在 08:20 - 08:40 浏览一次网站。这次浏览时长为20分钟,步长为4 用户1在 09:30 - 09:41 又浏览一次网站。这次浏览时长为11分钟,步长为6 对于测试数据SQL1的结果: 934e8bee978a42c7a8dbb4cfa8af0b4f 104.0 13 对于测试数据SQL2的结果: 934e8bee978a42c7a8dbb4cfa8af0b4f 32.0 6 934e8bee978a42c7a8dbb4cfa8af0b4f 35.0 7 |
|||||
---|---|---|---|---|---|