User_id,subject_id,score
(1)求每个学科下的学生平均分数
(2)求每个学科下最高分数的学生
select subject_id, avg(score) as avg_score
from table_a
group by subject_id
select *
from (
select *,rank()over(partition by subject_id order by score desc) as flag
from table_a
) as t1
where flag=1
order by subject_id;
*因为窗口函数的执行顺序在where之后,所以窗口函数不能出现在where筛选语句中,必须调用表格,再筛选。
查询每个科目下,前1名同学的成绩和当前同学成绩的差值
select stud_id, lesson_id,score,pre_score,(score-pre_score) as diff
from (
select stud_id,lesson_id,score,lag(score)over(partition by lesson_id order by score) as pre_score
from table_a
) as t
order by lesson_id, score;
截止到当前成绩,按照日期排序查询第1个和最后1个的同学分数
第一种方法:窗口函数first_value和last_value
头尾函数:FIRST_VALUE(expr),LAST_VALUE(expr)
用途:返回第一个(FIRST_VALUE(expr)
)或最后一个(LAST_VALUE(expr)
)expr的值
select stud_id,lesson_id,score,create_time,
first_value(score)over(partition by lesson_id order by create_time) as first_score,
last_value(score)over(partition by lesson_id order by create_time) as last_score
from table_a
order by lesson_id;
第二种方法:找出每门课下满足日期最大、最小的学生,利用union联合两张表
select stud_id,lesson_id,score,create_time
from table_a as a1
where create_time = (
select max(create_time) from table_a as a2 where a1.lesson_id=a2.lesson_id)#相关子查询, 找出每门课的成绩最晚日期
union
select stud_id,lesson_id,score,create_time
from table_a as t1
where create_time=(
select min(create_time) from table_a t2 where t1.lesson_id=t2.lesson_id)#相关子查询, 找出每门课的成绩最早日期
第三种方法不可行(尝试在第二种的基础上不用union,而是在where中用in使得create_time 满足日期最大、或者日期最小)因为max和min生成了2列,而不是1列多结果
select stud_id,lesson_id,score,create_time
from table_a as a1
where create_time in (
select max(create_time), min(create_time)
from table_a as a2
where a1.lesson_id=a2.lesson_id
)
#相关子查询
#找出每门课成绩记录中的最早、最晚日期
case专题-商品订单数据
数据表:
订单表orders,大概字段有(orderid’订单号,’_user_id‘用户编号’, order_pay‘订单金额’ , order_time‘下单时间’,’商品一级类目commodity_level_I’,’商品二级类目commodity_level_2’)
1.求最近7天内每一个一级类目下成交总额排名前3的二级类目:
select *
from (
select commodity_level1,
commodity_level2,
total_money,
rank()over(partition by commodity_level1 order by total_money desc) as flag
from (
select commodity_level1,
commodity_level2,
sum(order_pay) as total_money
from table_a
where datediff(now(),order_time)<=7
group by commodity_level2
) as t1
) as t2
where flag<=3
order by commodity_level1, flag;
注:now()=’2020-01-29 09:10:00’
curdate()=’2020-01-29’
curtime()=’09:10:00’
datediff是计算两个日期之前的天数差别
形式为datediff(date1,date2)或者datediff(日期时间1,日期时间2)
差值为前-后
timestampdiff是计算两个时间之间的间隔差别,可以自定义差值以秒/分/小时/日/月/年等等间隔来计算
形式为datediff(minute, 小日期时间,大日期时间)
差值为后-前,
eg:可以用来计算年龄,当前时间减去出生日期
小疑问:group by应该是不能和窗口函数连用,因为group by对每组只返回一个值,而窗口函数是对每组中的每条记录都返回一个值
select act_id,
count(ord_id) as '总订单数',
datediff(now(),min(a.create_time))+1 as '时间间隔',
count(ord_id)/(datediff(now(), min(a.create_time))+1) as '平均每日产生的订单数'
from act_usr a
join ord as o
on a.user_id=o.user_id and a.create_time<=o.create_time
group by act_id;
select date(reg_datetime) as reg_date, count(distinct user_id) as new_reg_amt
from (
select user_id, min(log_time) as reg_datetime
from tracking_log
group by user_id
) as t1
group by date(reg_datetime)
order by date(reg_datetime);
craete table reg_log as
select user_id,date(min(log_time)) as reg_date
from tracking_log
group by user_id;
select reg_date,
datediff(date(log_time),r.reg_date) as '时间间隔',
count(distinct t.user_id) as '留存数',
count(distinct t.user_id)/(select count(*) from reg_log as r1 where r.reg_date=r1.reg_date) as '留存率'
from reg_log as r join tracking_log as t
on r.user_id=t.user_id and datediff(date(log_time),r.reg_date) in (1,29)
group by reg_date,datediff(date(log_time),r.reg_date)
select reg_date,'时间间隔','留存数'/ '新增访客数' as '留存率'
from (
select reg_date,count(*) as '新增访客数'
from reg_log
group by reg
) as t1
join (
select reg_date,
datediff(date(t.log_time),r.reg_date) as '时间间隔',
count(distinct t.user_id) as '留存数'
from reg_log as r
join tracking_log t
on r.user_id=t.user_id and datediff(date(t.log_time),r.reg_date) in (1,29)
group by reg, datediff(date(t.log_time),r.reg_date)
) as t2
on t1.reg_date=t2.reg_date
order by reg_date,'时间间隔';
case专题-学生成绩分析
查询每一科目成绩最高和最低分数的学生,输出courseid,studentid,score
select *
from enrollments as t1
where grade=(select max(grade) from enrollments as t2 where t1.course=t2.course)
union
select *
from enrollments as a1
where grade=(select min(grade) from enrollments as a2 where a1.course=a2.course)
case专题-学生做题情况分析
表t:做题日期(time),学生id(stuid),_题目id(exer_id)
统计10.1-10.10每天做新题的人的数量,重点在每天
思路:
1)找到每个学生每道题的第一次做题时间,即找出min(time)eg:用户登记表,最小登陆日期作为新注册表
2)作为临时表,查找做新题日期数=10
3)筛选出10内每天做新题的学生后,计算学生的人数
select count(*) cnt
from (
select stu_id, count(distinct first_time) as date_cnt
from (
select stu_id,exer_id,min(time) as first_time
from exer_log
where time between '2020-10-01' and '2020-10-10'
group by stu_id,exer_id
) as t1
group by stu_id
having date_cnt=10
) as t2