- 各部门工资最高的员工(难度:中等)
- 创建Employee 表,包含所有员工信息,每个员工有其对应的 Id, salary 和 department Id
- 创建Department 表,包含公司所有部门的信息
- 找出每个部门工资最高的员工 ```sql — 建表语句 create table Employee ( Id INTEGER PRIMARY KEY, Name VARCHAR(128), Salary BIGINT, DepartmentId INTEGER );
create table Department( Id INTEGER PRIMARY KEY, Name varchar(128) );
— 插入数据 insert into employee value(1, ‘Joe’, 70000, 1); insert into employee value(2, ‘Henry’, 80000, 2); insert into employee value(3, ‘Sam’, 60000, 2); insert into employee value(4, ‘Max’, 90000, 1);
insert into department value(1, ‘IT’); insert into department value (2, ‘Sales’);
— 查询语句 select d.name as Departname, e.Name, Salary from employee e left join department d on e.DepartmentId = d.Id where (e.DepartmentId, Salary) in ( select e.DepartmentId, max(Salary) as Salary from employee e group by DepartmentId ) ORDER BY d.Id;
执行结果如下图所示: <br />- **换座位(难度:中等)**- 小美是一所中学的信息科技老师,她有一张 seat 座位表,平时用来储存学生名字和与他们相对应的座位id;- 其中**纵列的id是连续递增的**;- 写出一个语句帮助小美改变**相邻俩个学生的座位**```sql-- 建表语句create table seat(id INT PRIMARY KEY AUTO_INCREMENT,student VARCHAR(128));-- 插入数据insert into seat value(1, 'Abbot');insert into seat value(2, 'Doris');insert into seat value(3, 'Emerson');insert into seat value(4, 'Green');insert into seat value(5, 'Jeams');-- 查询语句select(case-- 当前座位号为奇数且为最后一个则保持不变when mod(id, 2) = 1 and id = maxId then id-- 当前座位号为奇数且不为最后一个则加一when (mod(id, 2) = 1 and id != maxId) or mod(maxId, 2) = 0 then (id + 1)-- 偶数情况全部减一else id - 1end ) as id,studentfromseat, (select count(*) as maxId from seat) cntorder by id;
执行结果如下: 
- 连续出现的数字(难度:中等)
- 编写一个 SQL 查询,查找所有至少连续出现三次的数字 ```sql — 建表语句 create table number( Id INT PRIMARY KEY, Num INT NOT NULL );
— 插入数据 insert into number value(1, 1); insert into number value(2, 1); insert into number value(3, 1); insert into number value(4, 2); insert into number value(5, 1); insert into number value(6, 2); insert into number value(7, 2);
— 执行查询 select Num, count(*) as cnt from ( — Id 减去 rk 得到的值可以标记连续的元素,相同的gap的元素就是连续的 select Num, Id - rk as gap from ( — 按数字开窗求排名 select Id, Num, rank() over (partition by Num order by id) as rk from number ) as rk_table ) as gap_table group by Num, gap having cnt = 3;
结果如下:

- **连续出现的数字 II(难度:困难)**(**本题可以扩展到计算连续登录的用户这一场景**)
- 在上一题的基础上追加一个条件:**中间隔一个也算连续**;
- 查找所有至少连续出现三次的数字
```sql
-- 思路一:类似于上一题,两次开窗聚合
select
Num,
sum(cnt) as cnt_2
from (
select
Num,
cnt,
cast(gap as signed) - cast(rk_2 as signed) as gap_2
from (
select
Num,
gap,
cnt,
rank() over(partition by Num order by gap) as rk_2
from (
-- 连续数字表
select
Num,
gap,
count(*) as cnt
from (
-- Id 减去 rk 得到的值可以标记连续的元素,相同的gap的元素就是连续的
select
Num,
Id - rk as gap
from (
-- 按数字开窗求排名
select
Id,
Num,
rank() over (partition by Num order by id) as rk
from
number
) as rk_table
) as gap_table
group by
Num, gap
) as continue_1_table
) as rk_table_2
) as gap_table_2
group by
Num, gap_2
having
cnt_2 = 3;
-- 思路二:上一行的ID下移 求差值
select
Num,
-- max(id) - min(id) + 1 连续登录场景下用这个方式求解
count(*) as cnt
from (
select
Num,
Id,
sum(if(gap > 2, 1, 0)) over(partition by Num ORDER BY Id) as flag
from (
select
Num,
Id,
Id - LagId as gap
from (
-- 通过 lag 函数将上一行数据下移,第一行的上一行默认为0
select
Num,
Id,
lag(Id, 1, 0) over(partition by Num order by Id) as lagId
from
number
) as lag_table
) as gap_table
) as flag_table
group by
Num, flag
having
cnt = 3;
得到的结果如下: 
【补充】:
- 窗口函数Lag用于求当前行的前n个记录的值,其中第一个参数是目标值,第二个参数是前面的记录数量,第三个参数是没有前N个记录对应的默认值。
- 树节点 (难度:中等)
- 写一条查询语句打印节点id及对应的节点类型,按照节点Id排序
- 每个节点都是以下三种类型中的一种:
- Root: 如果节点是根节点;
- Leaf: 如果节点是叶子节点;
- Inner: 如果节点既不是根节点也不是叶子节点。 ```sql — 建表 create table tree( id INT PRIMARY KEY, p_id INT );
— 插入数据 insert into tree(id) values(1); insert into tree values(2, 1); insert into tree values(3, 1); insert into tree values(4, 2); insert into tree values(5, 2);
— 执行查询 — case when + 子查询 select id, case when p_id is NULL then ‘root’ when p_id is not NULL and id in (select distinct p_id from tree) then ‘inner’ else ‘leaf’ end as type from tree order by id;
结果如下: <br />
- **至少有五名直接下属的经理 (难度:中等)**
- Employee表包含所有员工及其上级的信息。每位员工都有一个Id,并且还有一个对应主管的Id(ManagerId);
- 针对Employee表,写一条SQL语句找出有5个下属的主管
```sql
-- 建表语句
DROP TABLE if exists Employee;
CREATE TABLE Employee
(
id INT,
name varchar(20),
department varchar(20),
managerid INT,
PRIMARY KEY (id)
);
-- 插入数据
INSERT INTO Employee VALUES (101, 'John', 'A', null);
INSERT INTO Employee VALUES (102, 'Dan', 'A', 101);
INSERT INTO Employee VALUES (103, 'James', 'A', 101);
INSERT INTO Employee VALUES (104, 'Amy', 'A', 101);
INSERT INTO Employee VALUES (105, 'Anne', 'A', 101);
INSERT INTO Employee VALUES (106, 'Ron', 'B', 101);
-- 执行查询
select
Name
from
Employee
where Id in (
select
Managerid
from
Employee
group by
Managerid
having
count(*) >=5
);
- 查询回答率最高的问题 (难度:中等)
- 写一条sql语句找出回答率最高的
question_id; ```sql — 建表语句 DROP TABLE IF EXISTS survey_log; CREATE TABLE survey_log ( uid INT, action VARCHAR (20), question_id INT, answer_id INT, q_num INT, TIMESTAMP INT );
- 写一条sql语句找出回答率最高的
— 插入数据 INSERT INTO survey_log VALUES (5, ‘show’, 285, NULL, 1, 123); INSERT INTO survey_log VALUES (5, ‘answer’, 285, 124124, 1, 124); INSERT INTO survey_log VALUES (5, ‘show’, 369, NULL, 2, 125); INSERT INTO survey_log VALUES (5, ‘skip’, 369, NULL, 2, 126);
— 执行查询 select question_id, sum(if(action = ‘answer’, 1, 0)) / sum(if(action = ‘skip’, 0, 1)) as answer_rate from survey_log group by question_id order by answer_rate desc limit 1;
- 各部门前3高工资的员工(难度:中等)
- 编写一个 SQL 查询,找出每个部门工资前三高的员工
```sql
-- 创建表
DROP TABLE if exists employee;
create table Employee (
Id INTEGER PRIMARY KEY,
Name VARCHAR(128),
Salary BIGINT,
DepartmentId INTEGER
);
-- 插入数据
insert into employee value(1, 'Joe', 70000, 1);
insert into employee value(2, 'Henry', 80000, 2);
insert into employee value(3, 'Sam', 60000, 2);
insert into employee value(4, 'Max', 90000, 1);
INSERT INTO employee value(5, 'Janet', 69000, 1);
INSERT INTO employee value(6, 'Randy', 85000, 1);
-- 执行查询
-- 窗口函数的应用
select
dp.Name as Department,
e.Name as Employee,
Salary
from (
select
Name,
Salary,
DepartmentId,
rank() over(partition by DepartmentId order by Salary desc) as rk
from
employee
) e inner join department dp on dp.Id = e.DepartmentId
where
rk <= 3
order by
Department,
Salary desc;
- 平面上最近距离 (难度: 困难)
- 写一条查询语句求出表中的点的最短距离并保留2位小数 ```sql — 建表语句 DROP TABLE IF EXISTS point_2d; CREATE TABLE point_2d ( x INT, y INT );
— 插入数据 INSERT INTO point_2d VALUES(-1, -1); INSERT INTO point_2d VALUES( 0, 0); INSERT INTO point_2d VALUES(-1, -2);
— 执行查询 select format(sqrt((x_1 - x_2) (x_1 - x_2) + (y_1 - y_2) (y_1 - y_2)), 2) as dist from ( select A.x as x_1, A.y as y_1, B.x as x_2, B.y as y_2 from point_2d A join point_2d B on (A.x != B.x or A.y != B.y) ) as dist_table order by dist limit 1;
注意这里格式化要用`format()`函数,因为产生的结果如果是整数的话使用`round()`函数则不会生成两个小数位。
- **行程和用户(难度:困难)**
- Trips 表中存所有出租车的行程信息:
- 每段行程有唯一键 Id;
- Client_Id 和 Driver_Id 是 Users 表中 Users_Id 的外键;
- Status 是枚举类型,枚举成员为 (‘completed’, ‘cancelled_by_driver’, ‘cancelled_by_client’)。
- Users 表存所有用户:
- 每个用户有唯一键 Users_Id;
- Banned 表示这个用户是否被禁止;
- Role 则是一个表示(‘client’, ‘driver’, ‘partner’)的枚举类型。
- 写一段 SQL 语句查出**2013年10月1日**至**2013年10月3日**期间非禁止用户的取消率,保留两位小数。
```sql
-- 建表语句
DROP TABLE if EXISTS Trips;
CREATE TABLE Trips
(Id INT,
Client_Id INT,
Driver_Id INT,
City_Id INT,
Status VARCHAR(30),
Request_at DATE,
PRIMARY KEY (Id));
DROP TABLE if EXISTS Users ;
CREATE TABLE Users
(Users_Id INT,
Banned VARCHAR(30),
Role VARCHAR(30),
PRIMARY KEY (Users_Id));
-- 插入数据
INSERT INTO Trips VALUES (1, 1, 10, 1, 'completed', '2013-10-1');
INSERT INTO Trips VALUES (2, 2, 11, 1, 'cancelled_by_driver', '2013-10-1');
INSERT INTO Trips VALUES (3, 3, 12, 6, 'completed', '2013-10-1');
INSERT INTO Trips VALUES (4, 4, 13, 6, 'cancelled_by_client', '2013-10-1');
INSERT INTO Trips VALUES (5, 1, 10, 1, 'completed', '2013-10-2');
INSERT INTO Trips VALUES (6, 2, 11, 6, 'completed', '2013-10-2');
INSERT INTO Trips VALUES (7, 3, 12, 6, 'completed', '2013-10-2');
INSERT INTO Trips VALUES (8, 2, 12, 12, 'completed', '2013-10-3');
INSERT INTO Trips VALUES (9, 3, 10, 12, 'completed', '2013-10-3');
INSERT INTO Trips VALUES (10, 4, 13, 12, 'cancelled_by_driver', '2013-10-3');
INSERT INTO Users VALUES (1, 'No', 'client');
INSERT INTO Users VALUES (2, 'Yes', 'client');
INSERT INTO Users VALUES (3, 'No', 'client');
INSERT INTO Users VALUES (4, 'No', 'client');
INSERT INTO Users VALUES (10, 'No', 'driver');
INSERT INTO Users VALUES (11, 'No', 'driver');
INSERT INTO Users VALUES (12, 'No', 'driver');
INSERT INTO Users VALUES (13, 'No', 'driver');
-- 执行查询
select
Request_at as Day,
format(sum(if(Status = 'completed', 0, 1))/count(*),2) as "Cancellation Rate"
from (
-- 找出非禁止用户的订单
select
Id,
Client_Id,
Status,
Request_at
from trips t left join (
select
Users_Id
from
users
where
Role = 'client' and Banned = 'No'
) usr on t.Client_Id = usr.Users_Id
) as usr_trips
group by
Request_at
order by
Request_at;
- 谁是明星带货主播(难度:中等)
- 如果某主播的某日销售额占比达到该平台当日销售总额的 90% 及以上,则称该主播为明星主播,当天也称为明星主播日;
- 请使用 SQL 完成如下计算:
- 2021年有多少个明星主播日? ```sql — 建表语句 DROP TABLE if EXISTS anchor_sales; CREATE TABLE anchor_sales (anchor_name VARCHAR(20), date INT, sales INT);
— 插入数据 INSERT INTO anchor_sales VALUES(‘A’, 20210101, 40000); INSERT INTO anchor_sales VALUES(‘B’, 20210101, 80000); INSERT INTO anchor_sales VALUES(‘A’, 20210102, 10000); INSERT INTO anchor_sales VALUES(‘C’, 20210102, 90000); INSERT INTO anchor_sales VALUES(‘A’, 20210103, 7500); INSERT INTO anchor_sales VALUES(‘C’, 20210103, 80000);
— 执行代码 select distinct date from ( select anchor_name, date, sales / ( select sum(sales) from anchor_sales as2 where as1.date = as2.date group by as2.date ) as sales_rate from anchor_sales as1 where year(date) = ‘2021’ ) as sales_table where sales_rate >= 0.9;
- 2021年有多少个明星主播?
```sql
select
distinct anchor_name
from (
select
anchor_name,
sales / (
select
sum(sales)
from
anchor_sales as2
where
as1.date = as2.date
group by
as2.date
) as sales_rate
from
anchor_sales as1
where
year(date) = '2021'
) as sales_table
where
sales_rate >= 0.9;
- 列转行应用 —— 求考试成绩(难度:中等)
- 假设 A B C 三位小朋友期末考试成绩如下所示:
+-----+-----------+------| | name| subject |score | +-----+-----------+------| | A | chinese | 99 | | A | math | 98 | | A | english | 97 | | B | chinese | 92 | | B | math | 91 | | B | english | 90 | | C | chinese | 88 | | C | math | 87 | | C | english | 86 | +-----+-----------+------|
- 假设 A B C 三位小朋友期末考试成绩如下所示:
- 使用SQL语句将以上乘积转换为如下形式:
+-----+-----------+------|---------| | name| chinese | math | english | +-----+-----------+------|---------| | A | 99 | 98 | 97 | | B | 92 | 91 | 90 | | C | 88 | 87 | 86 | +-----+-----------+------|---------|
-- 建表语句
DROP TABLE IF EXISTS `score`;
CREATE TABLE `score` (
`name` varchar(20) DEFAULT NULL,
`subject` varchar(20) DEFAULT NULL,
`score` int DEFAULT NULL
);
-- 插入数据
INSERT INTO score VALUES('A', 'chinese', 99);
INSERT INTO score VALUES('A', 'math', 98);
INSERT INTO score VALUES('A', 'english', 97);
INSERT INTO score VALUES('B', 'chinese', 92);
INSERT INTO score VALUES('B', 'math', 91);
INSERT INTO score VALUES('B', 'english', 90);
INSERT INTO score VALUES('C', 'chinese', 88);
INSERT INTO score VALUES('C', 'math', 87);
INSERT INTO score VALUES('C', 'english', 86);
-- 执行查询
-- 列转行用 UNION
select
name,
'chinese' as subject,
score
from
score
where
subject = 'chinese'
union
select
name,
'math' as subject,
score
from
score
where
subject = 'math'
union
select
name,
'english' as subject,
score
from
score
where
subject = 'english'
order by name;
- 列转行应用 —— 求比赛结果(难度:困难,含有聚合函数的列转行)
- 假设有如下比赛结果:
+--------------+-----+-----| | 比赛日期 | 胜 | 负 | +--------------+-----------+ | 2021-01-01 | 2 | 1 | | 2021-01-03 | 1 | 2 | +--------------+-----------+
- 假设有如下比赛结果:
- 请使用 SQL 将比赛结果转换为如下形式:
+--------------+-----------+ | cdate | result | +--------------+-----------+ | 2021-01-01 | 胜 | | 2021-01-01 | 胜 | | 2021-01-01 | 负 | | 2021-01-03 | 胜 | | 2021-01-03 | 负 | | 2021-01-03 | 负 | +------------+-------------+
-- 建表
DROP TABLE IF EXISTS col_row;
create table col_row
(`比赛日期` date
,`胜` int
,`负` int);
-- 插入数据
insert into col_row values ('2021-01-01', 2, 1);
insert into col_row values ('2021-01-03', 1, 2);
-- 执行查询
-- 含有聚合函数的列转行
select
cdate,
result
from (
select
*
from (
select
`比赛日期` as cdate,
'胜' as result,
`胜` as cnt
from
col_row
union
select
`比赛日期` as cdate,
'负' as result,
`负` as cnt
from
col_row
) table_1
union
select
*
from (
select
`比赛日期` as cdate,
'胜' as result,
`胜` - 1 as cnt
from
col_row
union
select
`比赛日期` as cdate,
'负' as result,
`负` - 1 as cnt
from
col_row
) table_2
where
cnt > 0
) as result_table
order by
cdate, result;
【补充】:
- 本题是针对最大胜场和最大负场均为2的情况,如果有有更多的最大胜负情况则需要按最大情况的值的数量的表进行UNION
- 行转列应用 —— 求考试成绩(难度:中等)
- 需求和列转行对应的题目相反
``sql -- 建表语句 DROP TABLE IF EXISTSscore2; CREATE TABLEscore2(namevarchar(20) DEFAULT NULL,subjectvarchar(20) DEFAULT NULL,score` int DEFAULT NULL );
— 插入数据 INSERT INTO score2 VALUES(‘A’, ‘chinese’, 99); INSERT INTO score2 VALUES(‘A’, ‘math’, 98); INSERT INTO score2 VALUES(‘A’, ‘english’, 97); INSERT INTO score2 VALUES(‘B’, ‘chinese’, 92); INSERT INTO score2 VALUES(‘B’, ‘math’, 91); INSERT INTO score2 VALUES(‘B’, ‘english’, 90); INSERT INTO score2 VALUES(‘C’, ‘chinese’, 88); INSERT INTO score2 VALUES(‘C’, ‘math’, 87); INSERT INTO score2 VALUES(‘C’, ‘english’, 86);
— 执行查询 — 行转列用 case when select name, max(if(subject=’chinese’, score, 0)) as chinese, max(if(subject=’english’, score, 0)) as english, max(if(subject=’math’, score, 0)) as math from score2 group by name;
- **行转列应用 —— 求比赛结果(难度:中等)**
- 需求和列转行对应的题目相反
```sql
-- 建表
DROP TABLE IF EXISTS row_col;
create table row_col
(cdate date,
result varchar(255));
-- 插入数据
insert into row_col values ('2021-01-01','胜');
insert into row_col values ('2021-01-01','负');
insert into row_col values ('2021-01-03','胜');
insert into row_col values ('2021-01-03','负');
insert into row_col values ('2021-01-01','胜');
insert into row_col values ('2021-01-03','负');
-- 执行查询
select
cdate as `比赛日期`,
sum(if(result='胜', 1, 0)) as `胜`,
sum(if(result='负', 1, 0)) as `负`
from
row_col
group by
cdate
order by
cdate;
- 连续登录(难度:困难)
- 有用户表行为记录表t_act_records表,包含两个字段:uid(用户ID),imp_date(日期):
- 计算2021年每个月,每个用户连续登录的最多天数 ```sql — 建表语句 DROP TABLE if EXISTS t_act_records; CREATE TABLE t_act_records (uid VARCHAR(20), imp_date DATE);
- 有用户表行为记录表t_act_records表,包含两个字段:uid(用户ID),imp_date(日期):
— 插入数据 INSERT INTO t_act_records VALUES(‘u1001’, 20210101); INSERT INTO t_act_records VALUES(‘u1002’, 20210101); INSERT INTO t_act_records VALUES(‘u1003’, 20210101); INSERT INTO t_act_records VALUES(‘u1003’, 20210102); INSERT INTO t_act_records VALUES(‘u1004’, 20210101); INSERT INTO t_act_records VALUES(‘u1004’, 20210102); INSERT INTO t_act_records VALUES(‘u1004’, 20210103); INSERT INTO t_act_records VALUES(‘u1004’, 20210104); INSERT INTO t_act_records VALUES(‘u1004’, 20210105);
— 需求一 计算2021年每个月,每个用户连续登录的最多天数
— 方法一 开窗求差
— 将相同gap的数据聚合,得到最终结果
select
uid,
count(*) as cnt
from (
— 日期和rk作差,得到gap
select
uid,
date_format(imp_date, ‘YY-mm’) as year_month,
imp_date - rk as gap
from (
select
uid,
imp_date,
rank() over(partition by uid, date_format(imp_date, ‘YY-mm’) order by day(imp_date)) as rk
from
t_act_records
) as rk_table
) as gap_table
group by
uid, year_month, gap;
— 方法二 上一行数据下移 select uid, max(imp_date) - min(imp_date) + 1 as cnt from ( select uid, imp_date, sum(if(gap>1, 1, 0)) over (partition by uid, date_format(imp_date, ‘YYYY-mm’) order by imp_date) as flag from ( select uid, imp_date, datediff(imp_date, lagDt) as gap from ( — 上一行数据下移 select uid, imp_date, lag(imp_date, 1, ‘1970-01-01’) over (partition by uid, date_format(imp_date, ‘YYYY-mm’) order by imp_date) as lagDt from t_act_records ) as lag_table ) as gap_table ) as result_table group by uid, flag;
- 计算2021年每个月,连续2天都有登录的用户名单
```sql
-- 在上一题的结果上筛选出cnt >= 2的结果
select
uid,
count(*) as cnt
from (
-- 日期和rk作差,得到gap
select
uid,
date_format(imp_date, 'YY-mm') as `year_month`,
imp_date - rk as gap
from (
select
uid,
imp_date,
rank() over(partition by uid, date_format(imp_date, 'YY-mm') order by day(imp_date)) as rk
from
t_act_records
) as rk_table
) as gap_table
group by
uid, `year_month`, gap
having
cnt >= 2
- 计算2021年每个月,连续5天都有登录的用户数
select
count(*) as cnt
from (
select
uid,
count(*) as cnt
from (
-- 日期和rk作差,得到gap
select
uid,
date_format(imp_date, 'YY-mm') as `year_month`,
imp_date - rk as gap
from (
select
uid,
imp_date,
rank() over(partition by uid, date_format(imp_date, 'YY-mm') order by day(imp_date)) as rk
from
t_act_records
) as rk_table
) as gap_table
group by
uid, `year_month`, gap
) as result_table
where
cnt >= 5;
- 用户购买商品推荐(难度:困难)
- 假设现在需要根据算法给每个
user_id推荐购买商品,推荐算法比较简单,推荐和他相似的用户购买过的product即可,说明如下:- 排除用户自己购买过的商品
- 相似用户定义:曾经购买过 2 种或 2 种以上的相同的商品
-- 类似于求差集
- 假设现在需要根据算法给每个
参考资料
- Wonderful-SQL:https://github.com/datawhalechina/wonderful-sql
