5. SQL题目 - 《MySQL》

参考资料

各部门工资最高的员工（难度：中等）
- 创建Employee 表，包含所有员工信息，每个员工有其对应的 Id, salary 和 department Id
- 创建Department 表，包含公司所有部门的信息
- 找出每个部门工资最高的员工 ```sql — 建表语句 create table Employee ( Id INTEGER PRIMARY KEY, Name VARCHAR(128), Salary BIGINT, DepartmentId INTEGER );

create table Department( Id INTEGER PRIMARY KEY, Name varchar(128) );

— 插入数据 insert into employee value(1, ‘Joe’, 70000, 1); insert into employee value(2, ‘Henry’, 80000, 2); insert into employee value(3, ‘Sam’, 60000, 2); insert into employee value(4, ‘Max’, 90000, 1);

insert into department value(1, ‘IT’); insert into department value (2, ‘Sales’);

— 查询语句 select d.name as Departname, e.Name, Salary from employee e left join department d on e.DepartmentId = d.Id where (e.DepartmentId, Salary) in ( select e.DepartmentId, max(Salary) as Salary from employee e group by DepartmentId ) ORDER BY d.Id;

执行结果如下图所示： <br />![image.png](https://cdn.nlark.com/yuque/0/2022/png/8424773/1656315418029-37325b00-feef-415f-997a-e222b1048b64.png#clientId=u9a7b8d37-6b29-4&crop=0&crop=0&crop=1&crop=1&from=paste&height=75&id=u30dd9bfb&name=image.png&originHeight=94&originWidth=521&originalType=binary&ratio=1&rotation=0&showTitle=false&size=8495&status=done&style=none&taskId=u5ba64d4c-1fc9-49bc-bf0b-cad8631ef70&title=&width=416.8)
-  **换座位（难度：中等）** 
   - 小美是一所中学的信息科技老师，她有一张 seat 座位表，平时用来储存学生名字和与他们相对应的座位id；
   - 其中**纵列的id是连续递增的**；
   - 写出一个语句帮助小美改变**相邻俩个学生的座位**
```sql
-- 建表语句
create table seat(
    id INT PRIMARY KEY AUTO_INCREMENT,
    student VARCHAR(128)
);
-- 插入数据
insert into seat value(1, 'Abbot');
insert into seat value(2, 'Doris');
insert into seat value(3, 'Emerson');
insert into seat value(4, 'Green');
insert into seat value(5, 'Jeams');
-- 查询语句
select
    (case
        -- 当前座位号为奇数且为最后一个则保持不变
        when mod(id, 2) = 1 and id = maxId then id
        -- 当前座位号为奇数且不为最后一个则加一
        when (mod(id, 2) = 1 and id != maxId) or mod(maxId, 2) = 0 then (id + 1)
        -- 偶数情况全部减一
        else id - 1
    end ) as id,
    student
from
    seat, (select count(*) as maxId from seat) cnt
order by id;

执行结果如下：

连续出现的数字（难度：中等）
- 编写一个 SQL 查询，查找所有至少连续出现三次的数字 ```sql — 建表语句 create table number( Id INT PRIMARY KEY, Num INT NOT NULL );

— 插入数据 insert into number value(1, 1); insert into number value(2, 1); insert into number value(3, 1); insert into number value(4, 2); insert into number value(5, 1); insert into number value(6, 2); insert into number value(7, 2);

— 执行查询 select Num, count(*) as cnt from ( — Id 减去 rk 得到的值可以标记连续的元素，相同的gap的元素就是连续的 select Num, Id - rk as gap from ( — 按数字开窗求排名 select Id, Num, rank() over (partition by Num order by id) as rk from number ) as rk_table ) as gap_table group by Num, gap having cnt = 3;

结果如下：

![image.png](https://cdn.nlark.com/yuque/0/2022/png/8424773/1656315451556-b16437ba-6ac8-4ba6-821a-899969d2790c.png#clientId=u9a7b8d37-6b29-4&crop=0&crop=0&crop=1&crop=1&from=paste&height=48&id=u6c719ec7&name=image.png&originHeight=60&originWidth=266&originalType=binary&ratio=1&rotation=0&showTitle=false&size=2441&status=done&style=none&taskId=u5548b945-ba94-4ea8-ae66-dcf59ed648e&title=&width=212.8)

-  **连续出现的数字 II（难度：困难）**（**本题可以扩展到计算连续登录的用户这一场景**） 
   - 在上一题的基础上追加一个条件：**中间隔一个也算连续**；
   - 查找所有至少连续出现三次的数字
```sql
-- 思路一：类似于上一题，两次开窗聚合
select
    Num,
    sum(cnt) as cnt_2
from (
    select
        Num,
        cnt,
        cast(gap as signed) - cast(rk_2 as signed) as gap_2
    from (
        select
            Num,
            gap,
            cnt,
            rank() over(partition by Num order by gap) as rk_2
        from (
            -- 连续数字表
            select
                Num,
                gap,
                count(*) as cnt
            from (
                -- Id 减去 rk 得到的值可以标记连续的元素，相同的gap的元素就是连续的
                select
                    Num,
                    Id - rk as gap
                from (
                    -- 按数字开窗求排名
                    select
                        Id,
                        Num,
                        rank() over (partition by Num order by id) as rk
                    from
                        number
                 ) as rk_table
            ) as gap_table
            group by
                Num, gap
                 ) as continue_1_table
         ) as rk_table_2
    ) as gap_table_2
group by
    Num, gap_2
having
    cnt_2 = 3;

-- 思路二：上一行的ID下移 求差值
select
    Num,
    -- max(id) - min(id) + 1 连续登录场景下用这个方式求解
    count(*) as cnt
from (
    select
    Num,
    Id,
    sum(if(gap > 2, 1, 0)) over(partition by Num ORDER BY Id) as flag
from (
    select
        Num,
        Id,
        Id - LagId as gap
    from (
        -- 通过 lag 函数将上一行数据下移，第一行的上一行默认为0
        select
            Num,
            Id,
            lag(Id, 1, 0) over(partition by Num order by Id) as lagId
        from
            number
        ) as lag_table
    ) as gap_table
) as flag_table
group by
    Num, flag
having
    cnt = 3;

得到的结果如下：

【补充】：

窗口函数Lag用于求当前行的前n个记录的值，其中第一个参数是目标值，第二个参数是前面的记录数量，第三个参数是没有前N个记录对应的默认值。
- 树节点（难度：中等）
写一条查询语句打印节点id及对应的节点类型，按照节点Id排序
每个节点都是以下三种类型中的一种：
- Root: 如果节点是根节点；
- Leaf: 如果节点是叶子节点；
- Inner: 如果节点既不是根节点也不是叶子节点。 ```sql — 建表 create table tree( id INT PRIMARY KEY, p_id INT );

— 插入数据 insert into tree(id) values(1); insert into tree values(2, 1); insert into tree values(3, 1); insert into tree values(4, 2); insert into tree values(5, 2);

— 执行查询 — case when + 子查询 select id, case when p_id is NULL then ‘root’ when p_id is not NULL and id in (select distinct p_id from tree) then ‘inner’ else ‘leaf’ end as type from tree order by id;

结果如下： <br />![image.png](https://cdn.nlark.com/yuque/0/2022/png/8424773/1656315611429-2054cb20-c0cc-4fe3-b245-a72225ef3843.png#clientId=u9a7b8d37-6b29-4&crop=0&crop=0&crop=1&crop=1&from=paste&height=48&id=u16ab7d33&name=image.png&originHeight=60&originWidth=292&originalType=binary&ratio=1&rotation=0&showTitle=false&size=2978&status=done&style=none&taskId=uc5955884-a8d0-4089-a4cb-bbb23b14117&title=&width=233.6)

-  **至少有五名直接下属的经理 （难度：中等）** 
   - Employee表包含所有员工及其上级的信息。每位员工都有一个Id，并且还有一个对应主管的Id（ManagerId）；
   - 针对Employee表，写一条SQL语句找出有5个下属的主管
```sql
-- 建表语句
DROP TABLE if exists Employee;
CREATE TABLE Employee
(
    id         INT,
    name       varchar(20),
    department varchar(20),
    managerid  INT,
    PRIMARY KEY (id)
);

-- 插入数据
INSERT INTO Employee VALUES (101, 'John', 'A', null);
INSERT INTO Employee VALUES (102, 'Dan', 'A', 101);
INSERT INTO Employee VALUES (103, 'James', 'A', 101);
INSERT INTO Employee VALUES (104, 'Amy', 'A', 101);
INSERT INTO Employee VALUES (105, 'Anne', 'A', 101);
INSERT INTO Employee VALUES (106, 'Ron', 'B', 101);

-- 执行查询
select
    Name
from
    Employee
where Id in (
    select
        Managerid
    from
        Employee
    group by
        Managerid
    having
        count(*) >=5
);

查询回答率最高的问题（难度：中等）
- 写一条sql语句找出回答率最高的 question_id； ```sql — 建表语句 DROP TABLE IF EXISTS survey_log; CREATE TABLE survey_log ( uid INT, action VARCHAR (20), question_id INT, answer_id INT, q_num INT, TIMESTAMP INT );

— 插入数据 INSERT INTO survey_log VALUES (5, ‘show’, 285, NULL, 1, 123); INSERT INTO survey_log VALUES (5, ‘answer’, 285, 124124, 1, 124); INSERT INTO survey_log VALUES (5, ‘show’, 369, NULL, 2, 125); INSERT INTO survey_log VALUES (5, ‘skip’, 369, NULL, 2, 126);

— 执行查询 select question_id, sum(if(action = ‘answer’, 1, 0)) / sum(if(action = ‘skip’, 0, 1)) as answer_rate from survey_log group by question_id order by answer_rate desc limit 1;


-  各部门前3高工资的员工（难度：中等） 
   - 编写一个 SQL 查询，找出每个部门工资前三高的员工
```sql
-- 创建表
DROP TABLE if exists employee;
create table Employee (
    Id INTEGER PRIMARY KEY,
    Name VARCHAR(128),
    Salary BIGINT,
    DepartmentId INTEGER
);

-- 插入数据
insert into employee value(1, 'Joe', 70000, 1);
insert into employee value(2, 'Henry', 80000, 2);
insert into employee value(3, 'Sam', 60000, 2);
insert into employee value(4, 'Max', 90000, 1);
INSERT INTO employee value(5, 'Janet', 69000, 1);
INSERT INTO employee value(6, 'Randy', 85000, 1);

-- 执行查询
-- 窗口函数的应用
select
    dp.Name as Department,
    e.Name as Employee,
    Salary
from (
    select
        Name,
        Salary,
        DepartmentId,
        rank() over(partition by DepartmentId order by Salary desc) as rk
    from
        employee
    ) e inner join department dp on dp.Id = e.DepartmentId
where
    rk <= 3
order by
    Department,
    Salary desc;

平面上最近距离 (难度: 困难）
- 写一条查询语句求出表中的点的最短距离并保留2位小数 ```sql — 建表语句 DROP TABLE IF EXISTS point_2d; CREATE TABLE point_2d ( x INT, y INT );

— 插入数据 INSERT INTO point_2d VALUES(-1, -1); INSERT INTO point_2d VALUES( 0, 0); INSERT INTO point_2d VALUES(-1, -2);

— 执行查询 select format(sqrt((x_1 - x_2) (x_1 - x_2) + (y_1 - y_2) (y_1 - y_2)), 2) as dist from ( select A.x as x_1, A.y as y_1, B.x as x_2, B.y as y_2 from point_2d A join point_2d B on (A.x != B.x or A.y != B.y) ) as dist_table order by dist limit 1;

注意这里格式化要用`format()`函数，因为产生的结果如果是整数的话使用`round()`函数则不会生成两个小数位。 

-  **行程和用户（难度：困难）** 
   - Trips 表中存所有出租车的行程信息： 
      - 每段行程有唯一键 Id；
      - Client_Id 和 Driver_Id 是 Users 表中 Users_Id 的外键；
      - Status 是枚举类型，枚举成员为 (‘completed’, ‘cancelled_by_driver’, ‘cancelled_by_client’)。
   - Users 表存所有用户： 
      - 每个用户有唯一键 Users_Id；
      - Banned 表示这个用户是否被禁止；
      - Role 则是一个表示（‘client’, ‘driver’, ‘partner’）的枚举类型。
   - 写一段 SQL 语句查出**2013年10月1日**至**2013年10月3日**期间非禁止用户的取消率，保留两位小数。
```sql
-- 建表语句
DROP TABLE if EXISTS Trips;
CREATE TABLE Trips
(Id INT,
Client_Id INT,
Driver_Id INT,
City_Id INT,
Status VARCHAR(30),
Request_at DATE,
PRIMARY KEY (Id));

DROP TABLE if EXISTS Users ;
CREATE TABLE Users
(Users_Id  INT,
 Banned    VARCHAR(30),
 Role      VARCHAR(30),
PRIMARY KEY (Users_Id));

-- 插入数据
INSERT INTO Trips VALUES (1, 1, 10, 1, 'completed', '2013-10-1');
INSERT INTO Trips VALUES (2, 2, 11, 1, 'cancelled_by_driver', '2013-10-1');
INSERT INTO Trips VALUES (3, 3, 12, 6, 'completed', '2013-10-1');
INSERT INTO Trips VALUES (4, 4, 13, 6, 'cancelled_by_client', '2013-10-1');
INSERT INTO Trips VALUES (5, 1, 10, 1, 'completed', '2013-10-2');
INSERT INTO Trips VALUES (6, 2, 11, 6, 'completed', '2013-10-2');
INSERT INTO Trips VALUES (7, 3, 12, 6, 'completed', '2013-10-2');
INSERT INTO Trips VALUES (8, 2, 12, 12, 'completed', '2013-10-3');
INSERT INTO Trips VALUES (9, 3, 10, 12, 'completed', '2013-10-3');
INSERT INTO Trips VALUES (10, 4, 13, 12, 'cancelled_by_driver', '2013-10-3');

INSERT INTO Users VALUES (1,    'No',  'client');
INSERT INTO Users VALUES (2,    'Yes', 'client');
INSERT INTO Users VALUES (3,    'No',  'client');
INSERT INTO Users VALUES (4,    'No',  'client');
INSERT INTO Users VALUES (10,   'No',  'driver');
INSERT INTO Users VALUES (11,   'No',  'driver');
INSERT INTO Users VALUES (12,   'No',  'driver');
INSERT INTO Users VALUES (13,   'No',  'driver');

-- 执行查询
select
    Request_at as Day,
    format(sum(if(Status = 'completed', 0, 1))/count(*),2) as "Cancellation Rate"
from (
    -- 找出非禁止用户的订单
    select
        Id,
        Client_Id,
        Status,
        Request_at
    from trips t left join (
        select
            Users_Id
        from
            users
        where
            Role = 'client' and Banned = 'No'
        ) usr on t.Client_Id = usr.Users_Id
) as usr_trips
group by
    Request_at
order by
    Request_at;

谁是明星带货主播（难度：中等）
- 如果某主播的某日销售额占比达到该平台当日销售总额的 90% 及以上，则称该主播为明星主播，当天也称为明星主播日；
- 请使用 SQL 完成如下计算：
  - 2021年有多少个明星主播日？ ```sql — 建表语句 DROP TABLE if EXISTS anchor_sales; CREATE TABLE anchor_sales (anchor_name VARCHAR(20), date INT, sales INT);

— 插入数据 INSERT INTO anchor_sales VALUES(‘A’, 20210101, 40000); INSERT INTO anchor_sales VALUES(‘B’, 20210101, 80000); INSERT INTO anchor_sales VALUES(‘A’, 20210102, 10000); INSERT INTO anchor_sales VALUES(‘C’, 20210102, 90000); INSERT INTO anchor_sales VALUES(‘A’, 20210103, 7500); INSERT INTO anchor_sales VALUES(‘C’, 20210103, 80000);

— 执行代码 select distinct date from ( select anchor_name, date, sales / ( select sum(sales) from anchor_sales as2 where as1.date = as2.date group by as2.date ) as sales_rate from anchor_sales as1 where year(date) = ‘2021’ ) as sales_table where sales_rate >= 0.9;


      -  2021年有多少个明星主播？ 
```sql
select
    distinct anchor_name
from (
    select
        anchor_name,
        sales / (
            select
                sum(sales)
            from
                anchor_sales as2
            where
                as1.date = as2.date
            group by
                as2.date
            ) as sales_rate
    from
        anchor_sales as1
    where
        year(date) = '2021'
) as sales_table
where
    sales_rate >= 0.9;

列转行应用 —— 求考试成绩（难度：中等）

假设 A B C 三位小朋友期末考试成绩如下所示：

+-----+-----------+------|
| name|   subject |score |
+-----+-----------+------|
|  A  |  chinese  |  99  |
|  A  |  math     |  98  |
|  A  |  english  |  97  |
|  B  |  chinese  |  92  |
|  B  |  math     |  91  |
|  B  |  english  |  90  |
|  C  |  chinese  |  88  |
|  C  |  math     |  87  |
|  C  |  english  |  86  |
+-----+-----------+------|

使用SQL语句将以上乘积转换为如下形式：

+-----+-----------+------|---------|
| name|   chinese | math | english |
+-----+-----------+------|---------|
|  A  |     99    |  98  |    97   |
|  B  |     92    |  91  |    90   |
|  C  |     88    |  87  |    86   |
+-----+-----------+------|---------|

-- 建表语句
DROP TABLE IF EXISTS `score`;
CREATE TABLE `score` (
  `name` varchar(20) DEFAULT NULL,
  `subject` varchar(20) DEFAULT NULL,
  `score` int DEFAULT NULL
);

-- 插入数据
INSERT INTO score VALUES('A', 'chinese', 99);
INSERT INTO score VALUES('A', 'math',    98);
INSERT INTO score VALUES('A', 'english', 97);
INSERT INTO score VALUES('B', 'chinese', 92);
INSERT INTO score VALUES('B', 'math',    91);
INSERT INTO score VALUES('B', 'english', 90);
INSERT INTO score VALUES('C', 'chinese', 88);
INSERT INTO score VALUES('C', 'math',    87);
INSERT INTO score VALUES('C', 'english', 86);

-- 执行查询
-- 列转行用 UNION
select
    name,
    'chinese' as subject,
    score
from
    score
where
    subject = 'chinese'
union
select
    name,
    'math' as subject,
    score
from
    score
where
    subject = 'math'
union
select
    name,
    'english' as subject,
    score
from
    score
where
    subject = 'english'
order by name;

列转行应用 —— 求比赛结果（难度：困难，含有聚合函数的列转行）

假设有如下比赛结果：

+--------------+-----+-----|
|  比赛日期     | 胜  | 负  |
+--------------+-----------+
|  2021-01-01  |  2  |  1  |
|  2021-01-03  |  1  |  2  |
+--------------+-----------+

请使用 SQL 将比赛结果转换为如下形式：

+--------------+-----------+
|    cdate     |   result  |
+--------------+-----------+
|  2021-01-01  |     胜    |
|  2021-01-01  |     胜    |
|  2021-01-01  |     负    |
|  2021-01-03  |     胜    |
|  2021-01-03  |     负    |
|  2021-01-03  |     负    |
+------------+-------------+

-- 建表
DROP TABLE IF EXISTS col_row;
create table col_row 
(`比赛日期` date
,`胜` int
,`负` int);

-- 插入数据
insert into col_row values ('2021-01-01', 2, 1);
insert into col_row values ('2021-01-03', 1, 2);

-- 执行查询
-- 含有聚合函数的列转行
select
    cdate,
    result
from (
    select
        *
    from (
         select
            `比赛日期` as cdate,
            '胜' as result,
            `胜` as cnt
         from
            col_row
         union
         select
            `比赛日期` as cdate,
            '负' as result,
            `负` as cnt
         from
            col_row
    ) table_1
    union
    select
        *
    from (
        select
            `比赛日期` as cdate,
            '胜' as result,
            `胜` - 1 as cnt
        from
            col_row
        union
        select
            `比赛日期` as cdate,
            '负' as result,
            `负` - 1 as cnt
        from
            col_row
    ) table_2
    where
        cnt > 0
) as result_table
order by
    cdate, result;

【补充】：

本题是针对最大胜场和最大负场均为2的情况，如果有有更多的最大胜负情况则需要按最大情况的值的数量的表进行UNION
- 行转列应用 —— 求考试成绩（难度：中等）
需求和列转行对应的题目相反 ``sql -- 建表语句 DROP TABLE IF EXISTSscore2; CREATE TABLEscore2(namevarchar(20) DEFAULT NULL,subjectvarchar(20) DEFAULT NULL,score` int DEFAULT NULL );

— 插入数据 INSERT INTO score2 VALUES(‘A’, ‘chinese’, 99); INSERT INTO score2 VALUES(‘A’, ‘math’, 98); INSERT INTO score2 VALUES(‘A’, ‘english’, 97); INSERT INTO score2 VALUES(‘B’, ‘chinese’, 92); INSERT INTO score2 VALUES(‘B’, ‘math’, 91); INSERT INTO score2 VALUES(‘B’, ‘english’, 90); INSERT INTO score2 VALUES(‘C’, ‘chinese’, 88); INSERT INTO score2 VALUES(‘C’, ‘math’, 87); INSERT INTO score2 VALUES(‘C’, ‘english’, 86);

— 执行查询 — 行转列用 case when select name, max(if(subject=’chinese’, score, 0)) as chinese, max(if(subject=’english’, score, 0)) as english, max(if(subject=’math’, score, 0)) as math from score2 group by name;


-  **行转列应用 —— 求比赛结果（难度：中等）** 
   - 需求和列转行对应的题目相反
```sql
-- 建表
DROP TABLE IF EXISTS row_col;
create table row_col
(cdate date,
result varchar(255));

-- 插入数据
insert into row_col values ('2021-01-01','胜');
insert into row_col values ('2021-01-01','负');
insert into row_col values ('2021-01-03','胜');
insert into row_col values ('2021-01-03','负');
insert into row_col values ('2021-01-01','胜');
insert into row_col values ('2021-01-03','负');

-- 执行查询
select
    cdate as `比赛日期`,
    sum(if(result='胜', 1, 0)) as `胜`,
    sum(if(result='负', 1, 0)) as `负`
from
    row_col
group by
    cdate
order by
    cdate;

连续登录（难度：困难）
- 有用户表行为记录表t_act_records表，包含两个字段：uid（用户ID），imp_date（日期）：
  - 计算2021年每个月，每个用户连续登录的最多天数 ```sql — 建表语句 DROP TABLE if EXISTS t_act_records; CREATE TABLE t_act_records (uid VARCHAR(20), imp_date DATE);

— 插入数据 INSERT INTO t_act_records VALUES(‘u1001’, 20210101); INSERT INTO t_act_records VALUES(‘u1002’, 20210101); INSERT INTO t_act_records VALUES(‘u1003’, 20210101); INSERT INTO t_act_records VALUES(‘u1003’, 20210102); INSERT INTO t_act_records VALUES(‘u1004’, 20210101); INSERT INTO t_act_records VALUES(‘u1004’, 20210102); INSERT INTO t_act_records VALUES(‘u1004’, 20210103); INSERT INTO t_act_records VALUES(‘u1004’, 20210104); INSERT INTO t_act_records VALUES(‘u1004’, 20210105);

— 需求一计算2021年每个月，每个用户连续登录的最多天数 — 方法一开窗求差 — 将相同gap的数据聚合，得到最终结果 select uid, count(*) as cnt from ( — 日期和rk作差，得到gap select uid, date_format(imp_date, ‘YY-mm’) as year_month, imp_date - rk as gap from ( select uid, imp_date, rank() over(partition by uid, date_format(imp_date, ‘YY-mm’) order by day(imp_date)) as rk from t_act_records ) as rk_table ) as gap_table group by uid, year_month, gap;

— 方法二上一行数据下移 select uid, max(imp_date) - min(imp_date) + 1 as cnt from ( select uid, imp_date, sum(if(gap>1, 1, 0)) over (partition by uid, date_format(imp_date, ‘YYYY-mm’) order by imp_date) as flag from ( select uid, imp_date, datediff(imp_date, lagDt) as gap from ( — 上一行数据下移 select uid, imp_date, lag(imp_date, 1, ‘1970-01-01’) over (partition by uid, date_format(imp_date, ‘YYYY-mm’) order by imp_date) as lagDt from t_act_records ) as lag_table ) as gap_table ) as result_table group by uid, flag;



      -  计算2021年每个月，连续2天都有登录的用户名单 
```sql
-- 在上一题的结果上筛选出cnt >= 2的结果
select
    uid,
    count(*) as cnt
from (
    -- 日期和rk作差，得到gap
    select
        uid,
        date_format(imp_date, 'YY-mm') as `year_month`,
        imp_date - rk as gap
    from (
        select
            uid,
            imp_date,
            rank() over(partition by uid, date_format(imp_date, 'YY-mm') order by day(imp_date)) as rk
        from
            t_act_records
    ) as rk_table
) as gap_table
group by
    uid, `year_month`, gap
having
    cnt >= 2

  -  计算2021年每个月，连续5天都有登录的用户数

select
    count(*) as cnt
from (
    select
        uid,
        count(*) as cnt
    from (
        -- 日期和rk作差，得到gap
        select
            uid,
            date_format(imp_date, 'YY-mm') as `year_month`,
            imp_date - rk as gap
        from (
            select
                uid,
                imp_date,
                rank() over(partition by uid, date_format(imp_date, 'YY-mm') order by day(imp_date)) as rk
            from
                t_act_records
        ) as rk_table
    ) as gap_table
    group by
        uid, `year_month`, gap
) as result_table
where
    cnt >= 5;

用户购买商品推荐（难度：困难）
- 假设现在需要根据算法给每个 user_id 推荐购买商品，推荐算法比较简单，推荐和他相似的用户购买过的 product 即可，说明如下：
  - 排除用户自己购买过的商品
  - 相似用户定义：曾经购买过 2 种或 2 种以上的相同的商品
```
-- 类似于求差集
```

参考资料

Wonderful-SQL：https://github.com/datawhalechina/wonderful-sql