1. 组合两个表
需求:编写一个 SQL 查询,对两表进行关联,展示列为:
FirstName, LastName, City, State
展示效果:
| FirstName | LastName | City | State |
|---|---|---|---|
| Allen | Wang | New York City | New York |
Create table If Not Exists 1_Person (PersonId int, FirstName varchar(255), LastName varchar(255));Create table If Not Exists 1_Address (AddressId int, PersonId int, City varchar(255), State varchar(255));Truncate table 1_Person;Truncate table 1_Address;insert into 1_Person (PersonId, LastName, FirstName) values (1, 'Wang', 'Allen');insert into 1_Address (AddressId, PersonId, City, State) values (1, 1, 'New York City', 'New York');
最终SQL:
selectp.FirstName,p.LastName,a.City,a.Statefrom1_Person as pleft join1_Address as aonp.PersonId = a.PersonId;
2. 第二高的薪水
需求一:编写一个 SQL 查询,获取 Employee 表中第二高的薪水(Salary)。如果不存在第二高的薪水,那么查询应返回 null。
展示效果:
| SecondHighestSalary |
|---|
| 200 |
建表语句:
Create table If Not Exists 2_Employee (Id int, Salary int);Truncate table 2_Employee;insert into 2_Employee (Id, Salary) values (1, 100);insert into 2_Employee (Id, Salary) values (2, 200);insert into 2_Employee (Id, Salary) values (3, 300);
最终SQL:
-- 方法一:selectIFNULL((selectDISTINCT Salaryfrom2_Employeeorder bySalary DESClimit 1,1), NULL) as SecondHighestSalary;-- 方法二:selectmax(Salary) as SecondHighestSalaryfrom2_EmployeewhereSalary < (selectmax(Salary)from2_Employee);-- 方法三:selectmax(e1.salary) as SecondHighestSalaryfrom2_Employee e1,2_Employee e2group bye1.idhavingsum(if(e1.salary > e2.salary,1,0)) = 1;
提示:LIMIT 子句可以被用于强制 SELECT 语句返回指定的记录数。LIMIT 接受一个或两个数字参数。参数必须是一个整数常量。如果给定两个参数,第一个参数指定第一个返回记录行的偏移量,第二个参数指定返回记录行的最大数目。
需求二:编写一个 SQL 查询,获取 Employee 表中第 n 高的薪水(Salary)。
-- 方法一:CREATE FUNCTION getNthHighestSalary_1(N INT) RETURNS INTBEGINSET n = N-1;RETURN (SELECT DISTINCT Salary FROM 2_Employee ORDER BY Salary DESC LIMIT n,1);END;select getNthHighestSalary_1(2) ;-- 方案二:CREATE FUNCTION getNthHighestSalary_2(N INT) RETURNS INTBEGINRETURN (SELECT IF(count<N,NULL,min)FROM(SELECTMIN(Salary) AS min, COUNT(1) AS countFROM(SELECTDISTINCT SalaryFROM2_EmployeeORDER BYSalary DESCLIMIT N) AS a) as b);END;select getNthHighestSalary_2(2) ;
3. 分数排名
需求:编写一个 SQL 查询来实现分数排名。如果两个分数相同,则两个分数排名(Rank)相同。请注意,平分后的下一个名次应该是下一个连续的整数值。换句话说,名次之间不应该有“间隔”。
展示效果:
| Score | Rank |
|---|---|
| 4.00 | 1 |
| 4.00 | 1 |
| 3.85 | 2 |
| 3.65 | 3 |
| 3.65 | 3 |
| 3.50 | 4 |
Create table If Not Exists 3_Scores (Id int, Score DECIMAL(3,2));Truncate table 3_Scores;insert into 3_Scores (Id, Score) values (1, 3.5);insert into 3_Scores (Id, Score) values (2, 3.65);insert into 3_Scores (Id, Score) values (3, 4.0);insert into 3_Scores (Id, Score) values (4, 3.85);insert into 3_Scores (Id, Score) values (5, 4.0);insert into 3_Scores (Id, Score) values (6, 3.65);
最终SQL:
-- 方法一:selecta.Score as score ,(selectcount(distinct b.Score)from3_Scores bwhereb.Score >=a.Score) as `rank`from3_Scores aorder byScore DESC;-- 方法二:selectScore,dense_rank() over(order by Score desc) `rank`from3_Scores;
4. 连续出现的数字
需求:编写一个 SQL 查询,查找所有至少连续出现三次的数字。
展示效果:
| ConsecutiveNums |
|---|
| 1 |
Create table If Not Exists 4_Logs (Id int, Num int);Truncate table 4_Logs;insert into 4_Logs (Id, Num) values (1, 1);insert into 4_Logs (Id, Num) values (2, 1);insert into 4_Logs (Id, Num) values (3, 1);insert into 4_Logs (Id, Num) values (4, 2);insert into 4_Logs (Id, Num) values (5, 1);insert into 4_Logs (Id, Num) values (6, 2);insert into 4_Logs (Id, Num) values (7, 2);
最终SQL:
-- 方法一:SELECTl1.NumFROM4_Logs l1,4_Logs l2,4_Logs l3WHEREl1.Id = l2.Id - 1 AND l1.Num = l2.NumAND l2.Id = l3.Id - 1 AND l2.Num = l3.Num;-- 方法二:SELECTl1.NumFROM4_Logs l1left join4_Logs l2onl1.Id = l2.Id - 1left join4_Logs l3onl2.Id = l3.Id - 1wherel1.num = l2.num and l2.num = l3.num;-- 方法三:select distinct Num ConsecutiveNumsfrom(selectNum,lead(Num,1,null) over(order by id) n2,lead(Num,2,null) over(order by id) n3from 4_Logs)t1where Num = n2 and Num = n3;
5. 超过经理收入的员工
需求:Employee 表包含所有员工,他们的经理也属于员工。每个员工都有一个 Id,此外还有一列对应员工的经理的 Id。编写一个 SQL 查询,该查询可以获取收入超过他们经理的员工的姓名。
数据样式:
| Id | Name | Salary | ManagerId |
|---|---|---|---|
| 1 | Joe | 70000 | 3 |
| 2 | Henry | 80000 | 4 |
| 3 | Sam | 60000 | null |
| 4 | Max | 90000 | null |
展示效果:
| Employee |
|---|
| Joe |
create table If Not Exists 5_Employee (Id int, Name varchar(255), Salary int, ManagerId int);truncate table 5_Employee;insert into 5_Employee (Id, Name, Salary, ManagerId) values (1, 'Joe', 70000, 3);insert into 5_Employee (Id, Name, Salary, ManagerId) values (2, 'Henry', 80000, 4);insert into 5_Employee (Id, Name, Salary, ManagerId) values (3, 'Sam', 60000, null);insert into 5_Employee (Id, Name, Salary, ManagerId) values (4, 'Max', 90000, null);
最终SQL:
SELECTa.NAME AS EmployeeFROM5_Employee AS aJOIN5_Employee AS bONa.ManagerId = b.IdANDa.Salary > b.Salary;
6. 查找重复的邮箱
需求:编写一个 SQL 查询,查找 Person 表中所有重复的电子邮箱。
展示效果:
| a@b.com |
Create table If Not Exists 6_Person (Id int, Email varchar(255));Truncate table 6_Person;insert into 6_Person (Id, Email) values (1, 'a@b.com');insert into 6_Person (Id, Email) values (2, 'c@d.com');insert into 6_Person (Id, Email) values (3, 'a@b.com');
最终SQL:
selectfrom6_Persongroup byhavingcount(Email) > 1;
7. 从不订购的客户
需求:某网站包含两个表,Customers 表和 Orders 表。编写一个 SQL 查询,找出所有从不订购任何东西的客户。
展示效果:
| Customers |
|---|
| Henry |
| Max |
Create table If Not Exists 7_Customers (Id int, Name varchar(255));Create table If Not Exists 7_Orders (Id int, CustomerId int);Truncate table 7_Customers;insert into 7_Customers (Id, Name) values (1, 'Joe');insert into 7_Customers (Id, Name) values (2, 'Henry');insert into 7_Customers (Id, Name) values (3, 'Sam');insert into 7_Customers (Id, Name) values (4, 'Max');Truncate table 7_Orders;insert into 7_Orders (Id, CustomerId) values (1, 3);insert into 7_Orders (Id, CustomerId) values (2, 1);
最终SQL:
-- 方法一:selectc.name as 'Customers'from7_Customers as cwherec.id not in(select customerid from 7_Orders);-- 方法二:selectc.Name Customersfrom7_Customers cleft join7_Orders oonc.id = o.CustomerIdwhereo.id is null;
8. 部门工资最高的员工
需求一:编写一个 SQL 查询,找出每个部门工资最高的员工。例如,根据上述给定的表格,Max 在 IT 部门有最高工资,Henry 在 Sales 部门有最高工资。
展示效果:
| Department | Employee | Salary |
|---|---|---|
| IT | Jim | 90000 |
| IT | Max | 90000 |
| Sales | Henry | 80000 |
Create table If Not Exists 8_Employee (Id int, Name varchar(255), Salary int, DepartmentId int);Create table If Not Exists 8_Department (Id int, Name varchar(255));Truncate table 8_Employee;insert into 8_Employee (Id, Name, Salary, DepartmentId) values (1, 'Joe', 75000, 1);insert into 8_Employee (Id, Name, Salary, DepartmentId) values (2, 'Jim', 90000, 1);insert into 8_Employee (Id, Name, Salary, DepartmentId) values (3, 'Henry', 80000, 2);insert into 8_Employee (Id, Name, Salary, DepartmentId) values (4, 'Sam', 60000, 2);insert into 8_Employee (Id, Name, Salary, DepartmentId) values (5, 'Max', 90000, 1);insert into 8_Employee (Id, Name, Salary, DepartmentId) values (6, 'Randy', 85000, 1);insert into 8_Employee (Id, Name, Salary, DepartmentId) values (7, 'Will', 70000, 1);Truncate table 8_Department;insert into 8_Department (Id, Name) values (1, 'IT');insert into 8_Department (Id, Name) values (2, 'Sales');
最终SQL:
-- 方法一:SELECTd.name AS 'Department',e.name AS 'Employee',SalaryFROM8_Employee as eJOIN8_Department as dONe.DepartmentId = d.IdWHERE(e.DepartmentId , Salary) IN( SELECTDepartmentId, MAX(Salary)FROM8_EmployeeGROUP BY DepartmentId);-- 方法二:selectDepartment,Employee,Salaryfrom(selectd.Name Department,e.Name Employee,e.Salary,rank() over(partition by d.id order by Salary desc) rkfrom8_Employee ejoin8_Department don e.DepartmentId=d.id)tmpwhere rk = 1
需求二:编写一个 SQL 查询,找出每个部门获得前三高工资的所有员工。
展示效果:
| Department | Employee | Salary |
|---|---|---|
| IT | Max | 90000 |
| IT | Jim | 90000 |
| IT | Randy | 85000 |
| IT | Joe | 75000 |
| Sales | Henry | 80000 |
| Sales | Sam | 60000 |
最终SQL:
-- 方法一:SELECTd.Name AS 'Department', e1.Name AS 'Employee', e1.SalaryFROM8_Employee e1JOIN8_Department dONe1.DepartmentId = d.IdWHERE(SELECTCOUNT(DISTINCT e2.Salary)FROM8_Employee e2WHEREe1.Salary < e2.SalaryANDe1.DepartmentId = e2.DepartmentId) < 3order by Department, e1.salary desc;-- 方法二:selectDepartment,Employee,Salaryfrom(selectd.Name Department,e.Name Employee,e.Salary,dense_rank() over(partition by d.id order by Salary desc) rkfrom8_Employee ejoin8_Department done.DepartmentId=d.id)tmpwhere rk <=3
9. 删除重复的电子邮箱
需求:编写一个 SQL 查询,来删除 Person 表中所有重复的电子邮箱,重复的邮箱里只保留 Id 最小 的那个。
展示效果:
| Id | |
|---|---|
| 1 | john@example.com |
| 2 | bob@example.com |
Create table If Not Exists 9_Person (Id int, email varchar(255));Truncate table 9_Person;insert into 9_Person (Id, email) values (1, 'john@example.com');insert into 9_Person (Id, email) values (2, 'bob@example.com');insert into 9_Person (Id, email) values (3, 'john@example.com');
最终SQL:
DELETEp1FROM9_Person p1,9_Person p2WHEREp1.Email = p2.Email AND p1.Id > p2.Id;
10. 上升的温度
需求:编写一个 SQL 查询,来查找与之前(昨天的)日期相比温度更高的所有日期的 Id。
| Id |
|---|
| 2 |
| 4 |
Create table If Not Exists 10_Weather (Id int, RecordDate date, Temperature int);Truncate table 10_Weather;insert into 10_Weather (Id, RecordDate, Temperature) values (1, '2015-01-01', 10);insert into 10_Weather (Id, RecordDate, Temperature) values (2, '2015-01-02', 25);insert into 10_Weather (Id, RecordDate, Temperature) values (3, '2015-01-03', 20);insert into 10_Weather (Id, RecordDate, Temperature) values (4, '2015-01-04', 30);
最终SQL:
-- 方法一:SELECTw1.id AS 'Id'FROM10_Weather w1JOIN10_Weather w2ONDATEDIFF(w1.RecordDate, w2.RecordDate) = 1AND w1.Temperature > w2.Temperature;-- 方法二:selectIdfrom(selectId,RecordDate,Temperature,lag(RecordDate,1,9999-99-99) over (order by RecordDate) yd,lag(Temperature,1,999) over(order by RecordDate ) ytfrom10_Weather)tmpwhereTemperature > yt and datediff(RecordDate,yd)=1;
11. 行程和用户
需求:写一段 SQL 语句查出 2019年10月1日 至 2019年10月3日 期间非禁止用户的取消率。基于上表,你的 SQL 语句应返回如下结果,取消率(Cancellation Rate)保留两位小数。取消率的计算方式如下:(被司机或乘客取消的非禁止用户生成的订单数量) / (非禁止用户生成的订单总数)
Trips表:所有出租车的行程信息。每段行程有唯一键 Id,Client_Id 和 Driver_Id 是 Users 表中 Users_Id 的外键。Status 是枚举类型,枚举成员为 (‘completed’, ‘cancelled_by_driver’, ‘cancelled_by_client’)。
| Id | Client_Id | Driver_Id | City_Id | Status | Request_at |
|---|---|---|---|---|---|
| 1 | 1 | 10 | 1 | completed | 2019-10-01 |
| 2 | 2 | 11 | 1 | cancelled_by_driver | 2019-10-01 |
| 3 | 3 | 12 | 6 | completed | 2019-10-01 |
| 4 | 4 | 13 | 6 | cancelled_by_client | 2019-10-01 |
| 5 | 1 | 10 | 1 | completed | 2019-10-02 |
| 6 | 2 | 11 | 6 | completed | 2019-10-02 |
| 7 | 3 | 12 | 6 | completed | 2019-10-02 |
| 8 | 2 | 12 | 12 | completed | 2019-10-03 |
| 9 | 3 | 10 | 12 | completed | 2019-10-03 |
| 10 | 4 | 13 | 12 | cancelled_by_driver | 2019-10-03 |
Users 表存所有用户。每个用户有唯一键 Users_Id。Banned 表示这个用户是否被禁止,Role 则是一个表示(‘client’, ‘driver’, ‘partner’)的枚举类型。
| Users_Id | Banned | Role |
|---|---|---|
| 1 | No | client |
| 2 | Yes | client |
| 3 | No | client |
| 4 | No | client |
| 10 | No | driver |
| 11 | No | driver |
| 12 | No | driver |
| 13 | No | driver |
展示效果:
| Day | Cancellation Rate |
|---|---|
| 2019-10-01 | 0.33 |
| 2019-10-02 | 0.00 |
| 2019-10-03 | 0.50 |
Create table If Not Exists 11_Trips (Id int, Client_Id int, Driver_Id int, City_Id int, Status ENUM('completed', 'cancelled_by_driver', 'cancelled_by_client'), Request_at varchar(50));Create table If Not Exists 11_Users (Users_Id int, Banned varchar(50), Role ENUM('client', 'driver', 'partner'));Truncate table 11_Trips;insert into 11_Trips (Id, Client_Id, Driver_Id, City_Id, Status, Request_at) values (1, 1, 10, 1, 'completed', '2019-10-01');insert into 11_Trips (Id, Client_Id, Driver_Id, City_Id, Status, Request_at) values (2, 2, 11, 1, 'cancelled_by_driver', '2019-10-01');insert into 11_Trips (Id, Client_Id, Driver_Id, City_Id, Status, Request_at) values (3, 3, 12, 6, 'completed', '2019-10-01');insert into 11_Trips (Id, Client_Id, Driver_Id, City_Id, Status, Request_at) values (4, 4, 13, 6, 'cancelled_by_client', '2019-10-01');insert into 11_Trips (Id, Client_Id, Driver_Id, City_Id, Status, Request_at) values (5, 1, 10, 1, 'completed', '2019-10-02');insert into 11_Trips (Id, Client_Id, Driver_Id, City_Id, Status, Request_at) values (6, 2, 11, 6, 'completed', '2019-10-02');insert into 11_Trips (Id, Client_Id, Driver_Id, City_Id, Status, Request_at) values (7, 3, 12, 6, 'completed', '2019-10-02');insert into 11_Trips (Id, Client_Id, Driver_Id, City_Id, Status, Request_at) values (8, 2, 12, 12, 'completed', '2019-10-03');insert into 11_Trips (Id, Client_Id, Driver_Id, City_Id, Status, Request_at) values (9, 3, 10, 12, 'completed', '2019-10-03');insert into 11_Trips (Id, Client_Id, Driver_Id, City_Id, Status, Request_at) values (10, 4, 13, 12, 'cancelled_by_driver', '2019-10-03');Truncate table 11_Users;insert into 11_Users (Users_Id, Banned, Role) values (1, 'No', 'client');insert into 11_Users (Users_Id, Banned, Role) values (2, 'Yes', 'client');insert into 11_Users (Users_Id, Banned, Role) values (3, 'No', 'client');insert into 11_Users (Users_Id, Banned, Role) values (4, 'No', 'client');insert into 11_Users (Users_Id, Banned, Role) values (10, 'No', 'driver');insert into 11_Users (Users_Id, Banned, Role) values (11, 'No', 'driver');insert into 11_Users (Users_Id, Banned, Role) values (12, 'No', 'driver');insert into 11_Users (Users_Id, Banned, Role) values (13, 'No', 'driver');
最终SQL:
方法一:SELECTT.request_at AS `Day`,ROUND(SUM(IF(T.STATUS = 'completed',0,1))/ COUNT(T.STATUS),2) AS `Cancellation Rate`FROM11_Trips AS TJOIN11_Users AS U1ONT.client_id = U1.users_id AND U1.banned ='No'WHERET.request_at BETWEEN '2019-10-01' AND '2019-10-03'GROUP BYT.request_at;
12. 游戏玩法分析
需求一:写一条 SQL 查询语句获取每位玩家 第一次登陆平台的日期。
Activity表:显示了某些游戏的玩家的活动情况。
| player_id | device_id | event_date | games_played |
|---|---|---|---|
| 1 | 2 | 2016-03-01 | 5 |
| 1 | 2 | 2016-03-02 | 6 |
| 2 | 3 | 2017-06-25 | 1 |
| 3 | 1 | 2016-03-01 | 0 |
| 3 | 4 | 2018-07-03 | 5 |
展示效果:
| player_id | first_login |
|---|---|
| 1 | 2016-03-01 |
| 2 | 2017-06-25 |
| 3 | 2016-03-02 |
Create table If Not Exists 12_Activity (player_id int, device_id int, event_date date, games_played int);Truncate table 12_Activity;insert into 12_Activity (player_id, device_id, event_date, games_played) values (1, 2, '2016-03-01', 5);insert into 12_Activity (player_id, device_id, event_date, games_played) values (1, 2, '2016-03-02', 6);insert into 12_Activity (player_id, device_id, event_date, games_played) values (2, 3, '2017-06-25', 1);insert into 12_Activity (player_id, device_id, event_date, games_played) values (3, 1, '2016-03-01', 0);insert into 12_Activity (player_id, device_id, event_date, games_played) values (3, 4, '2018-07-03', 5);
最终SQL:
selectplayer_id,min(event_date) as first_loginfrom12_Activitygroup byplayer_id;
需求二:描述每一个玩家首次登陆的设备名称
| player_id | device_id |
|---|---|
| 1 | 2 |
| 2 | 3 |
| 3 | 1 |
最终SQL:
-- 方法一:selectplayer_id,device_idfrom12_Activitywhere(player_id,event_date) in (selectplayer_id,min(event_date)from12_Activitygroup byplayer_id);-- 方法二:selectplayer_id,device_idfrom(selectplayer_id,event_date,device_id,rank() over(partition by player_id order by event_date) rkfrom12_Activity) tmpwhere rk = 1;
需求三:编写一个 SQL 查询,同时报告每组玩家和日期,以及玩家到目前为止玩了多少游戏。也就是说,在此日期之前玩家所玩的游戏总数。详细情况请查看示例。
| player_id | event_date | games_played_so_far |
|---|---|---|
| 1 | 2016-03-01 | 5 |
| 1 | 2016-05-02 | 11 |
| 2 | 2017-06-25 | 1 |
| 3 | 2016-03-02 | 0 |
| 3 | 2018-07-03 | 5 |
提示:提示:对于 ID 为 1 的玩家,2016-05-02 共玩了 5+6=11 个游戏.
对于 ID 为 3 的玩家,2018-07-03 共玩了 0+5=5 个游戏。
最终SQL:
-- 方法一:SELECTA.player_id,A.event_date,SUM(B.games_played) AS `games_played_so_far`FROM12_Activity AS Aleft JOIN12_Activity AS BONA.player_id = B.player_idAND A.event_date >= B.event_dateGROUP BYA.player_id,A.event_date;-- 方法二:selectplayer_id,event_date ,sum(games_played) over(partition by player_id order by event_date )games_played_so_farfrom 12_Activity;
需求四:编写一个 SQL 查询,报告在首次登录的第二天再次登录的玩家的百分比,四舍五入到小数点后两位。换句话说,您需要计算从首次登录日期开始至少连续两天登录的玩家的数量,然后除以玩家总数。
| fraction |
|---|
| 0.33 |
最终SQL:
-- 方法一:selectround(sum(case when datediff(a.event_date,b.first_date)=1 then 1 else 0 end)/(select count(distinct(player_id)) from 12_Activity),2 ) as fractionfrom12_Activity a,(selectplayer_id,min(event_date) first_datefrom12_Activitygroup byplayer_id) bwherea.player_id=b.player_id;-- 方法二:selectround(avg(a.event_date is not null), 2) fractionfrom(selectplayer_id,min(event_date) as first_datefrom12_Activitygroup byplayer_id) bleft join12_Activity aonb.player_id=a.player_id and datediff(a.event_date, b.first_date)=1
需求五:编写一个 SQL 查询,报告每个安装日期、当天安装游戏的玩家数量和第一天的保留时间。
| install_dt | installs | Day1_retention |
|---|---|---|
| 2016-03-01 | 2 | 0.50 |
| 2017-06-25 | 1 | 0.00 |
提示:玩家 1 和 3 在 2016-03-01 安装了游戏,但只有玩家 1 在 2016-03-02 重新登录,所以 2016-03-01 的第一天保留时间是 1/2=0.50
玩家 2 在 2017-06-25 安装了游戏,但在 2017-06-26 没有重新登录,因此 2017-06-25 的第一天保留为 0/1=0.00
最终SQL:
-- 方法一SELECTA.event_date AS install_dt,COUNT(A.player_id) AS installs,round(COUNT(C.player_id)/COUNT(A.player_id),2) AS Day1_retentionFROM12_Activity AS Aleft JOIN12_Activity AS BONA.player_id = B.player_id AND A.event_date > B.event_dateleft JOIN12_Activity AS CONA.player_id = C.player_id AND C.event_date = DATE_ADD(A.event_date,INTERVAL 1 DAY)WHEREB.event_date IS NULLGROUP BYA.event_date;
13. 员工薪水中位数
需求:请编写SQL查询来查找每个公司的薪水中位数。挑战点:你是否可以在不使用任何内置的SQL函数的情况下解决此问题。
展示效果:
| Id | Company | Salary |
|---|---|---|
| 5 | A | 451 |
| 6 | A | 513 |
| 12 | B | 234 |
| 9 | B | 1154 |
| 15 | C | 2645 |
Create table If Not Exists 13_Employee (Id int, Company varchar(255), Salary int);Truncate table 13_Employee;insert into 13_Employee (Id, Company, Salary) values (1, 'A', 2341);insert into 13_Employee (Id, Company, Salary) values (2, 'A', 341);insert into 13_Employee (Id, Company, Salary) values (3, 'A', 15);insert into 13_Employee (Id, Company, Salary) values (4, 'A', 15314);insert into 13_Employee (Id, Company, Salary) values (5, 'A', 451);insert into 13_Employee (Id, Company, Salary) values (6, 'A', 513);insert into 13_Employee (Id, Company, Salary) values (7, 'B', 15);insert into 13_Employee (Id, Company, Salary) values (8, 'B', 13);insert into 13_Employee (Id, Company, Salary) values (9, 'B', 1154);insert into 13_Employee (Id, Company, Salary) values (10, 'B', 1345);insert into 13_Employee (Id, Company, Salary) values (11, 'B', 1221);insert into 13_Employee (Id, Company, Salary) values (12, 'B', 234);insert into 13_Employee (Id, Company, Salary) values (13, 'C', 2345);insert into 13_Employee (Id, Company, Salary) values (14, 'C', 2865);insert into 13_Employee (Id, Company, Salary) values (15, 'C', 2645);insert into 13_Employee (Id, Company, Salary) values (16, 'C', 2652);insert into 13_Employee (Id, Company, Salary) values (17, 'C', 65);
最终SQL:
-- 方法一:selectb.id,b.company,b.salaryfrom(selectid,company,salary,case @com when company then @rk:=@rk+1 else @rk:=1 end rk,@com:=companyfrom13_Employee,(select @rk:=0, @com:='') aorder bycompany,salary) bleft join(selectcompany,count(1)/2 cntfrom13_Employeegroup by company) conb.company=c.companywhereb.rk in (cnt+0.5,cnt+1,cnt);-- 方法二:selectId,Company,Salaryfrom(selectId,Company,Salary,ROW_NUMBER() over(partition by Company order by Salary) rk,count(*) over(partition by Company) cntfrom 13_Employee)t1where rk IN (FLOOR((cnt + 1)/2), FLOOR((cnt + 2)/2))
14. 至少有5名直接下属的经理
需求:Employee 表,请编写一个SQL查询来查找至少有5名直接下属的经理。
展示效果:
| Name |
|---|
| John |
Create table If Not Exists 14_Employee (Id int, Name varchar(255), Department varchar(255), ManagerId int);Truncate table 14_Employee;insert into 14_Employee (Id, Name, Department, ManagerId) values (101, 'John', 'A', null);insert into 14_Employee (Id, Name, Department, ManagerId) values (102, 'Dan', 'A', 101);insert into 14_Employee (Id, Name, Department, ManagerId) values (103, 'James', 'A', 101);insert into 14_Employee (Id, Name, Department, ManagerId) values (104, 'Amy', 'A', 101);insert into 14_Employee (Id, Name, Department, ManagerId) values (105, 'Anne', 'A', 101);insert into 14_Employee (Id, Name, Department, ManagerId) values (106, 'Ron', 'B', 101);
最终SQL:
-- 方法一:SELECTNameFROM14_Employee AS t1JOIN(SELECTManagerIdFROM14_EmployeeGROUP BYManagerIdHAVINGCOUNT(ManagerId) >= 5) AS t2ONt1.Id = t2.ManagerId;-- 方法二:selectNamefrom14_Employeewhere Id in (selectManagerIdfrom14_Employeegroup byManagerIdhavingcount(*)>=5 );
15. 给定数字的频率查询中位数
需求:请编写一个查询来查找所有数字的中位数并将结果命名为 median 。
根据下表数据可以看出,原始数据为:0,0,1,2,2,2,3 中位数为2。
展示效果:
| median |
|---|
| 2.0000 |
Create table If Not Exists 15_Numbers (Number int, Frequency int);Truncate table 15_Numbers;insert into 15_Numbers (Number, Frequency) values (0, 2);insert into 15_Numbers (Number, Frequency) values (1, 1);insert into 15_Numbers (Number, Frequency) values (2, 3);insert into 15_Numbers (Number, Frequency) values (3, 1);
提示:如果 n1.Number 为中位数,n1.Number(包含本身)前累计的数字应大于等于总数/2 ,同时n1.Number(不包含本身)前累计数字应小于等于总数/2。
最终SQL:
selectavg(t.number) as medianfrom(selectn1.number,n1.frequency,(selectsum(frequency)from15_Numbers n2wheren2.number<=n1.number) as asc_frequency,(selectsum(frequency)from15_Numbers n3wheren3.number>=n1.number) as desc_frequencyfrom15_Numbers n1) twheret.asc_frequency>= (select sum(frequency) from 15_Numbers)/2and t.desc_frequency>= (select sum(frequency) from 15_Numbers)/2;
16. 当选者
需求:请编写 sql 语句来找到当选者(CandidateId)的名字,
展示效果:
| Name |
|---|
| B |
Create table If Not Exists 16_Candidate (id int, Name varchar(255));Create table If Not Exists 16_Vote (id int, CandidateId int);Truncate table 16_Candidate;insert into 16_Candidate (id, Name) values (1, 'A');insert into 16_Candidate (id, Name) values (2, 'B');insert into 16_Candidate (id, Name) values (3, 'C');insert into 16_Candidate (id, Name) values (4, 'D');insert into 16_Candidate (id, Name) values (5, 'E');Truncate table 16_Vote;insert into 16_Vote (id, CandidateId) values (1, 2);insert into 16_Vote (id, CandidateId) values (2, 4);insert into 16_Vote (id, CandidateId) values (3, 3);insert into 16_Vote (id, CandidateId) values (4, 2);insert into 16_Vote (id, CandidateId) values (5, 5);
最终SQL:
SELECTname AS 'Name'FROM16_Candidate aJOIN(SELECTCandidateIdFROM16_VoteGROUP BYCandidateIdORDER BYCOUNT(*) DESCLIMIT 1) AS winnerWHEREa.id = winner.CandidateId;
17. 员工奖金
需求:选出所有 bonus < 1000 的员工的 name 及其 bonus。
展示效果:
| name | bonus |
|---|---|
| John | null |
| Dan | 500 |
| Brad | null |
Create table If Not Exists 17_Employee (EmpId int, Name varchar(255), Supervisor int, Salary int);Create table If Not Exists 17_Bonus (EmpId int, Bonus int);Truncate table 17_Employee;insert into 17_Employee (EmpId, Name, Supervisor, Salary) values (3, 'Brad', null, 4000);insert into 17_Employee (EmpId, Name, Supervisor, Salary) values (1, 'John', 3, 1000);insert into 17_Employee (EmpId, Name, Supervisor, Salary) values (2, 'Dan', 3, 2000);insert into 17_Employee (EmpId, Name, Supervisor, Salary) values (4, 'Thomas', 3, 4000);Truncate table 17_Bonus;insert into 17_Bonus (EmpId, Bonus) values (2, 500);insert into 17_Bonus (EmpId, Bonus) values (4, 2000);
最终SQL:
SELECTe.name,b.bonusFROM17_Employee eLEFT JOIN17_Bonus bONe.empid = b.empidWHEREbonus < 1000 OR bonus IS NULL;
18. 最高回答率
需求:请编写SQL查询来找到具有最高回答率的问题。
展示效果:
| survey_log |
|---|
| 285 |
从 `survey_log` 表中获得回答率最高的问题,`survey_log` 表包含这些列**:id**, **action**, **question_id**, **answer_id**, **q_num**, **timestamp**。id 表示用户 id;action 有以下几种值:"show","answer","skip";当 action 值为 "answer" 时 answer_id 非空,而 action 值为 "show" 或者 "skip" 时 answer_id 为空;q_num 表示当前会话中问题的编号。
Create table If Not Exists 18_survey_log (uid int, action varchar(255), question_id int, answer_id int, q_num int, timestamp int);Truncate table 18_survey_log;insert into 18_survey_log (uid, action, question_id, answer_id, q_num, timestamp) values (5, 'show', 285, null, 1, 123);insert into 18_survey_log (uid, action, question_id, answer_id, q_num, timestamp) values (5, 'answer', 285, 124124, 1, 124);insert into 18_survey_log (uid, action, question_id, answer_id, q_num, timestamp) values (5, 'show', 369, null, 2, 125);insert into 18_survey_log (uid, action, question_id, answer_id, q_num, timestamp) values (5, 'skip', 369, null, 2, 126);
最终SQL:
-- 方法一SELECTquestion_id as survey_logFROM(SELECTquestion_id,SUM(case when action="answer" THEN 1 ELSE 0 END) as num_answer,SUM(case when action="show" THEN 1 ELSE 0 END) as num_showFROM18_survey_logGROUP BYquestion_id) as tblORDER BY(num_answer / num_show) DESCLIMIT 1;-- 方法二SELECTquestion_id AS 'survey_log'FROM18_survey_logGROUP BYquestion_idORDER BYCOUNT(answer_id) / COUNT(IF(action = 'show', 1, 0)) DESCLIMIT 1;
19. 员工累计薪水
需求:查询一个员工三个月内的累计薪水,但是不包括最近一个月的薪水。
展示效果:
| Id | Month | Salary |
|---|---|---|
| 1 | 3 | 90 |
| 1 | 2 | 50 |
| 1 | 1 | 20 |
| 2 | 1 | 20 |
| 3 | 3 | 100 |
| 3 | 2 | 40 |
Create table If Not Exists 19_Employee (Id int, Month int, Salary int);Truncate table 19_Employee;insert into 19_Employee (Id, Month, Salary) values (1, 1, 20);insert into 19_Employee (Id, Month, Salary) values (2, 1, 20);insert into 19_Employee (Id, Month, Salary) values (1, 2, 30);insert into 19_Employee (Id, Month, Salary) values (2, 2, 30);insert into 19_Employee (Id, Month, Salary) values (3, 2, 40);insert into 19_Employee (Id, Month, Salary) values (1, 3, 40);insert into 19_Employee (Id, Month, Salary) values (3, 3, 60);insert into 19_Employee (Id, Month, Salary) values (1, 4, 60);insert into 19_Employee (Id, Month, Salary) values (3, 4, 70);
说明:员工 1 除去最近一个月(月份 4),有三个月的薪水记录:月份 3 薪水为 40,月份 2 薪水为 30,月份 1 薪水为 20。所以近 3 个月的薪水累计分别为 (40 + 30 + 20) = 90,(30 + 20) = 50 和 20。
最终SQL:
SELECTE1.id,E1.month,(IFNULL(E1.salary, 0) + IFNULL(E2.salary, 0) + IFNULL(E3.salary, 0)) AS SalaryFROM(SELECTid, MAX(month) AS monthFROM19_EmployeeGROUP BYidHAVINGCOUNT(*) > 1) AS maxmonthLEFT JOIN19_Employee E1ON(maxmonth.id = E1.id AND maxmonth.month > E1.month)LEFT JOIN19_Employee E2ON(E2.id = E1.id AND E2.month = E1.month - 1)LEFT JOIN19_Employee E3ON(E3.id = E1.id AND E3.month = E1.month - 2)ORDER BYid ASC , month DESC;
20. 统计各专业人数
需求:查询 department 表中每个专业的学生人数 (即使没有学生的专业也需列出)。
展示效果:
| dept_name | student_number |
|---|---|
| Engineering | 2 |
| Science | 1 |
| Law | 0 |
CREATE TABLE IF NOT EXISTS 20_student (student_id INT,student_name VARCHAR(45), gender VARCHAR(6), dept_id INT);CREATE TABLE IF NOT EXISTS 20_department (dept_id INT, dept_name VARCHAR(255));Truncate table 20_student;insert into 20_student (student_id, student_name, gender, dept_id) values (1, 'Jack', 'M', 1);insert into 20_student (student_id, student_name, gender, dept_id) values (2, 'Jane', 'F', 1);insert into 20_student (student_id, student_name, gender, dept_id) values (3, 'Mark', 'M', 2);Truncate table 20_department;insert into 20_department (dept_id, dept_name) values (1, 'Engineering');insert into 20_department (dept_id, dept_name) values (2, 'Science');insert into 20_department (dept_id, dept_name) values (3, 'Law');
最终SQL:
SELECTdept_name,COUNT(student_id) AS student_numberFROM20_department dLEFT OUTER JOIN20_student sONd.dept_id = s.dept_idGROUP BYd.dept_nameORDER BYstudent_number DESC,d.dept_name;
