题一:查询每个商店的当月累计金额和累计的总销售额

数据源

店铺 月份 金额
a 1 150
a 1 200
b 1 1000
b 1 800
c 1 250
c 1 220
b 1 6000
a 2 2000
a 2 3000
b 2 1000
b 2 1500
c 2 350
c 2 280
a 3 350
a 3 250

输出结果

店铺 月份 当月的累计金额 累计每月的金额
a 1 350 350
a 2 5000 5350
a 3 600 5950
b 1 7800 7800
b 2 2500 10300
c 1 470 470
c 2 630 1100

解答

  1. create table sales
  2. (shop string, month string, money int)
  3. ROW FORMAT DELIMITED
  4. fields terminated by ',';
  5. load data local inpath '/home/hadoop/store.txt'
  6. into table sales;
  7. SELECT s.shop, s.month, s.month_sum,
  8. sum(s.month_sum) over(partition by s.shop order by s.month)
  9. from(
  10. SELECT shop, month, sum(money) AS month_sum
  11. from sales group by shop, month
  12. ) AS s;

题二:查询每位同学选课结果

数据源

id course
1 a
1 b
1 c
1 e
2 a
2 c
2 d
2 f
3 a
3 b
3 c
3 e

输出结果

id a b c d e f
1 1 1 1 0 1 0
2 1 0 1 1 0 1
3 1 1 1 0 1 0

解答

select 
  c.id sid,
  if(array_contains(user_courses,courses[0]),1,0) a,
  if(array_contains(user_courses,courses[1]),1,0) b,
  if(array_contains(user_courses,courses[2]),1,0) c,
  if(array_contains(user_courses,courses[3]),1,0) e,
  if(array_contains(user_courses,courses[4]),1,0) d,
  if(array_contains(user_courses,courses[5]),1,0) f
from 
(
    select a.id , a.user_courses , b.courses from 
  (
    select id , collect_set(trim(course)) as  user_courses
    from courses_tb group by id 
  ) a
  join 
  (
    select collect_set(trim(course)) as courses
    from courses_tb
  ) b
) c ;

题三:查询每一年最高气温出现的日期

数据源

2014010114
2014010216
2014010317
2014010410
2014010506
2012010609
2012010732
2012010812
2012010919
2012011023
2001010116
2001010212
2001010310
2001010411
2001010529
2013010619
2013010722
2013010812
2013010929
2013011023
2008010105
2008010216
2008010337
2008010414
2008010516
2007010619
2007010712
2007010812
2007010999
2007011023
2010010114
2010010216
2010010317
2010010410
2010010506
2015010649
2015010722
2015010812
2015010999
2015011023

输出结果

year maxtemp datetime
2001 29 20010105
2007 99 20070109
2008 37 20080103
2010 17 20100103
2012 32 20120107
2013 29 20130109
2014 17 20140103
2015 99 20150109

解答

-- 1
select t.year, t.maxtemp, t.datetime
from(
    select year, datetime, temp,
                max(temp) over(partition by year order by temp desc) as maxtemp
    from temp
) as t
where t.temp = t.maxtemp;
-- 2
select 
    a.year year,a.maxtemp maxtemp,substr(b.line,1,8) datetime 
from
(
  select 
      substr(line,1,4) year,max(cast(substr(line,-2,2) as int)) maxtemp 
  from temp 
  group by substr(line,1,4)
) as a 
inner join temp as b
on a.year=substr(b.line,1,4) 
and a.maxtemp=cast(substr(b.line,-2) as int);

题四:查询数学成绩大于语文成绩的学生的信息

数据源

id sid course score
1 1 yuwen 43
2 1 shuxue 55
3 2 yuwen 77
4 2 shuxue 88
5 3 yuwen 98
6 3 shuxue 65

输出结果

sid 语文 数学
1 43 55
2 77 88

解答

-- 建表
-- 字段解释:id, 学号, 课程, 成绩
CREATE TABLE `course` (
  `id` int(11) NOT NULL AUTO_INCREMENT PRIMARY KEY,
  `sid` int(11) DEFAULT NULL,
  `course` varchar(255) DEFAULT NULL,
  `score` int(11) DEFAULT NULL 
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

-- 插入数据
INSERT INTO `course` VALUES (1, 1, 'yuwen', 43);
INSERT INTO `course` VALUES (2, 1, 'shuxue', 55);
INSERT INTO `course` VALUES (3, 2, 'yuwen', 77);
INSERT INTO `course` VALUES (4, 2, 'shuxue', 88);
INSERT INTO `course` VALUES (5, 3, 'yuwen', 98);
INSERT INTO `course` VALUES (6, 3, 'shuxue', 65);

-- 查询
-- 1
SELECT
    yw.sid,
    yw.yw_score yuwen,
    sx.sx_score shuxue 
FROM
    ( SELECT sid, score AS yw_score FROM course WHERE course = 'yuwen' ) AS yw
    JOIN ( SELECT sid, score AS sx_score FROM course WHERE course = 'shuxue' ) AS sx
  ON yw.sid = sx.sid 
WHERE
    yw.yw_score < sx.sx_score;
-- 2
select * from 
    (select 
    a.sid sid,sum(a.yuwen) yuwen,sum(a.shuxue) shuxue 
    from (
    select 
      sid,
      case course when "yuwen" then score else 0 end yuwen,
      case course when "shuxue" then score else 0 end shuxue 
    from course) a 
   group by a.sid) b 
where yuwen < shuxue;