Hive_sql50道练习题

创建表

-- Student table: one row per student, stored as space-delimited text.
create table student (
    s_id    string comment '学生编号',
    s_name  string comment '学生姓名',
    s_birth string comment '学生生日',
    s_sex   string comment '学生性别'
)
comment '学生表'
row format delimited
    fields terminated by ' ';

-- Score table: one row per (student, course) pair with the score obtained.
-- Stored as space-delimited text.
create table score (
    s_id    string comment '学生编号',
    c_id    string comment '课程编号',
    s_score int    comment '分数'
)
-- The table comment used to read '课程表' (course table), copied from the course DDL.
comment '成绩表'
row format delimited
    fields terminated by ' ';

-- Course table: one row per course, with the id of the teacher who gives it.
-- Stored as space-delimited text.
create table course (
    -- The column comment used to read '学生编号' (student id); c_id is the course id.
    c_id   string comment '课程编号',
    c_name string comment '课程名称',
    t_id   string comment '教师编号'
)
comment '课程表'
row format delimited
    fields terminated by ' ';

-- Teacher table: one row per teacher, stored as space-delimited text.
create table teacher (
    t_id   string comment '教师编号',
    t_name string comment '教师姓名'
)
comment '教师表'
row format delimited
    fields terminated by ' ';

数据

[root@ke04 0516]# cat student 
01 赵雷 1990-01-01
02 张三 1990-12-21
03 迪丽热巴 2000-01-01
04 雪见 2005-03-04
05 梅超风 1988-04-06
06 李云龙 1999-12-12
07 蒸煮 1989-07-01
08 王菊 1990-01-20

[root@ke04 0516]# cat score
01 01 80
01 02 90
01 03 99
02 01 70
02 02 60
02 03 80
03 01 80
03 02 80
03 03 80
04 01 50
04 02 30
04 03 20
05 01 76
05 02 87
06 01 31
06 03 34
07 02 89
07 03 98

[root@ke04 0516]# cat course
01 语文 02
02 数学 01
03 英语 03

[root@ke04 0516]# cat teacher
01 张三
02 李四
03 王五

导入数据到hive

-- Load the four local text files into their tables (appends; no overwrite).
load data local inpath '/tmp/bigdata/0516/student'
    into table student;
load data local inpath '/tmp/bigdata/0516/score'
    into table score;
load data local inpath '/tmp/bigdata/0516/course'
    into table course;
load data local inpath '/tmp/bigdata/0516/teacher'
    into table teacher;

1.查询课程编号为'01'的课程比'02'的课程成绩高的所有学生信息

-- Q1: students whose course '01' score is higher than their own course '02' score.
-- score is joined twice, once per course; only students having both courses qualify.
select
    stu.*,
    a.s_score as 01_score,
    b.s_score as 02_score
from student stu
inner join score a
    on a.s_id = stu.s_id
inner join score b
    on b.s_id = stu.s_id
where a.c_id = '01'
  and b.c_id = '02'
  and a.s_score > b.s_score;

02    张三    1990-12-21    70    60
04    雪见    2005-03-04    50    30


方案二: 子查询写法 (注意: hive 对 where 子句中的子查询支持有限, 不支持的写法会报错)

-- Q1 (variant 2): same question as variant 1, answered with an IN subquery.
-- The earlier form compared each '01' score with max() over ALL students'
-- '02' scores, which answers a different question, and relied on a scalar
-- subquery nested inside the IN subquery. Here each student's '01' score is
-- compared with that same student's '02' score via a self-join.
select stu.*
from student stu
where stu.s_id in (
    select a.s_id
    from score a
    inner join score b
        on a.s_id = b.s_id
    where a.c_id = '01'
      and b.c_id = '02'
      and a.s_score > b.s_score
);

2. 查询平均成绩大于等于60分的同学的学生编号和学生姓名和平均成绩

-- Q2: id, name and average score (1 decimal) of students whose average is >= 60.
-- The score table is aliased as b, so columns must be referenced through b
-- (the earlier score.s_score did not resolve). HAVING repeats the aggregate
-- expression instead of relying on the select alias.
select
    stu.s_id,
    stu.s_name,
    round(avg(b.s_score), 1) as avg
from student stu
inner join score b
    on stu.s_id = b.s_id
group by stu.s_id, stu.s_name
having round(avg(b.s_score), 1) >= 60;


-- Q2 (variant): compute the per-student average in a derived table first,
-- then attach the student name and keep averages of at least 60.
select
    stu.s_id,
    stu.s_name,
    tmp.avgscore as score
from student stu
inner join (
    select
        sc.s_id,
        round(avg(sc.s_score), 1) as avgscore
    from score sc
    group by sc.s_id
) as tmp
    on stu.s_id = tmp.s_id
where tmp.avgscore >= 60;

01    赵雷    89.7
02    张三    70.0
03    迪丽热巴    80.0
05    梅超风    81.5
07    蒸煮    93.5

3.查询平均成绩小于60分的同学的学生编号和学生姓名和平均成绩:(包括有成绩的和无成绩的)

旧版本 hive 不支持 not in 子查询, 可以使用 1. left join + is null  2. not exists 解决
-- Q3 helper (NOT IN form): students with no row in score, reported with average 0.
select
    stu2.s_id,
    stu2.s_name,
    0 as avgScore
from student stu2
where stu2.s_id not in (
    select distinct sc2.s_id
    from score sc2
);
替换一:
-- Q3 helper (anti-join form): students with no row in score, reported with average 0.
-- A left join keeps every student; unmatched ones have a NULL sc2.s_id.
select
    stu2.s_id,
    stu2.s_name,
    0 as avgScore
from student stu2
left join score sc2
    on sc2.s_id = stu2.s_id
where sc2.s_id is null;
替换二:
-- Q3 helper (NOT EXISTS form): students with no row in score, reported with average 0.
select
    stu2.s_id,
    stu2.s_name,
    0 as avgScore
from student stu2
where not exists (
    select 1
    from score sc2
    where sc2.s_id = stu2.s_id
);




方案一:
-- Q3 (variant 1): students whose average score is below 60, plus students
-- with no scores at all (reported with average 0).
-- Part 1: students having scores, average (1 decimal) < 60.
select
    stu.s_id,
    stu.s_name,
    tmp.avgScore
from student as stu
inner join (
    select
        sc.s_id,
        round(avg(sc.s_score), 1) as avgScore
    from score sc
    group by sc.s_id
    having round(avg(sc.s_score), 1) < 60
) as tmp
    on tmp.s_id = stu.s_id
union all
-- Part 2: students without any score row (anti-join).
select
    stu2.s_id,
    stu2.s_name,
    0 as avgScore
from student stu2
left join score sc2
    on sc2.s_id = stu2.s_id
where sc2.s_id is null;


方案二: having不支持别名查询
-- Q3 (variant 2): same result as variant 1, aggregating directly over the join
-- and using NOT EXISTS for the students without scores.
select
    stu.s_id,
    stu.s_name,
    round(avg(sc.s_score), 1) as avgScore
from student stu
inner join score sc
    on sc.s_id = stu.s_id
group by stu.s_id, stu.s_name
having round(avg(sc.s_score), 1) < 60
union all
select
    stu2.s_id,
    stu2.s_name,
    0 as avgScore
from student stu2
where not exists (
    select 1
    from score sc2
    where sc2.s_id = stu2.s_id
);


08    王菊    0.0
04    雪见    33.3
06    李云龙    32.5

4.查询'李'老师的数量

-- Q4: number of teachers whose name starts with '李'.
-- One count over the filtered rows. Grouping by t_name would return one row
-- per distinct name instead of the requested total.
select count(*) as teacher_count
from teacher
where t_name like '李%';

李四    1

5.查询所有同学的学生编号、学生姓名、选课总数、所有课程的总成绩

-- Q5: for every student, the number of courses taken and the total score.
-- The left join keeps students without scores: count is 0 and sum is NULL for them.
select
    stu.s_id,
    stu.s_name,
    count(sc.c_id)  as total_count,
    sum(sc.s_score) as total_score
from student stu
left join score sc
    on sc.s_id = stu.s_id
group by
    stu.s_id,
    stu.s_name;


01    赵雷    3    269
02    张三    3    210
03    迪丽热巴    3    240
04    雪见    3    100
05    梅超风    2    163
06    李云龙    2    65
07    蒸煮    2    187
08    王菊    0    NULL

6.查询学过'张三'老师授课的同学的信息

-- Q6: students who took a course given by teacher '张三'.
-- student -> score -> course -> teacher, filtered on the teacher name.
select
    stu.s_id,
    stu.s_name,
    stu.s_birth,
    stu.s_sex
from student stu
inner join score
    on score.s_id = stu.s_id
inner join course
    on course.c_id = score.c_id
inner join teacher t
    on t.t_id = course.t_id
where t.t_name = '张三';


01    赵雷    1990-01-01
02    张三    1990-12-21
03    迪丽热巴    2000-01-01
04    雪见    2005-03-04
05    梅超风    1988-04-06
07    蒸煮    1989-07-01

7.查询没学过'张三'老师授课的同学的信息
-- Q7: students who never took a course given by teacher '张三' (anti-join form).
-- tmp lists the s_id of every score row for a course taught by '张三';
-- students with no match in tmp are kept.
select
    stu.s_id,
    stu.s_name,
    stu.s_birth,
    stu.s_sex
from student stu
left join (
    select sc.s_id
    from score sc
    inner join course c
        on c.c_id = sc.c_id
    inner join teacher t
        on t.t_id = c.t_id
    where t.t_name = '张三'
) tmp
    on tmp.s_id = stu.s_id
where tmp.s_id is null;

-- Q7 (variant): same result using a correlated NOT EXISTS.
select
    stu.s_id,
    stu.s_name,
    stu.s_birth,
    stu.s_sex
from student stu
where not exists (
    select sc.s_id
    from score sc
    inner join course c
        on c.c_id = sc.c_id
    inner join teacher t
        on t.t_id = c.t_id
    where t.t_name = '张三'
      and sc.s_id = stu.s_id
);

06    李云龙    1999-12-12
08    王菊    1990-01-20

8.查询学习编号为'01'并且也学过编号为'02'的课程的同学的信息

-- Q8: students who took both course '01' and course '02'.
-- c_id is a string column and the stored ids are zero-padded ('01', '02'),
-- so the literals must be '01'/'02'; comparing with '1'/'2' matches nothing.
select
    stu.s_id,
    stu.s_name,
    stu.s_birth,
    stu.s_sex
from student stu
inner join (
    select s_id from score where c_id = '01'
) tmp1
    on stu.s_id = tmp1.s_id
inner join (
    select s_id from score where c_id = '02'
) tmp2
    on stu.s_id = tmp2.s_id;

9.查询学过编号为'01'但是没有学过编号为'02'的课程的同学的信息

-- Q9: students who took course '01' but not course '02'.
-- c_id is a string column and the stored ids are zero-padded ('01', '02'),
-- so the literals must be '01'/'02'; comparing with '1'/'2' matches nothing.
select stu.*
from student stu
inner join (
    select s_id from score where c_id = '01'
) tmp1
    on stu.s_id = tmp1.s_id
-- Anti-join: keep only students with no course '02' row.
left join (
    select s_id from score where c_id = '02'
) tmp2
    on stu.s_id = tmp2.s_id
where tmp2.s_id is null;

10.查询没有学全所有课程的同学的信息

-- Q10: students who have not taken every course.
-- tmp1 is a single row holding the total number of courses; tmp2 holds the
-- number of courses per student. A student matches tmp2 only when the s_id
-- matches AND the count equals the total, so unmatched students (NULL
-- tmp2.s_id) are those missing at least one course, or having none at all.
select stu.*
from student stu
cross join (
    select count(c_id) as num
    from course
) tmp1
left join (
    select
        s_id,
        count(c_id) as num
    from score
    group by s_id
) tmp2
    on stu.s_id = tmp2.s_id
    and tmp1.num = tmp2.num
where tmp2.s_id is null;


-- Q10 (variant): students who have not taken every course.
-- tmp2: courses taken per student (NULL num for students with no scores,
--       because of the left join).
-- tmp1: one row with the total number of courses.
-- The earlier form inner-joined tmp1 on tmp1.num = tmp2.num and then filtered
-- tmp2.s_id is null; the inner join only kept students with ALL courses, whose
-- tmp2.s_id is never NULL, so the result was always empty. The comparison now
-- happens in WHERE and treats "no scores" as incomplete.
select stu.*
from student stu
left join (
    select
        s_id,
        count(c_id) as num
    from score
    group by s_id
) tmp2
    on stu.s_id = tmp2.s_id
cross join (
    select count(c_id) as num
    from course
) tmp1
where tmp2.num is null
   or tmp2.num < tmp1.num;

11.查询至少有一门课与学号为'01'的同学所学相同的同学的信息

-- Q11: students sharing at least one course with student '01'.
-- Note: student '01' is part of the result, since their own courses match too.
-- GROUP BY on all student columns removes the duplicates created when
-- several courses match.
select stu.*
from student stu
inner join score tmp1
    on tmp1.s_id = stu.s_id
inner join (
    select c_id
    from score
    where s_id = '01'
) tmp2
    on tmp2.c_id = tmp1.c_id
group by
    stu.s_id,
    stu.s_name,
    stu.s_birth,
    stu.s_sex;

01    赵雷    1990-01-01
02    张三    1990-12-21
03    迪丽热巴    2000-01-01
04    雪见    2005-03-04
05    梅超风    1988-04-06
06    李云龙    1999-12-12
07    蒸煮    1989-07-01

12.查询和'01'号的同学学习的课程完全相同的其他同学的信息

-- Q12: other students whose set of courses is exactly the same as student '01'.
-- Each student's courses are collapsed into one '|'-joined string and compared
-- with the string built for student '01'.
-- Fixes:
--   * student ids are zero-padded strings, so the literal is '01' (not '1');
--   * collect_set() gives no ordering guarantee, so the array is sorted with
--     sort_array() before joining, making equal sets produce equal strings;
--   * the exclusion of '01' is a row filter and belongs in WHERE, not HAVING.
select
    stu.*,
    tmp1.course_id
from student stu
inner join (
    select
        s_id,
        concat_ws('|', sort_array(collect_set(c_id))) as course_id
    from score
    where s_id <> '01'
    group by s_id
) tmp1
    on stu.s_id = tmp1.s_id
inner join (
    select concat_ws('|', sort_array(collect_set(c_id))) as course_id
    from score
    where s_id = '01'
) tmp2
    on tmp2.course_id = tmp1.course_id;

13.查询没学过'张三'老师讲授的任一门课程的学生姓名

-- Q13: students who have not taken any course given by teacher '张三'.
-- tmp3 lists the s_id of every score row for a course taught by '张三'.
-- This is an anti-join, so student must be LEFT joined to tmp3: with an inner
-- join tmp3.s_id can never be NULL and the query returns no rows.
select stu.*
from student stu
left join (
    select tmp1.s_id
    from score tmp1
    inner join (
        select course.c_id
        from course
        inner join teacher
            on course.t_id = teacher.t_id
        where teacher.t_name = '张三'
    ) tmp2
        on tmp1.c_id = tmp2.c_id
) tmp3
    on stu.s_id = tmp3.s_id
where tmp3.s_id is null;

14.查询两门及其以上不及格课程的同学的学号,姓名及其平均成绩

方案一:
-- Q14 (variant 1): id, name and average score (2 decimals) of students who
-- failed (score < 60) at least two courses.
-- tmp selects the qualifying students; the average is then taken over ALL of
-- their scores, not only the failed ones.
select
    tmp.s_id,
    tmp.s_name,
    round(avg(score.s_score), 2)
from (
    select
        stu.s_id   as s_id,
        stu.s_name as s_name
    from student stu
    where stu.s_id in (
        select s_id
        from score
        where s_score < 60
        group by s_id
        having count(s_score) >= 2
    )
) tmp
inner join score
    on score.s_id = tmp.s_id
group by
    tmp.s_id,
    tmp.s_name;


方案二:
-- Q14 (variant 2): same question as variant 1, answered with two derived tables.
-- tmp2: students with more than one failed (score < 60) course.
-- tmp : average over all scores per student, rounded to 2 decimals so the
--       output matches variant 1 (round() without a scale rounds to an integer).
-- Every student in tmp2 has score rows, so a plain inner join to tmp suffices.
select
    stu.s_id,
    stu.s_name,
    tmp.avg_score
from student stu
inner join (
    select s_id
    from score
    where s_score < 60
    group by s_id
    having count(s_id) > 1
) tmp2
    on stu.s_id = tmp2.s_id
inner join (
    select
        s_id,
        round(avg(s_score), 2) as avg_score
    from score
    group by s_id
) tmp
    on tmp.s_id = stu.s_id;

15.检索'01'课程分数小于60,按分数降序排列的学生信息.

-- Q15: students scoring below 60 in course '01', highest score first.
select
    stu.*,
    score.s_score
from student stu
inner join score
    on score.s_id = stu.s_id
where score.c_id = '01'
  and score.s_score < 60
order by score.s_score desc;

16.按平均成绩从高到低显示所有学生的所有课程的成绩以及平均成绩

-- Q16: per student, the score in each course plus the average score
-- (2 decimals), ordered by average from high to low.
-- The three per-course self-joins are replaced by conditional aggregation:
--   * the join for course '02' compared tmp3.s_id with itself, which is true
--     for every row and multiplied the result;
--   * the join for course '03' was chained to tmp3 instead of the driving row,
--     so it depended on the student also having course '02';
--   * the third column alias was misspelled (rnglish).
-- A course the student did not take yields NULL in its column.
-- Only students with at least one score row appear, as before.
select
    s_id,
    max(case when c_id = '01' then s_score end) as chinese,
    max(case when c_id = '02' then s_score end) as math,
    max(case when c_id = '03' then s_score end) as english,
    round(avg(s_score), 2)                      as avgScore
from score
group by s_id
order by avgScore desc;

17.查询各科成绩最高分、最低分和平均分: 如以下形式显示: 课程id、课程名称、最高分、最低分、平均分、及格率、中等率、优良率、优秀率

-- Q17: per course: max, min and average score plus pass / medium / good /
-- excellent rates (share of that course's score rows, 2 decimals).
-- Fixes:
--   * the derived table reads only from score, so it must group by c_id;
--     course.c_id / course.c_name are not in scope there;
--   * excellentRate used the same condition as passRate (>= 60).
-- NOTE(review): band limits follow the usual statement of this exercise
-- (pass >= 60, medium [70, 80), good [80, 90), excellent >= 90) - confirm
-- against the grading scheme you want.
select
    course.c_id,
    course.c_name,
    tmp.maxScore,
    tmp.minScore,
    tmp.avgScore,
    tmp.passRate,
    tmp.modeRate,
    tmp.goodRate,
    tmp.excellentRate
from course
inner join (
    select
        c_id,
        max(s_score)           as maxScore,
        min(s_score)           as minScore,
        round(avg(s_score), 2) as avgScore,
        round(sum(case when s_score >= 60 then 1 else 0 end) / count(c_id), 2) as passRate,
        round(sum(case when s_score >= 70 and s_score < 80 then 1 else 0 end) / count(c_id), 2) as modeRate,
        round(sum(case when s_score >= 80 and s_score < 90 then 1 else 0 end) / count(c_id), 2) as goodRate,
        round(sum(case when s_score >= 90 then 1 else 0 end) / count(c_id), 2) as excellentRate
    from score
    group by c_id
) tmp
    on course.c_id = tmp.c_id;

18.按各科成绩进行排序,并显示排名

-- Q18: number the scores inside each course from highest to lowest.
-- row_number() assigns distinct positions even when scores tie.
select
    c_id,
    s_id,
    s_score,
    row_number() over (
        partition by c_id
        order by s_score desc
    )
from score;

19.查询学生的总成绩并进行排名

-- Q19: total score per student with its position in the overall ranking.
-- Students without score rows do not appear (inner join).
select
    score.s_id,
    student.s_name,
    sum(s_score) as sumscore,
    row_number() over (order by sum(s_score) desc) as Ranking
from score
inner join student
    on score.s_id = student.s_id
group by
    score.s_id,
    student.s_name
order by sumscore desc;

20.查询不同老师所教不同课程平均分从高到低显示(需要对比原笔记: 原笔记关联了teacher表, 需要对比两者结果)

-- Q20: average score (2 decimals) per course and teacher id, highest first.
select
    c.c_id,
    c.t_id,
    round(avg(s.s_score), 2) as avgscore
from course c
inner join score s
    on s.c_id = c.c_id
group by
    c.c_id,
    c.t_id
order by avgscore desc;

22.统计各科成绩各分数段人数: 课程编号、课程名称、[100-85]、[85-70]、[70-60]、[60-0]及所占百分比

-- Q22: per course, the number of score rows and the share (2 decimals) that
-- falls in each band: (85, 100], (70, 85], (60, 70] and [0, 60].
-- The join needs its ON condition: without it every score row is paired with
-- every course and all courses report the same figures.
select
    count(s.s_id) as num,
    c.c_id,
    c.c_name,
    round(sum(case when s.s_score > 85 then 1 else 0 end) / count(s.s_id), 2) as good,
    round(sum(case when s.s_score > 70 and s.s_score <= 85 then 1 else 0 end) / count(s.s_id), 2) as file,
    round(sum(case when s.s_score > 60 and s.s_score <= 70 then 1 else 0 end) / count(s.s_id), 2) as well,
    round(sum(case when s.s_score <= 60 then 1 else 0 end) / count(s.s_id), 2) as poor
from score s
inner join course c
    on s.c_id = c.c_id
group by
    c.c_id,
    c.c_name
order by
    c.c_id,
    c.c_name;

23.查询学生平均成绩及其名次对比原数据

查询平均成绩:
-- Q23 (step 1): average score (2 decimals) per student, aggregating over the join.
select
    stu.s_id,
    stu.s_name,
    round(avg(s.s_score), 2) as avgScore
from student stu
inner join score s
    on stu.s_id = s.s_id
group by
    stu.s_id,
    stu.s_name;
方案二:
-- Q23 (step 1, variant): average per student computed in a derived table,
-- then joined to student for the name.
select
    tmp.s_id,
    stu.s_name,
    tmp.avgscore
from student stu
inner join (
    select
        s.s_id,
        round(avg(s.s_score), 2) as avgscore
    from score s
    group by s.s_id
) tmp
    on stu.s_id = tmp.s_id;



-- Q23 (final): average score per student and its position when ordered from
-- the highest average down (row_number, so ties get distinct positions).
select
    tmp.s_id,
    stu.s_name,
    tmp.avgscore,
    row_number() over (order by tmp.avgscore desc) as rownum
from student stu
inner join (
    select
        s.s_id,
        round(avg(s.s_score), 2) as avgscore
    from score s
    group by s.s_id
) tmp
    on stu.s_id = tmp.s_id;

24.查询各科成绩前三名的记录(不考虑成绩并列情况)

-- Q24: the three best entries of every course (ties broken arbitrarily by
-- row_number). The column named count actually holds the summed score of the
-- student in that course.
select
    tmp.c_id,
    tmp.c_name,
    tmp.s_id,
    tmp.count
from (
    select
        course.c_id,
        course.c_name,
        score.s_id,
        sum(score.s_score) as count,
        row_number() over (
            partition by course.c_id
            order by sum(score.s_score) desc
        ) as rownum
    from score
    inner join course
        on score.c_id = course.c_id
    group by
        course.c_id,
        course.c_name,
        score.s_id
) tmp
where tmp.rownum <= 3;

25.查询每门课程被选修的学生数

-- Q25: number of students enrolled in each course.
-- Every score row counts as an enrolment. The earlier s_score < 60 filter
-- restricted the count to failing students, which is not what is asked.
select
    c.c_id,
    c.c_name,
    tmp.count
from course c
inner join (
    select
        score.c_id,
        count(score.s_id) as count
    from score
    group by score.c_id
) tmp
    on c.c_id = tmp.c_id;

26.查询出只有两门课程的全部学生的学号和姓名

-- Q26: id and name of the students who have exactly two courses.
select
    stu.s_id,
    stu.s_name
from student stu
inner join (
    select s_id
    from score
    group by s_id
    having count(c_id) = 2
) tmp
    on tmp.s_id = stu.s_id;

27.查询男生、女生人数

方案一:
-- Q27 (variant 1): number of female and male students in a single pass,
-- using conditional sums. The statement is now terminated with ';' so it
-- does not run into the next statement when the file is executed as a script.
select
    sum(case when s_sex = '女' then 1 else 0 end) as women,
    sum(case when s_sex = '男' then 1 else 0 end) as man
from student;


方案二:
-- Q27 (variant 2): number of female and male students from two single-row
-- derived tables combined with a cross join.
-- Fixes: the inner selects were missing the WHERE keyword, and the count
-- column had no alias although the outer select referenced it by name.
select
    tmp1.cnt as women,
    tmp2.cnt as men
from (
    select count(1) as cnt
    from student
    where s_sex = '女'
) tmp1
cross join (
    select count(1) as cnt
    from student
    where s_sex = '男'
) tmp2;

28.查询同名同姓学生名单, 并统计同名人数

-- Q28: names shared by more than one student, with the number of holders.
select
    s_name,
    count(1) as sameName
from student
group by s_name
having count(1) > 1;

29.查询每门课程的平均成绩,结果按平均成绩降序排列,平均成绩相同时,按课程编号升序排列

-- Q29: average score (2 decimals) per course, highest first; equal averages
-- are ordered by course id ascending.
select
    c_id,
    round(avg(s_score), 2) as avgscore
from score
group by c_id
order by
    avgscore desc,
    c_id asc;

30.查询平均成绩大于等于85的所有学生的学号、姓名和平均成绩

-- Q30: id, name and average score of students whose average, rounded to
-- 2 decimals, is at least 85.
select
    stu.s_id,
    stu.s_name,
    round(avg(score.s_score), 2) as avgscore
from student stu
inner join score
    on stu.s_id = score.s_id
group by
    stu.s_id,
    stu.s_name
having round(avg(score.s_score), 2) >= 85;

31.查询课程名称为'数学', 且分数低于60的学生姓名和分数(需要和元数据进行对比)

方案一:
-- Q31 (variant 1): name and score of students scoring below 60 in '数学'.
-- Each join now carries its own ON condition. Previously the first join had
-- none and both conditions were attached to the second join. The WHERE
-- filter on score/course columns discards unmatched rows anyway, so inner
-- joins express the intent directly.
select
    stu.s_name,
    score.s_score
from student stu
inner join score
    on stu.s_id = score.s_id
inner join course
    on score.c_id = course.c_id
where course.c_name = '数学'
  and score.s_score < 60;


方案二:
-- Q31 (variant 2): name and score of students scoring below 60 in '数学'.
-- The join to course on score.c_id already discards students without score
-- rows, so joining score with an inner join returns the same rows.
select
    stu.s_name,
    score.s_score
from student stu
inner join score
    on score.s_id = stu.s_id
inner join course
    on course.c_id = score.c_id
where score.s_score < 60
  and course.c_name = '数学';

32.查询所有学生的课程及分数情况

-- Q32: one row per student with the score in each course.
-- Left joins are used so that students without any score rows are listed
-- too (the task asks for all students); inner joins dropped them.
-- A course the student did not take shows 0 because of the else branch.
select
    stu.s_name,
    sum(case when course.c_name = '语文' then score.s_score else 0 end) as chinese,
    sum(case when course.c_name = '数学' then score.s_score else 0 end) as math,
    sum(case when course.c_name = '英语' then score.s_score else 0 end) as english
from student stu
left join score
    on score.s_id = stu.s_id
left join course
    on course.c_id = score.c_id
group by
    stu.s_id,
    stu.s_name;

33.查询任何一门课程成绩在70分以上的学生姓名、课程名称和分数

方案一:
-- Q33 (variant 1): every (student, course) pair whose score is above 70,
-- with student name, score and course name.
select
    stu.s_name,
    score.s_score,
    course.c_name
from student stu
inner join score
    on score.s_id = stu.s_id
inner join course
    on score.c_id = course.c_id
where score.s_score > 70;


方案二:
-- Q33 (variant 2): scores of students who have NO score below 70.
-- tmp lists the students with at least one score below 70; the anti-join
-- removes all of their rows, leaving only students whose every score is >= 70.
-- Note this reads the question differently from variant 1.
select
    s_name,
    c_name,
    s_score
from student
inner join (
    select
        sc.s_id,
        sc.c_id,
        sc.s_score
    from score sc
    left join (
        select s_id
        from score
        where s_score < 70
        group by s_id
    ) tmp
        on tmp.s_id = sc.s_id
    where tmp.s_id is null
) tmp2
    on tmp2.s_id = student.s_id
inner join course
    on course.c_id = tmp2.c_id;

34.查询课程不及格的学生

方案一:
-- Q34 (variant 1): student name, course name and score for every failed
-- (score < 60) course; failing rows are selected in a derived table first.
select
    s_name,
    c_name as courseName,
    tmp.s_score
from student
inner join (
    select
        s_id,
        s_score,
        c_name
    from score
    inner join course
        on score.c_id = course.c_id
    where s_score < 60
) tmp
    on tmp.s_id = student.s_id;


方案二:
-- Q34 (variant 2): student name, course name and score for every failed
-- (score < 60) course, as a flat three-table join.
select
    student.s_name,
    course.c_name as courseName,
    score.s_score
from student
inner join score
    on score.s_id = student.s_id
inner join course
    on course.c_id = score.c_id
where score.s_score < 60;

35.查询课程编号为01且课程成绩在80分以上的学生的学号和姓名

-- Q35: id and name of students scoring above 80 in course '01'.
select
    stu.s_id,
    stu.s_name
from student stu
inner join score
    on stu.s_id = score.s_id
where score.s_score > 80
  and score.c_id = '01';

36.求每门课程的学生人数

-- Q36: number of score rows (students) per course name.
select
    c.c_name,
    count(s.s_id) as num
from score s
inner join course c
    on c.c_id = s.c_id
group by c.c_name;

37.查询选修'张三'老师所授课程的学生中,成绩最高的学生信息及其成绩.

-- Q37: the student with the highest score among those taking a course given
-- by teacher '张三', with that score. row_number() keeps exactly one row, so
-- if several students tie for the top score only one of them is returned.
select
    stu.*,
    tmp.s_score
from student stu
inner join (
    select
        s_id,
        s_score,
        row_number() over (order by s_score desc) as rownum
    from score
    inner join course
        on course.c_id = score.c_id
    inner join teacher
        on course.t_id = teacher.t_id
    where teacher.t_name = '张三'
) tmp
    on tmp.s_id = stu.s_id
where tmp.rownum <= 1;

38.查询不同课程成绩相同的学生的学生编号、课程编号、学生成绩

-- Q38: score rows for which another score row exists with the same score in
-- a different course. The match is NOT restricted to the same student.
-- The equality goes in ON; the inequality stays in WHERE.
select distinct
    a.s_id,
    a.c_id,
    a.s_score
from score a
inner join score b
    on a.s_score = b.s_score
where a.c_id <> b.c_id;

39. 查询每门课程成绩最好的前三名

方案一:
-- Q39 (variant 1): the three best scores of each course, using one window
-- partitioned by course. The course id itself is not part of the output.
select
    tmp.s_id,
    tmp.s_score,
    tmp.ranking
from (
    select
        s_id,
        s_score,
        row_number() over (
            partition by c_id
            order by s_score desc
        ) as ranking
    from score
) tmp
where tmp.ranking <= 3;



方案二:
-- Q39 (variant 2): the three best scores of courses '01', '02' and '03',
-- ranking each course separately and concatenating the three results.
select
    tmp1.s_id    as s_id,
    tmp1.s_score as s_score,
    tmp1.ranking as ranking
from (
    select
        s_id,
        s_score,
        row_number() over (order by s_score desc) as ranking
    from score
    where c_id = '01'
) tmp1
where tmp1.ranking <= 3
union all
select
    tmp2.s_id    as s_id,
    tmp2.s_score as s_score,
    tmp2.ranking as ranking
from (
    select
        s_id,
        s_score,
        row_number() over (order by s_score desc) as ranking
    from score
    where c_id = '02'
) tmp2
where tmp2.ranking <= 3
union all
select
    tmp3.s_id    as s_id,
    tmp3.s_score as s_score,
    tmp3.ranking as ranking
from (
    select
        s_id,
        s_score,
        row_number() over (order by s_score desc) as ranking
    from score
    where c_id = '03'
) tmp3
where tmp3.ranking <= 3;

40.统计每门课程的学生选修人数(超过5人的课程才统计)
-- 要求输出课程号和选修人数,查询结果按人数降序排列,若人数相同按课程号升序排列。

 -- Q40: course id and number of enrolled students for courses with MORE THAN
 -- five students, ordered by that number descending, then course id ascending.
 -- The threshold is strict (> 5); the earlier >= 5 also reported courses with
 -- exactly five students. The join to course keeps only ids present there.
 select
     tmp.c_id,
     tmp.count as num
 from course c
 inner join (
     select
         c_id,
         count(s_id) as count
     from score
     group by c_id
     having count(s_id) > 5
 ) tmp
     on tmp.c_id = c.c_id
 order by
     num desc,
     tmp.c_id asc;

 41.查询各学生的年龄(周岁)

方案一:
 -- Q41 (variant 1): age in completed years.
 -- Start from the difference of the calendar years and subtract one when this
 -- year's birthday has not been reached yet. The earlier form ADDED one once
 -- the birthday had passed, which reports one year too many in that case.
 select
     s_name,
     s_birth,
     year(current_date) - year(s_birth)
         - (case
                when month(current_date) < month(s_birth) then 1
                when month(current_date) = month(s_birth)
                     and day(current_date) < day(s_birth) then 1
                else 0
            end) as age
 from student;



方案二:
-- Q41 (variant 2): age in completed years, derived from the number of months
-- between the birth date and today. This replaces the day-count estimate
-- (days / 365 with a rough leap-year correction, plus a stray + 1), which
-- reported one year too many.
-- NOTE(review): months_between() requires a Hive version that ships it
-- (1.2 or later per the Hive UDF manual) - confirm for your cluster.
select
    s_name,
    s_birth,
    floor(months_between(current_date, s_birth) / 12) as age
from student;

42.查询下周过生日的学生

-- Q42: students whose birthday falls in next week.
-- "Next week" is the week containing today + 7 days. The birthday is moved
-- into that date's year (year + the '-MM-dd' part of s_birth) before taking
-- its week number. The earlier form used the week number of the birth date in
-- the birth year, and weekofyear(current_date) + 1 overflows past the last
-- week at year end.
-- NOTE(review): a Feb 29 birthday becomes an invalid date in non-leap years
-- and then never matches; week numbering around Dec 31 / Jan 1 can also
-- straddle two years - handle separately if that matters.
select *
from student
where weekofyear(date_add(current_date, 7))
    = weekofyear(
          concat(cast(year(date_add(current_date, 7)) as string), substr(s_birth, 5))
      );
原文地址:https://www.cnblogs.com/bigdata-familyMeals/p/14779231.html