大数据第49天—Mysql练习题12道之四-观看电影的次数-杨大伟

有一个5000万的用户文件(user_idnameage),一个2亿记录的用户看电影的记录文件(user_idurl),根据年龄段观看电影的次数进行排序?

 1 --建表
 2 --用户表
 3 drop table if exists test_four_log;
 4 create table test_four_user(
 5     user_id string COMMENT '用户ID',
 6     name string COMMENT '用户姓名',
 7     age int COMMENT '用户年龄'
 8 ) 
 9 row format delimited fields terminated by '	';
10 --日志表
11 drop table if exists test_four_log;
12 create table test_four_log(
13     user_id string COMMENT '用户ID',
14     url string COMMENT '链接'
15 )
16 row format delimited fields terminated by '	';
 1 --插入数据
 2 insert into table test_four_user values ('1','1',8);
 3 insert into table test_four_user values ('2','2',45);
 4 insert into table test_four_user values ('3','3',14);
 5 insert into table test_four_user values ('4','4',18);
 6 insert into table test_four_user values ('5','5',17);
 7 insert into table test_four_user values ('6','6',19);
 8 insert into table test_four_user values ('7','7',26);
 9 insert into table test_four_user values ('8','8',22);
10 insert into table test_four_log values('1','111');
11 insert into table test_four_log values('2','111');
12 insert into table test_four_log values('3','111');
13 insert into table test_four_log values('4','111');
14 insert into table test_four_log values('5','111');
15 insert into table test_four_log values('6','111');
16 insert into table test_four_log values('7','111');
17 insert into table test_four_log values('8','111');
18 insert into table test_four_log values('1','111');
19 insert into table test_four_log values('2','111');
20 insert into table test_four_log values('3','111');
21 insert into table test_four_log values('4','111');
22 insert into table test_four_log values('5','111');
23 insert into table test_four_log values('6','111');
24 insert into table test_four_log values('7','111');
25 insert into table test_four_log values('8','111');
26 insert into table test_four_log values('1','111');
27 insert into table test_four_log values('2','111');
28 insert into table test_four_log values('3','111');
29 insert into table test_four_log values('4','111');
30 insert into table test_four_log values('5','111');
31 insert into table test_four_log values('6','111');
32 insert into table test_four_log values('7','111');
33 insert into table test_four_log values('8','111');
 1 -- 根据年龄段观看电影的次数进行排序?
 2 select
 3     age_size `年龄段`,
 4     count(*) `观影次数`
 5 from
 6 (
 7     select
 8     u.*,
 9     l.url,
10     case
11     when u.age >=0 and u.age <= 10 then '1-10'
12     when u.age >=11 and u.age <= 20 then '11-20'
13     when u.age >=21 and u.age <= 30 then '21-30'
14     when u.age >=31 and u.age <= 40 then '31-40'
15     when u.age >=41 and u.age <= 50 then '41-50'
16     else '51-100'
17     end age_size
18     from
19     test_four_user u join test_four_log l on u.user_id = l.user_id 
20 ) t1
21 group by age_size
22 order by `观影次数` desc;
原文地址:https://www.cnblogs.com/shui68home/p/13591545.html