某互金数据分析笔试题学习

show databases;
create database ship;
use ship;
show tables;
create table leadership
(manager int(4)primary key,
DATE date NOT NULL,
COUNTRY CHAR(20) not null,
gender char(20) not null,
age int(11) not null,
q1 int(11) not null,
q2 int(11) not null,
q3 int(11) not null,
q4 int(11),
q5 int(11)
);
select * from leadership;
alter table leadership modify date char(20);
insert into leadership(manager,date,country,gender,age,q1,q2,q3,q4,q5)
values('1','10/24/08','US','M',32,5,4,5,5,5),
('2','10/28/08','US','F',40,3,5,2,5,5),
('3','10/01/08','UK','F',25,3,5,5,5,2),
('4','10/12/08','UK','M',39,2,3,4,NULL,NULL),
('5','05/01/09','UK','F',99,2,2,1,2,1);
#第一题排序
select gender,age from leadership order by gender;
#第二题分箱
select * ,ceiling(age/5) as 分箱 from leadership order by 分箱;
#第三题均值填补缺失值
show variables like 'SQL_SAFE_UPDATES';
set sql_safe_updates = 0;#关闭安全模式
#update leadership set q4 = (select avg(q4) from leadership) where q4 is null;
#mysql中update更新表的时候,子查询不能加入from,会报错。解决方法是:1.inner join 2.再加入一层嵌套
update leadership set q4 = (select avg(q4) from (select q4 from leadership)tep)where q4 is null;
#第四题去重
show variables like 'char%';
create table AAA (brand varchar(40),
产量 int(20),
订单量 int(20),
销售额 int(30)
);
#导入CSV文件
load data local infile "C:/Users/Administrator/Desktop/AAA.csv" into table AAA fields terminated by ',' lines terminated by ' ';
show full columns from AAA;
select * from AAA;
#查找全部重复记录
select * from AAA where (AAA.产量,AAA.订单量) in (select 产量,订单量 from AAA group by 产量,订单量 having count(*)>1);
#提取重复记录情况(去重后的结果)
select*,count(distinct 产量) from AAA group by 产量;#distinct直接去除返回去重后的条数
#第五题正则表达式(mysql中匹配要加入1.like2.regexp)正则表达式是用来匹配文本的特殊的串
select brand REGEXP '[a-z0-9+]' from AAA;
#第六题转换表
create table a1 (ID int(4),
姓名 varchar(10),
科目 varchar(10),
成绩 int(20)
);
load data local infile "C:/Users/Administrator/Desktop/a1.csv" into table a1 fields terminated by ',' lines terminated by ' ';
select * from a1;
select ID,姓名,sum(if(科目='语文',成绩,0)) as 语文成绩,sum(if(科目='数学',成绩,0)) as 数学成绩 from a1 group by ID;

 ------------

1.要求是可以用R,Python,SAS,SQL实现皆可,欢迎大家交流学习。

2.表格文件已上传。

原文地址:https://www.cnblogs.com/ao-yu-a/p/11068086.html