电商数据处理项目

一、案例综述

1.数据文件说明

客户相关文件:

  1. UserInfo.csv:用户主表

  2. RegionInfo.csv:区域表

  3. UserAddress.csv:用户地址表

商品相关文件:

  1. GoodsInfo.csv:商品主表

  2. GoodsBrand.csv:商品品牌表

  3. GoodsColor.csv:商品颜色表

  4. GoodsSize.csv:商品尺码表

订单相关文件:

  1. OrderInfo.csv:订单主表

  2. OrderDetail.csv:订单详情表

表结构信息.xlsx: 建表参考信息

2.目标

1.求出购买产品金额最多的前十名顾客

2.求出购买产品金额最多的前十名顾客的最后登录时间

3.求出购买产品金额最多的前十名顾客的所在城市

4.求出购买力最强的前十个城市

5.求出购买力最强的前十个城市以及他们所在的省份

6.求出最畅销的十个品牌

7.求出最畅销的十种颜色、以及最不畅销的十种颜色

8.求出最畅销的十个商品所属品牌中所有商品的销售额

9.买不同商品种类最多的前十名用户所使用的收货城市都有哪些 (非重复计数count(distinct 字段名))

3.使用工具:MySQL

4.Error1157解决方法:

当用MySQL Workbench进行数据库的批量更新时,执行一个语句会碰到以下错误提示:Error Code: 1175

  You are using safe...without a WHERE that uses a KEY column

  执行语句:SET SQL_SAFE_UPDATES = 0;即可解决

二、了解表结构数据信息,建表并导入数据

建表举例:

create table goodsinfo(
          goodsid varchar(6) not null default '-',
          typeid varchar(3) not null default '-',
          markid varchar(4) not null default '-',
          goodstag varchar(100) not null default '-',
          brandtag varchar(100) not null default '-',
          customtag varchar(100) not null default '-',
          goodsname varchar(100) not null default '-',
          clickcount int not null default 0,
          clickcr int not null default 0,
          goodsnumber int not null default 0,
          goodsweight int not null default 0,
          marketprice double not null default 0,
          shopprice double not null default 0,
          addtime varchar(20) not null default 0,
          isonsale int not null default 0,
          sales int not null default 0,
          realsales int not null default 0,
          extraprice double not null default 0,
          goodsno varchar(9) not null default 0,
          pt varchar(9) not null default 0
         );  
 

#导入数据

          load data local infile 'E:/rawdata/GoodsInfo.csv'  into table goodsinfo    

          fields terminated by ','

     ignore 1 lines;

          select * from goodsinfo;

共建立9张表

三、统计查询

-- 1求出购买产品金额最多的前十名顾客

select userid, sum(orderamount) as 金额

from orderinfo

group by userid

order by 金额 desc

limit 10;

-- 2求出购买产品金额最多的前十名顾客的最后登录时间

select oi.userid, sum(orderamount) as 金额, lastlogin_ as 最后登录时间

from orderinfo as oi left join userinfo as ui on oi.userid = ui.userid

group by oi.userid

order by 金额 desc

limit 10;

-- 3求出购买产品金额最多的前十名顾客的所在城市

select userid, sum(orderamount) as 金额, regionname as 城市

from orderinfo as oi left join regioninfo as ri on city = regionid

group by userid

order by 金额 desc

limit 10;

-- 4求出购买力最强的前十个城市

select regionname as 城市, sum(orderamount) as 金额

from orderinfo as oi left join regioninfo as ri on city = regionid

group by city

order by 金额 desc

limit 10;

-- 5求出购买力最强的前十个城市以及他们所在的省份

create table a( select regionname as 城市, sum(orderamount) as 金额, province as 省份id

from orderinfo as oi left join regioninfo as ri on city = regionid

group by city order by 金额 desc

limit 10 );

select * from a;

select regionname as 省份, 城市, 金额

from a left join regioninfo on 省份id = regionid

order by 金额 desc;

或者 

select a.regionname as 省份, b.regionname as 城市, sum(orderamount) as 金额

from orderinfo as oi left join regioninfo as b on city = b.regionid left join regioninfo as a on province = a.regionid

group by city

order by 金额 desc limit 10;

-- 6求出最畅销的十个品牌

select brandtype as 品牌, sum(amount) as 销量

from orderdetail as od left join goodsinfo as gi on od.goodsid = gi.goodsid left join goodsbrand as gb on typeid = supplierid

group by typeid

order by 销量 desc

limit 10;

-- 7求出最畅销的十种颜色、以及最不畅销的十种颜色

(select colornote as 颜色, sum(amount) as 销量 from orderdetail as od

left join goodscolor as gc on od.colorid = gc.colorid

group by od.colorid order by 销量 desc limit 10)

union

(select colornote as 颜色, sum(amount) as 销量 from orderdetail as od

left join goodscolor as gc on od.colorid = gc.colorid

group by od.colorid order by 销量 asc limit 10);

-- 8求出最畅销的十个商品所属品牌中所有商品的销售额

#求出最畅销的十个商品所属品牌

create table aa (select typeid as 品牌id from orderdetail as od

left join goodsinfo as gs on od.goodsid = gs.goodsid

group by od.goodsid

order by sum(amount) desc

limit 10);

# 品牌中所有商品

create table bb (select goodsid as 商品id from aa

left join goodsinfo on 品牌id = typeid

group by 商品id);

#商品的销售额

select 商品id, sum(amount * goodsprice) as 销售额

from bb left join orderdetail on 商品id = goodsid

group by 商品id

order by 销售额 desc;

-- 9买不同商品种类最多的前十名用户所使用的收货城市都有哪些 -- (非重复计数count(distinct 字段名))

select od.userid as 用户, count(distinct goodsid) as 商品种类, group_concat(distinct regionname) as 城市名

from orderdetail as od

left join orderinfo as oi on od.orderid = oi.orderid

left join regioninfo on city = regionid

group by od.userid

order by 商品种类 desc

limit 10;

原文地址:https://www.cnblogs.com/lvzw/p/11555046.html