第7章 数仓搭建-DWS层
7.1 系统函数
7.1.1 nvl函数
1)基本语法:
NVL(表达式1,表达式2)
如果表达式1为空值,NVL返回值为表达式2的值,否则返回表达式1的值。
该函数的目的是把一个空值(null)转换成一个实际的值。其表达式的值可以是数字型、字符型和日期型。但是表达式1和表达式2的数据类型必须为同一个类型。
2)案例实操
select nvl(1,0);
select nvl(null,"hello");
7.1.2 日期处理函数
1)date_format函数(根据格式整理日期)
select date_format('2021-06-08','yyyy-MM');
2)date_add函数(加减日期)
select date_add('2021-06-08',-1);
3)next_day函数
(1)取当前天的下一个周一
select next_day('2021-06-08','MO');
说明:星期一到星期日的英文(Monday,Tuesday、Wednesday、Thursday、Friday、Saturday、Sunday)
(2)取当前周的周一
select date_add(next_day('2021-06-08','MO'),-7);
4)last_day函数(求当月最后一天日期)
select last_day('2021-06-08');
7.1.3 复杂数据类型定义
1)map结构数据定义
map<string,string>
2)array结构数据定义
array<string>
3)struct结构数据定义
struct<id:int,name:string,age:int>
4)struct和array嵌套定义
array<struct<id:int,name:string,age:int>>
7.2 DWS层
7.2.1 访客主题
1)建表语句
drop table if exists dws_visitor_action_daycount;
create external table dws_visitor_action_daycount(
--dwd_page_log
`mid_id` string comment '设备id',
`brand` string comment '设备品牌',
`model` string comment '设备型号',
`is_new` string comment '是否首次访问', -- 将这个设备一天中所有访问记录的 is_new字段进行聚合,只要一天中有一次为0,即为0,否则即为1
`channel` array<string> comment '渠道',
`os` array<string> comment '操作系统',
`area_code` array<string> comment '地区ID',
`version_code` array<string> comment '应用版本',
`visit_count` bigint comment '访问次数',
`page_stats` array<struct<page_id: string,page_count: bigint,during_time: bigint>> comment '页面访问统计'
) comment '每日设备行为表'
partitioned by (`dt` string)
stored as parquet
location '/warehouse/gmall/dws/dws_visitor_action_daycount'
tblproperties ("parquet.compression"="lzo");
2)数据装载(首日/每日一样,改个日期就行)
insert overwrite table dws_visitor_action_daycount partition (dt = '2021-06-08')
select t1.mid_id, --设备id
brand, --设备品牌
model, --设备型号
is_new, --是否首次访问
channel, --渠道
os, --操作系统
area_code, --地区ID
version_code, --应用版本
visit_count, --访问次数
page_stats --每个设备访问所有页面的统计信息聚合的结果(数组里装载的是一个结构体)
from
(
select mid_id, --设备id
concat_ws('|',collect_set(brand)) brand, --设备品牌
concat_ws('|',collect_set(model)) model, --设备型号
`if`(array_contains(collect_set(is_new),'0'),'0','1') is_new, --是否首次访问
collect_set(channel) channel, --渠道
collect_set(os) os, --操作系统
collect_set(area_code) area_code, --地区ID
collect_set(version_code) version_code, --应用版本
sum(`if`(last_page_id is null,1,0)) visit_count --访问次数
from dwd_page_log
where dt = '2021-06-08'
group by mid_id
) t1
join
(
--在当天中,每个设备访问所有页面的统计信息聚合的结果
select mid_id,collect_list(page_struct) page_stats
from
(
--在当天中,每个设备访问每个页面的统计信息
select mid_id,named_struct('page_id',page_id,'page_count',count(*),'during_time',sum(during_time)) page_struct
from dwd_page_log
where dt = '2021-06-08'
group by mid_id,page_id
) tmp
group by mid_id
) t2
on t1.mid_id = t2.mid_id;
3)查询加载结果
7.2.2 用户主题
1)建表语句
drop table if exists dws_user_action_daycount;
create external table dws_user_action_daycount(
`user_id` string comment '用户id',
-- 从dwd_page_log
`login_count` string comment '登录次数',
-- dwd_action_log
`cart_count` bigint comment '加入购物车次数',
`favor_count` bigint comment '收藏次数',
-- dwd_order_info or dwd_order_detail
`order_count` bigint comment '下单次数',
`order_activity_count` bigint comment '订单参与活动次数',
`order_activity_reduce_amount` decimal(16,2) comment '订单减免金额(活动)',
`order_coupon_count` bigint comment '订单用券次数',
`order_coupon_reduce_amount` decimal(16,2) comment '订单减免金额(优惠券)',
`order_original_amount` decimal(16,2) comment '订单单原始金额',
`order_final_amount` decimal(16,2) comment '订单总金额',
-- dwd_payment_info
`payment_count` bigint comment '支付次数',
`payment_amount` decimal(16,2) comment '支付金额',
-- dwd_order_refund_info
`refund_order_count` bigint comment '退单次数',
`refund_order_num` bigint comment '退单件数',
`refund_order_amount` decimal(16,2) comment '退单金额',
-- dwd_refund_payment
`refund_payment_count` bigint comment '退款次数',
-- dwd_refund_payment 关联 dwd_order_refund_info
`refund_payment_num` bigint comment '退款件数',
`refund_payment_amount` decimal(16,2) comment '退款金额',
-- dwd_coupon_use
`coupon_get_count` bigint comment '优惠券领取次数',
`coupon_using_count` bigint comment '优惠券使用(下单)次数',
`coupon_used_count` bigint comment '优惠券使用(支付)次数',
-- dwd_comment_info
`appraise_good_count` bigint comment '好评数',
`appraise_mid_count` bigint comment '中评数',
`appraise_bad_count` bigint comment '差评数',
`appraise_default_count` bigint comment '默认评价数',
-- dwd_order_detail
`order_detail_stats` array<struct<sku_id:string,sku_num:bigint,order_count:bigint,activity_reduce_amount:decimal(16,2),coupon_reduce_amount:decimal(16,2),original_amount:decimal(16,2),final_amount:decimal(16,2)>> comment '下单明细统计'
) comment '每日用户行为'
partitioned by (`dt` string)
stored as parquet
location '/warehouse/gmall/dws/dws_user_action_daycount/'
tblproperties ("parquet.compression"="lzo");
2)数据装载
(1)首日装载
set hive.exec.dynamic.partition.mode=nonstrict; insert overwrite table dws_user_action_daycount partition (dt) select user_id, sum(login_count) , sum(cart_count), sum(favor_count), sum(order_count), sum(order_activity_count), sum(order_activity_reduce_amount), sum(order_coupon_count), sum(order_coupon_reduce_amount), sum(order_original_amount), sum(order_final_amount), sum(payment_count), sum(payment_amount), sum(refund_order_count), sum(refund_order_num), sum(refund_order_amount), sum(refund_payment_count), sum(refund_payment_num), sum(refund_payment_amount), sum(coupon_get_count), sum(coupon_using_count), sum(coupon_used_count), sum(appraise_good_count), sum(appraise_mid_count), sum(appraise_bad_count), sum(appraise_default_count), max(if( order_detail_stats[0].sku_id = '-1' , null ,order_detail_stats )) order_detail_stats, dt from ( select user_id, dt, 0 login_count, 0 cart_count, 0 favor_count, 0 order_count, 0 order_activity_count, 0 order_activity_reduce_amount, 0 order_coupon_count, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_amount, 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, 0 coupon_get_count, 0 coupon_using_count, 0 coupon_used_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count, collect_list(sku_struct) order_detail_stats from ( select user_id, dt, named_struct('sku_id',sku_id, --skuid 'sku_num',sum(sku_num), --商品数量 'order_count',count(*), --下单次数 'activity_reduce_amount',cast (sum(split_activity_amount) as decimal(16,2)), --活动优惠分摊 'coupon_reduce_amount',cast(sum(split_coupon_amount) as decimal(16,2)), --优惠券优惠分摊 'original_amount',cast(sum(original_amount) as decimal(16,2)), --原始价格 'final_amount',cast(sum(split_final_amount) as decimal(16,2)) --最终价格分摊 ) sku_struct --结构体 from dwd_order_detail where dt <='2021-06-08' group by user_id,dt,sku_id ) tmp group by user_id,dt union all select user_id, dt, sum(`if`(last_page_id is not null,1,0)) login_count, --登录次数 0 cart_count, 0 favor_count, 0 order_count, 0 order_activity_count, 0 order_activity_reduce_amount, 0 order_coupon_count, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_amount, 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, 0 coupon_get_count, 0 coupon_using_count, 0 coupon_used_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count, `array`(named_struct('sku_id','-1', --skuid 'sku_num',cast(0 as bigint), --商品数量 'order_count',cast(0 as bigint), --下单次数 'activity_reduce_amount',cast(0.0 as decimal(16,2)), --活动优惠分摊 'coupon_reduce_amount',cast(0.0 as decimal(16,2)), --优惠券优惠分摊 'original_amount',cast(0.0 as decimal(16,2)), --原始价格 'final_amount',cast(0.0 as decimal(16,2)) --最终价格分摊 )) order_detail_stats from dwd_page_log where dt = '2021-06-08' and user_id is not null group by user_id,dt union all select user_id, dt, 0 login_count, sum(`if`(action_id = 'cart_add',1,0)) cart_count, --加入购物车次数 sum(`if`(action_id = 'favor_add',1,0)) favor_count, --收藏次数 0 order_count, 0 order_activity_count, 0 order_activity_reduce_amount, 0 order_coupon_count, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_amount, 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, 0 coupon_get_count, 0 coupon_using_count, 0 coupon_used_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count, `array`(named_struct('sku_id','-1', --skuid 'sku_num',cast(0 as bigint), --商品数量 'order_count',cast(0 as bigint), --下单次数 'activity_reduce_amount',cast(0.0 as decimal(16,2)), --活动优惠分摊 'coupon_reduce_amount',cast(0.0 as decimal(16,2)), --优惠券优惠分摊 'original_amount',cast(0.0 as decimal(16,2)), --原始价格 'final_amount',cast(0.0 as decimal(16,2)) --最终价格分摊 )) order_detail_stats from dwd_action_log where dt = '2021-06-08' and user_id is not null and action_id in ('cart_add','favor_add') group by user_id,dt union all select user_id, dt, 0 login_count, 0 cart_count, 0 favor_count, count(distinct order_id) order_count, --下单次数 count(activity_rule_id) order_activity_count, --订单参与活动次数 sum(split_activity_amount) order_activity_reduce_amount, --订单减免金额(活动) count(coupon_id) order_coupon_count, --订单用券次数 sum(split_coupon_amount) order_coupon_reduce_amount, --订单减免金额(优惠券) sum(original_amount) order_original_amount, --订单单原始金额 sum(split_final_amount) order_final_amount, --订单总金额 0 payment_count, 0 payment_amount, 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, 0 coupon_get_count, 0 coupon_using_count, 0 coupon_used_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count, `array`(named_struct('sku_id','-1', --skuid 'sku_num',cast(0 as bigint), --商品数量 'order_count',cast(0 as bigint), --下单次数 'activity_reduce_amount',cast(0.0 as decimal(16,2)), --活动优惠分摊 'coupon_reduce_amount',cast(0.0 as decimal(16,2)), --优惠券优惠分摊 'original_amount',cast(0.0 as decimal(16,2)), --原始价格 'final_amount',cast(0.0 as decimal(16,2)) --最终价格分摊 )) order_detail_stats from dwd_order_detail where dt <= '2021-06-08' group by user_id,dt union all select user_id, dt, 0 login_count, 0 cart_count, 0 favor_count, 0 order_count, 0 order_activity_count, 0 order_activity_reduce_amount, 0 order_coupon_count, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, count(*) payment_count, --支付次数 sum(payment_amount) payment_amount, --支付金额 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, 0 coupon_get_count, 0 coupon_using_count, 0 coupon_used_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count, `array`(named_struct('sku_id','-1', --skuid 'sku_num',cast(0 as bigint), --商品数量 'order_count',cast(0 as bigint), --下单次数 'activity_reduce_amount',cast(0.0 as decimal(16,2)), --活动优惠分摊 'coupon_reduce_amount',cast(0.0 as decimal(16,2)), --优惠券优惠分摊 'original_amount',cast(0.0 as decimal(16,2)), --原始价格 'final_amount',cast(0.0 as decimal(16,2)) --最终价格分摊 )) order_detail_stats from dwd_payment_info where dt <= '2021-06-08' group by user_id,dt union all -- 退单次数: 用户申请退回的一笔订单的一种商品,算一次 select user_id, dt, 0 login_count, 0 cart_count, 0 favor_count, 0 order_count, 0 order_activity_count, 0 order_activity_reduce_amount, 0 order_coupon_count, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_amount, count(*) refund_order_count, --退单次数 sum(refund_num) refund_order_num, --退单件数 sum(refund_amount) refund_order_amount, --退单金额 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, 0 coupon_get_count, 0 coupon_using_count, 0 coupon_used_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count, `array`(named_struct('sku_id','-1', --skuid 'sku_num',cast(0 as bigint), --商品数量 'order_count',cast(0 as bigint), --下单次数 'activity_reduce_amount',cast(0.0 as decimal(16,2)), --活动优惠分摊 'coupon_reduce_amount',cast(0.0 as decimal(16,2)), --优惠券优惠分摊 'original_amount',cast(0.0 as decimal(16,2)), --原始价格 'final_amount',cast(0.0 as decimal(16,2)) --最终价格分摊 )) order_detail_stats from dwd_order_refund_info where dt <= '2021-06-08' group by user_id,dt union all select user_id, dt, 0 login_count, 0 cart_count, 0 favor_count, 0 order_count, 0 order_activity_count, 0 order_activity_reduce_amount, 0 order_coupon_count, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_amount, 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, count(*) refund_payment_count, --退款次数 sum(refund_num) refund_payment_num, --退款件数 sum(refund_amount) refund_payment_amount, --退款金额 0 coupon_get_count, 0 coupon_using_count, 0 coupon_used_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count, `array`(named_struct('sku_id','-1', --skuid 'sku_num',cast(0 as bigint), --商品数量 'order_count',cast(0 as bigint), --下单次数 'activity_reduce_amount',cast(0.0 as decimal(16,2)), --活动优惠分摊 'coupon_reduce_amount',cast(0.0 as decimal(16,2)), --优惠券优惠分摊 'original_amount',cast(0.0 as decimal(16,2)), --原始价格 'final_amount',cast(0.0 as decimal(16,2)) --最终价格分摊 )) order_detail_stats from ( select user_id, dt, refund_amount, --退款金额 sku_id, order_id from dwd_refund_payment where dt <= '2021-06-08' ) t1 left join ( select refund_num, --退款件数 sku_id, order_id from dwd_order_refund_info where dt <= '2021-06-08' ) t2 on t1.order_id = t2.order_id and t1.sku_id = t2.sku_id group by user_id,dt union all select user_id, dt, 0 login_count, 0 cart_count, 0 favor_count, 0 order_count, 0 order_activity_count, 0 order_activity_reduce_amount, 0 order_coupon_count, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_amount, 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, sum(coupon_get_count) coupon_get_count, --优惠券领取次数 sum(coupon_using_count) coupon_using_count, --优惠券使用(下单)次数 sum(coupon_used_count) coupon_used_count, --优惠券使用(支付)次数 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count, `array`(named_struct('sku_id','-1', --skuid 'sku_num',cast(0 as bigint), --商品数量 'order_count',cast(0 as bigint), --下单次数 'activity_reduce_amount',cast(0.0 as decimal(16,2)), --活动优惠分摊 'coupon_reduce_amount',cast(0.0 as decimal(16,2)), --优惠券优惠分摊 'original_amount',cast(0.0 as decimal(16,2)), --原始价格 'final_amount',cast(0.0 as decimal(16,2)) --最终价格分摊 )) order_detail_stats from ( select user_id, date_format(get_time,'yyyy-MM-dd') dt, --优惠券领取时间 count(*) coupon_get_count, --优惠券领取次数 0 coupon_using_count, --优惠券使用(下单)次数 0 coupon_used_count --优惠券使用(支付)次数 from dwd_coupon_use where dt <= '2021-06-08' or dt='9999-99-99' --dt: 优惠券的完成时间 group by user_id,date_format(get_time,'yyyy-MM-dd') union all select user_id, date_format(using_time,'yyyy-MM-dd') dt, 0 coupon_get_count, --优惠券领取次数 count(*) coupon_using_count, --优惠券使用(下单)次数 0 coupon_used_count --优惠券使用(支付)次数 from dwd_coupon_use where dt <= '2021-06-08' or dt='9999-99-99' and using_time is not null group by user_id,date_format(using_time,'yyyy-MM-dd') union all select user_id, date_format(used_time,'yyyy-MM-dd') dt, 0 coupon_get_count, --优惠券领取次数 0 coupon_using_count, --优惠券使用(下单)次数 count(*) coupon_used_count --优惠券使用(支付)次数 from dwd_coupon_use where dt <= '2021-06-08' and used_time is not null group by user_id, date_format(used_time,'yyyy-MM-dd') ) tmp2 group by user_id,dt union all select user_id, dt, 0 login_count, 0 cart_count, 0 favor_count, 0 order_count, 0 order_activity_count, 0 order_activity_reduce_amount, 0 order_coupon_count, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_amount, 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, 0 coupon_get_count, 0 coupon_using_count, 0 coupon_used_count, sum(`if`(appraise = '1201',1,0)) appraise_good_count, --好评数 sum(`if`(appraise = '1202',1,0)) appraise_mid_count, --中评数 sum(`if`(appraise = '1203',1,0)) appraise_bad_count, --差评数 sum(`if`(appraise = '1204',1,0)) appraise_default_count, --默认评价数 `array`(named_struct('sku_id','-1', --skuid 'sku_num',cast(0 as bigint), --商品数量 'order_count',cast(0 as bigint), --下单次数 'activity_reduce_amount',cast(0.0 as decimal(16,2)), --活动优惠分摊 'coupon_reduce_amount',cast(0.0 as decimal(16,2)), --优惠券优惠分摊 'original_amount',cast(0.0 as decimal(16,2)), --原始价格 'final_amount',cast(0.0 as decimal(16,2)) --最终价格分摊 )) order_detail_stats from dwd_comment_info where dt <= '2021-06-08' group by user_id,dt ) tmp3 group by user_id,dt;
(2)每日装载
insert overwrite table dws_user_action_daycount partition (dt='2021-06-09') select user_id, sum(login_count) , sum(cart_count), sum(favor_count), sum(order_count), sum(order_activity_count), sum(order_activity_reduce_amount), sum(order_coupon_count), sum(order_coupon_reduce_amount), sum(order_original_amount), sum(order_final_amount), sum(payment_count), sum(payment_amount), sum(refund_order_count), sum(refund_order_num), sum(refund_order_amount), sum(refund_payment_count), sum(refund_payment_num), sum(refund_payment_amount), sum(coupon_get_count), sum(coupon_using_count), sum(coupon_used_count), sum(appraise_good_count), sum(appraise_mid_count), sum(appraise_bad_count), sum(appraise_default_count), max(if( order_detail_stats[0].sku_id = '-1' , null ,order_detail_stats )) order_detail_stats from ( select user_id, dt, 0 login_count, 0 cart_count, 0 favor_count, 0 order_count, 0 order_activity_count, 0 order_activity_reduce_amount, 0 order_coupon_count, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_amount, 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, 0 coupon_get_count, 0 coupon_using_count, 0 coupon_used_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count, collect_list(sku_struct) order_detail_stats from ( select user_id, dt, named_struct('sku_id',sku_id, --skuid 'sku_num',sum(sku_num), --商品数量 'order_count',count(*), --下单次数 'activity_reduce_amount',cast (sum(split_activity_amount) as decimal(16,2)), --活动优惠分摊 'coupon_reduce_amount',cast(sum(split_coupon_amount) as decimal(16,2)), --优惠券优惠分摊 'original_amount',cast(sum(original_amount) as decimal(16,2)), --原始价格 'final_amount',cast(sum(split_final_amount) as decimal(16,2)) --最终价格分摊 ) sku_struct --结构体 from dwd_order_detail where dt <='2021-06-09' group by user_id,dt,sku_id ) tmp group by user_id,dt union all select user_id, dt, sum(`if`(last_page_id is not null,1,0)) login_count, --登录次数 0 cart_count, 0 favor_count, 0 order_count, 0 order_activity_count, 0 order_activity_reduce_amount, 0 order_coupon_count, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_amount, 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, 0 coupon_get_count, 0 coupon_using_count, 0 coupon_used_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count, `array`(named_struct('sku_id','-1', --skuid 'sku_num',cast(0 as bigint), --商品数量 'order_count',cast(0 as bigint), --下单次数 'activity_reduce_amount',cast(0.0 as decimal(16,2)), --活动优惠分摊 'coupon_reduce_amount',cast(0.0 as decimal(16,2)), --优惠券优惠分摊 'original_amount',cast(0.0 as decimal(16,2)), --原始价格 'final_amount',cast(0.0 as decimal(16,2)) --最终价格分摊 )) order_detail_stats from dwd_page_log where dt = '2021-06-09' and user_id is not null group by user_id,dt union all select user_id, dt, 0 login_count, sum(`if`(action_id = 'cart_add',1,0)) cart_count, --加入购物车次数 sum(`if`(action_id = 'favor_add',1,0)) favor_count, --收藏次数 0 order_count, 0 order_activity_count, 0 order_activity_reduce_amount, 0 order_coupon_count, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_amount, 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, 0 coupon_get_count, 0 coupon_using_count, 0 coupon_used_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count, `array`(named_struct('sku_id','-1', --skuid 'sku_num',cast(0 as bigint), --商品数量 'order_count',cast(0 as bigint), --下单次数 'activity_reduce_amount',cast(0.0 as decimal(16,2)), --活动优惠分摊 'coupon_reduce_amount',cast(0.0 as decimal(16,2)), --优惠券优惠分摊 'original_amount',cast(0.0 as decimal(16,2)), --原始价格 'final_amount',cast(0.0 as decimal(16,2)) --最终价格分摊 )) order_detail_stats from dwd_action_log where dt = '2021-06-09' and user_id is not null and action_id in ('cart_add','favor_add') group by user_id,dt union all select user_id, dt, 0 login_count, 0 cart_count, 0 favor_count, count(distinct order_id) order_count, --下单次数 count(activity_rule_id) order_activity_count, --订单参与活动次数 sum(split_activity_amount) order_activity_reduce_amount, --订单减免金额(活动) count(coupon_id) order_coupon_count, --订单用券次数 sum(split_coupon_amount) order_coupon_reduce_amount, --订单减免金额(优惠券) sum(original_amount) order_original_amount, --订单单原始金额 sum(split_final_amount) order_final_amount, --订单总金额 0 payment_count, 0 payment_amount, 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, 0 coupon_get_count, 0 coupon_using_count, 0 coupon_used_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count, `array`(named_struct('sku_id','-1', --skuid 'sku_num',cast(0 as bigint), --商品数量 'order_count',cast(0 as bigint), --下单次数 'activity_reduce_amount',cast(0.0 as decimal(16,2)), --活动优惠分摊 'coupon_reduce_amount',cast(0.0 as decimal(16,2)), --优惠券优惠分摊 'original_amount',cast(0.0 as decimal(16,2)), --原始价格 'final_amount',cast(0.0 as decimal(16,2)) --最终价格分摊 )) order_detail_stats from dwd_order_detail where dt <= '2021-06-09' group by user_id,dt union all select user_id, dt, 0 login_count, 0 cart_count, 0 favor_count, 0 order_count, 0 order_activity_count, 0 order_activity_reduce_amount, 0 order_coupon_count, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, count(*) payment_count, --支付次数 sum(payment_amount) payment_amount, --支付金额 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, 0 coupon_get_count, 0 coupon_using_count, 0 coupon_used_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count, `array`(named_struct('sku_id','-1', --skuid 'sku_num',cast(0 as bigint), --商品数量 'order_count',cast(0 as bigint), --下单次数 'activity_reduce_amount',cast(0.0 as decimal(16,2)), --活动优惠分摊 'coupon_reduce_amount',cast(0.0 as decimal(16,2)), --优惠券优惠分摊 'original_amount',cast(0.0 as decimal(16,2)), --原始价格 'final_amount',cast(0.0 as decimal(16,2)) --最终价格分摊 )) order_detail_stats from dwd_payment_info where dt <= '2021-06-09' group by user_id,dt union all -- 退单次数: 用户申请退回的一笔订单的一种商品,算一次 select user_id, dt, 0 login_count, 0 cart_count, 0 favor_count, 0 order_count, 0 order_activity_count, 0 order_activity_reduce_amount, 0 order_coupon_count, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_amount, count(*) refund_order_count, --退单次数 sum(refund_num) refund_order_num, --退单件数 sum(refund_amount) refund_order_amount, --退单金额 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, 0 coupon_get_count, 0 coupon_using_count, 0 coupon_used_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count, `array`(named_struct('sku_id','-1', --skuid 'sku_num',cast(0 as bigint), --商品数量 'order_count',cast(0 as bigint), --下单次数 'activity_reduce_amount',cast(0.0 as decimal(16,2)), --活动优惠分摊 'coupon_reduce_amount',cast(0.0 as decimal(16,2)), --优惠券优惠分摊 'original_amount',cast(0.0 as decimal(16,2)), --原始价格 'final_amount',cast(0.0 as decimal(16,2)) --最终价格分摊 )) order_detail_stats from dwd_order_refund_info where dt <= '2021-06-09' group by user_id,dt union all select user_id, dt, 0 login_count, 0 cart_count, 0 favor_count, 0 order_count, 0 order_activity_count, 0 order_activity_reduce_amount, 0 order_coupon_count, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_amount, 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, count(*) refund_payment_count, --退款次数 sum(refund_num) refund_payment_num, --退款件数 sum(refund_amount) refund_payment_amount, --退款金额 0 coupon_get_count, 0 coupon_using_count, 0 coupon_used_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count, `array`(named_struct('sku_id','-1', --skuid 'sku_num',cast(0 as bigint), --商品数量 'order_count',cast(0 as bigint), --下单次数 'activity_reduce_amount',cast(0.0 as decimal(16,2)), --活动优惠分摊 'coupon_reduce_amount',cast(0.0 as decimal(16,2)), --优惠券优惠分摊 'original_amount',cast(0.0 as decimal(16,2)), --原始价格 'final_amount',cast(0.0 as decimal(16,2)) --最终价格分摊 )) order_detail_stats from ( select user_id, dt, refund_amount, --退款金额 sku_id, order_id from dwd_refund_payment where dt <= '2021-06-09' ) t1 left join ( select refund_num, --退款件数 sku_id, order_id from dwd_order_refund_info where dt > date_sub('2021-06-09',15) ) t2 on t1.order_id = t2.order_id and t1.sku_id = t2.sku_id group by user_id,dt union all select user_id, dt, 0 login_count, 0 cart_count, 0 favor_count, 0 order_count, 0 order_activity_count, 0 order_activity_reduce_amount, 0 order_coupon_count, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_amount, 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, count(*) coupon_get_count, --优惠券领取次数 sum(if(date_format(using_time,'yyyy-MM-dd') = '2021-06-09',1,0 )) coupon_using_count, --优惠券使用(下单)次数 sum(if(date_format(used_time,'yyyy-MM-dd') = '2021-06-09',1,0 )) coupon_used_count, --优惠券使用(支付)次数 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count, `array`(named_struct('sku_id','-1', --skuid 'sku_num',cast(0 as bigint), --商品数量 'order_count',cast(0 as bigint), --下单次数 'activity_reduce_amount',cast(0.0 as decimal(16,2)), --活动优惠分摊 'coupon_reduce_amount',cast(0.0 as decimal(16,2)), --优惠券优惠分摊 'original_amount',cast(0.0 as decimal(16,2)), --原始价格 'final_amount',cast(0.0 as decimal(16,2)) --最终价格分摊 )) order_detail_stats from dwd_coupon_use where (dt = '2021-06-09' or dt='9999-99-99') and date_format(get_time,'yyyy-MM-dd')='2021-06-09' group by user_id,dt union all select user_id, dt, 0 login_count, 0 cart_count, 0 favor_count, 0 order_count, 0 order_activity_count, 0 order_activity_reduce_amount, 0 order_coupon_count, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_amount, 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, 0 coupon_get_count, 0 coupon_using_count, 0 coupon_used_count, sum(`if`(appraise = '1201',1,0)) appraise_good_count, --好评数 sum(`if`(appraise = '1202',1,0)) appraise_mid_count, --中评数 sum(`if`(appraise = '1203',1,0)) appraise_bad_count, --差评数 sum(`if`(appraise = '1204',1,0)) appraise_default_count, --默认评价数 `array`(named_struct('sku_id','-1', --skuid 'sku_num',cast(0 as bigint), --商品数量 'order_count',cast(0 as bigint), --下单次数 'activity_reduce_amount',cast(0.0 as decimal(16,2)), --活动优惠分摊 'coupon_reduce_amount',cast(0.0 as decimal(16,2)), --优惠券优惠分摊 'original_amount',cast(0.0 as decimal(16,2)), --原始价格 'final_amount',cast(0.0 as decimal(16,2)) --最终价格分摊 )) order_detail_stats from dwd_comment_info where dt <= '2021-06-09' group by user_id,dt ) tmp3 group by user_id,dt;
3)查询加载结果
7.2.3 商品主题
1)建表语句
drop table if exists dws_sku_action_daycount; create external table dws_sku_action_daycount( `sku_id` string comment 'sku_id', --dwd_order_detail `order_count` bigint comment '被下单次数', `order_num` bigint comment '被下单件数', `order_activity_count` bigint comment '参与活动被下单次数', `order_coupon_count` bigint comment '使用优惠券被下单次数', `order_activity_reduce_amount` decimal(16,2) comment '优惠金额(活动)', `order_coupon_reduce_amount` decimal(16,2) comment '优惠金额(优惠券)', `order_original_amount` decimal(16,2) comment '被下单原价金额', `order_final_amount` decimal(16,2) comment '被下单最终金额', --dwd_payment_info join dwd_order_detail `payment_count` bigint comment '被支付次数', `payment_num` bigint comment '被支付件数', `payment_amount` decimal(16,2) comment '被支付金额', --dwd_order_refund_info `refund_order_count` bigint comment '被退单次数', `refund_order_num` bigint comment '被退单件数', `refund_order_amount` decimal(16,2) comment '被退单金额', --dwd_refund_payment `refund_payment_count` bigint comment '被退款次数', `refund_payment_num` bigint comment '被退款件数', `refund_payment_amount` decimal(16,2) comment '被退款金额', --dwd_action_log `cart_count` bigint comment '被加入购物车次数', `favor_count` bigint comment '被收藏次数', --dwd_comment_info `appraise_good_count` bigint comment '好评数', `appraise_mid_count` bigint comment '中评数', `appraise_bad_count` bigint comment '差评数', `appraise_default_count` bigint comment '默认评价数' ) comment '每日商品行为' partitioned by (`dt` string) stored as parquet location '/warehouse/gmall/dws/dws_sku_action_daycount/' tblproperties ("parquet.compression"="lzo");
2)数据装载
(1)首日装载
set hive.exec.dynamic.partition.mode=nonstrict; --开启动态分区 insert overwrite table dws_sku_action_daycount partition (dt) select sku_id, sum(order_count), sum(order_num), sum(order_activity_count), sum(order_coupon_count), sum(order_activity_reduce_amount), sum(order_coupon_reduce_amount), sum(order_original_amount), sum(order_final_amount), sum(payment_count), sum(payment_num), sum(payment_amount), sum(refund_order_count), sum(refund_order_num), sum(refund_order_amount), sum(refund_payment_count), sum(refund_payment_num), sum(refund_payment_amount), sum(cart_count), sum(favor_count), sum(appraise_good_count), sum(appraise_mid_count), sum(appraise_bad_count), sum(appraise_default_count), dt from ( select sku_id, dt, count(*) order_count, --被下单次数 sum(sku_num) order_num, --被下单件数 count(activity_rule_id) order_activity_count, --参与活动被下单次数 count(coupon_id) order_coupon_count, --使用优惠券被下单次数 sum(split_activity_amount) order_activity_reduce_amount, --优惠金额(活动) sum(split_coupon_amount) order_coupon_reduce_amount, --优惠金额(优惠券) sum(original_amount) order_original_amount, --被下单原价金额 sum(split_final_amount) order_final_amount, --被下单最终金额 0 payment_count, 0 payment_num, 0 payment_amount, 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, 0 cart_count, 0 favor_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count from dwd_order_detail where dt <= '2021-06-08' group by sku_id,dt union all select sku_id, dt, 0 order_count, 0 order_num, 0 order_activity_count, 0 order_coupon_count, 0 order_activity_reduce_amount, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, count(*) payment_count, --被支付次数 sum(sku_num) payment_num, --被支付件数 sum(split_final_amount) payment_amount, --被支付金额 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, 0 cart_count, 0 favor_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count from ( select sku_id, split_final_amount, --被下单最终金额 order_id, --订单id sku_num --商品数量 from dwd_order_detail where dt <= '2021-06-08' -- dt是下单日期,不是支付日期 ) t1 join ( select order_id, dt from dwd_payment_info where dt <= '2021-06-08' -- dt是支付日期 ) t2 on t1.order_id = t2.order_id group by sku_id,dt union all select sku_id, dt, 0 order_count, 0 order_num, 0 order_activity_count, 0 order_coupon_count, 0 order_activity_reduce_amount, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_num, 0 payment_amount, count(*) refund_order_count, --被退单次数 sum(refund_num) refund_order_num, --被退单件数 sum(refund_amount) refund_order_amount, --被退单金额 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, 0 cart_count, 0 favor_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count from dwd_order_refund_info where dt <= '2021-06-08' group by sku_id,dt union all select t1.sku_id, dt, 0 order_count, 0 order_num, 0 order_activity_count, 0 order_coupon_count, 0 order_activity_reduce_amount, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_num, 0 payment_amount, 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, count(*) refund_payment_count, --被退款次数 sum(refund_num) refund_payment_num, --被退款件数 sum(refund_amount) refund_payment_amount, --被退款金额 0 cart_count, 0 favor_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count from ( select refund_amount, --退款金额 sku_id, order_id, dt from dwd_refund_payment --退款表 where dt <= '2021-06-08' ) t1 left join ( select refund_num, --退款件数 sku_id, order_id --退单表 from dwd_order_refund_info where dt <= '2021-06-08' ) t2 on t1.order_id = t2.order_id and t1.sku_id = t2.sku_id group by t1.sku_id,dt union all select item sku_id, dt, 0 order_count, 0 order_num, 0 order_activity_count, 0 order_coupon_count, 0 order_activity_reduce_amount, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_num, 0 payment_amount, 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, sum(`if`(action_id = 'cart_add',1,0)) cart_count, --被加入购物车次数 sum(`if`(action_id = 'favor_add',1,0)) favor_count, --被收藏次数 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count from dwd_action_log where dt <= '2021-06-08' and action_id in ('cart_add', 'favor_add') group by item,dt union all select sku_id, dt, 0 order_count, 0 order_num, 0 order_activity_count, 0 order_coupon_count, 0 order_activity_reduce_amount, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_num, 0 payment_amount, 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, 0 cart_count, 0 favor_count, sum(`if`(appraise = '1201',1,0)) appraise_good_count, --好评数 sum(`if`(appraise = '1202',1,0)) appraise_mid_count, --中评数 sum(`if`(appraise = '1203',1,0)) appraise_bad_count, --差评数 sum(`if`(appraise = '1204',1,0)) appraise_default_count --默认评价数 from dwd_comment_info where dt <= '2021-06-08' group by sku_id,dt ) tmp group by sku_id,dt;
(2)每日装载
set hive.exec.dynamic.partition.mode=nonstrict; insert overwrite table dws_sku_action_daycount partition (dt) select sku_id, sum(order_count), sum(order_num), sum(order_activity_count), sum(order_coupon_count), sum(order_activity_reduce_amount), sum(order_coupon_reduce_amount), sum(order_original_amount), sum(order_final_amount), sum(payment_count), sum(payment_num), sum(payment_amount), sum(refund_order_count), sum(refund_order_num), sum(refund_order_amount), sum(refund_payment_count), sum(refund_payment_num), sum(refund_payment_amount), sum(cart_count), sum(favor_count), sum(appraise_good_count), sum(appraise_mid_count), sum(appraise_bad_count), sum(appraise_default_count), dt from ( select sku_id, dt, count(*) order_count, --被下单次数 sum(sku_num) order_num, --被下单件数 count(activity_rule_id) order_activity_count, --参与活动被下单次数 count(coupon_id) order_coupon_count, --使用优惠券被下单次数 sum(split_activity_amount) order_activity_reduce_amount, --优惠金额(活动) sum(split_coupon_amount) order_coupon_reduce_amount, --优惠金额(优惠券) sum(original_amount) order_original_amount, --被下单原价金额 sum(split_final_amount) order_final_amount, --被下单最终金额 0 payment_count, 0 payment_num, 0 payment_amount, 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, 0 cart_count, 0 favor_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count from dwd_order_detail where dt = '2021-06-09' group by sku_id,dt union all select sku_id, dt, 0 order_count, 0 order_num, 0 order_activity_count, 0 order_coupon_count, 0 order_activity_reduce_amount, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, count(*) payment_count, --被支付次数 sum(sku_num) payment_num, --被支付件数 sum(split_final_amount) payment_amount, --被支付金额 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, 0 cart_count, 0 favor_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count from ( select sku_id, split_final_amount, --被下单最终金额 order_id, --订单id sku_num --商品数量 from dwd_order_detail --订单事实表 -- 跨天支付的问题: 2021-06-09日支付的订单,可能是 2021-06-09日下的单,也可能是 2021-06-08日下的单 where dt = '2021-06-09' or dt = date_sub('2021-06-09',1) -- dt是下单日期,不是支付日期 ) t1 join ( select order_id, dt from dwd_payment_info --支付事实表 where dt = '2021-06-09' -- dt是支付日期 ) t2 on t1.order_id = t2.order_id group by sku_id,dt union all select sku_id, dt, 0 order_count, 0 order_num, 0 order_activity_count, 0 order_coupon_count, 0 order_activity_reduce_amount, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_num, 0 payment_amount, count(*) refund_order_count, --被退单次数 sum(refund_num) refund_order_num, --被退单件数 sum(refund_amount) refund_order_amount, --被退单金额 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, 0 cart_count, 0 favor_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count from dwd_order_refund_info where dt = '2021-06-09' group by sku_id,dt union all select t1.sku_id, dt, 0 order_count, 0 order_num, 0 order_activity_count, 0 order_coupon_count, 0 order_activity_reduce_amount, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_num, 0 payment_amount, 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, count(*) refund_payment_count, --被退款次数 sum(refund_num) refund_payment_num, --被退款件数 sum(refund_amount) refund_payment_amount, --被退款金额 0 cart_count, 0 favor_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count from ( select refund_amount, --退款金额 sku_id, order_id, dt from dwd_refund_payment --退款表:只统计退款完成的 where dt = '2021-06-09' ) t1 left join ( select refund_num, --退款件数 sku_id, order_id --退单表 from dwd_order_refund_info --事务型事实表 dt:退单的发起时间 where dt > date_sub('2021-06-09',15) ) t2 on t1.order_id = t2.order_id and t1.sku_id = t2.sku_id group by t1.sku_id,dt union all select item sku_id, dt, 0 order_count, 0 order_num, 0 order_activity_count, 0 order_coupon_count, 0 order_activity_reduce_amount, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_num, 0 payment_amount, 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, sum(`if`(action_id = 'cart_add',1,0)) cart_count, --被加入购物车次数 sum(`if`(action_id = 'favor_add',1,0)) favor_count, --被收藏次数 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count from dwd_action_log where dt = '2021-06-09' and action_id in ('cart_add', 'favor_add') group by item,dt union all select sku_id, dt, 0 order_count, 0 order_num, 0 order_activity_count, 0 order_coupon_count, 0 order_activity_reduce_amount, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_num, 0 payment_amount, 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, 0 cart_count, 0 favor_count, sum(`if`(appraise = '1201',1,0)) appraise_good_count, --好评数 sum(`if`(appraise = '1202',1,0)) appraise_mid_count, --中评数 sum(`if`(appraise = '1203',1,0)) appraise_bad_count, --差评数 sum(`if`(appraise = '1204',1,0)) appraise_default_count --默认评价数 from dwd_comment_info where dt = '2021-06-09' group by sku_id,dt ) tmp group by sku_id,dt;
3)查询加载结果
7.2.4 优惠券主题
1)建表语句
drop table if exists dws_coupon_info_daycount; --理论上产生行为的 coupon的各种指标(1,2,3) = 理论上产生行为的 coupon的各种指标(1,2,3) left join 实际上产生行为的 coupon的各种指标(1) create external table dws_coupon_info_daycount( `coupon_id` string comment '优惠券ID', -- dwd_coupon_use `get_count` bigint comment '领取次数', `order_count` bigint comment '使用(下单)次数', -- dwd_order_detail `order_reduce_amount` decimal(16,2) comment '使用某券的订单优惠金额', `order_original_amount` decimal(16,2) comment '使用某券的订单原价金额', `order_final_amount` decimal(16,2) comment '使用某券的订单总价金额', -- dwd_order_detail join dwd_payment_info 求出支付的订单的详请,统计用了哪种券 `payment_count` bigint comment '使用(支付)次数', `payment_reduce_amount` decimal(16,2) comment '使用某券的支付优惠金额', `payment_amount` decimal(16,2) comment '使用某券支付的总金额', `expire_count` bigint comment '过期次数' --expire_time为null,代表永久有效 ) comment '每日活动统计' partitioned by (`dt` string) stored as parquet location '/warehouse/gmall/dws/dws_coupon_info_daycount/' tblproperties ("parquet.compression"="lzo");
2)数据装载
(1)首日装载
insert overwrite table dws_coupon_info_daycount partition (dt) select t3.coupon_id, --优惠券id nvl(get_count,0), --领取次数 nvl(order_count,0), --使用(下单)次数 nvl(order_reduce_amount,0.0) , --使用某券的订单优惠金额 nvl(order_original_amount,0.0) , --使用某券的订单原价金额 nvl(order_final_amount,0.0) , --使用某券的订单总价金额 nvl(payment_count,0) , --使用(支付)次数 nvl(payment_reduce_amount,0.0) , --使用某券的支付优惠金额 nvl(payment_amount,0.0) , --使用某券支付的总金额 nvl(expire_count,0) , --过期次数:为null,代表永久有效 t3.dt from ( --理论上产生行为的 couponid和行为产生的日期 select id coupon_id, --优惠券id date_id dt from ( -- 求截至到2021-06-08日所有的优惠券 select id, --优惠券id start_time, --可以领取的开始日期 create_time, --创建时间 expire_time --过期时间 from dim_coupon_info where dt = '2021-06-08' ) t1 join ( --取商城app上线第一天 到 6月8日所有的日期 select date_id from dim_date_info where date_id between '2021-06-05' and '2021-06-08' ) t2 where date_id >= nvl(start_time,create_time) and date_id <= nvl(expire_time,'9999-99-99') ) t3 left join ( --实际上产生行为的 coupon的各种指标 select coupon_id, dt, sum(get_count) get_count, --领取次数 sum(order_count) order_count, --使用(下单)次数 sum(order_reduce_amount) order_reduce_amount, --使用某券的订单优惠金额 sum(order_original_amount) order_original_amount, --使用某券的订单原价金额 sum(order_final_amount) order_final_amount, --使用某券的订单总价金额 sum(payment_count) payment_count, --使用(支付)次数 sum(payment_reduce_amount) payment_reduce_amount, --使用某券的支付优惠金额 sum(payment_amount) payment_amount, --使用某券支付的总金额 sum(expire_count) expire_count --过期次数 from ( select coupon_id, dt, 0 get_count, 0 order_count, 0 expire_count, 0 payment_count, 0 order_reduce_amount, 0 order_original_amount, 0 order_final_amount, sum(split_coupon_amount) payment_reduce_amount, --使用某券的支付优惠金额 sum(split_final_amount) payment_amount --使用某券支付的总金额 from ( select split_final_amount, --最终价格分摊 order_id, --订单id split_coupon_amount, --优惠券优惠分摊 coupon_id --优惠券id from dwd_order_detail --订单明细事实表表 where dt <= '2021-06-08' and coupon_id is not null --dt是下单日期,不是支付日期 ) t1 join ( --求被支付的订单详请 select order_id, dt from dwd_payment_info --支付事实表表 where dt <= '2021-06-08' -- dt是支付日期 ) t2 on t1.order_id = t2.order_id group by coupon_id,dt union all select coupon_id, dt, 0 get_count, 0 order_count, 0 expire_count, 0 payment_count, sum(split_coupon_amount) order_reduce_amount, --使用某券的订单优惠金额 sum(original_amount) order_original_amount, --使用某券的订单原价金额 sum(split_final_amount) order_final_amount, --使用某券的订单总价金额 0 payment_reduce_amount, 0 payment_amount from dwd_order_detail where dt <= '2021-06-08' and coupon_id is not null --只取使用了优惠券的订单 group by coupon_id,dt union all select coupon_id, dt, sum(get_count) get_count, --领取次数 sum(order_count) order_count, --使用(下单)次数 sum(payment_count) payment_count, --使用(支付)次数 sum(expire_count) expire_count, ----过期次数 0 order_reduce_amount, 0 order_original_amount, 0 order_final_amount, 0 payment_reduce_amount, 0 payment_amount from ( --求每种券在每一天的领取次数 select coupon_id, date_format(get_time,'yyyy-MM-dd') dt, --优惠券领取时间 count(*) get_count, --领取次数 0 order_count, 0 payment_count, 0 expire_count from dwd_coupon_use --优惠券领用事实表 where dt <= '2021-06-08' or dt='9999-99-99' --dt: 优惠券的完成时间 group by coupon_id,date_format(get_time,'yyyy-MM-dd') union all --求每种券在每一天的下单次数 select coupon_id, date_format(using_time,'yyyy-MM-dd') dt, --使用时间(下单) 0 get_count, count(*) order_count, --使用(下单)次数 0 payment_count, 0 expire_count from dwd_coupon_use where dt <= '2021-06-08' or dt='9999-99-99' and using_time is not null group by coupon_id,date_format(using_time,'yyyy-MM-dd') union all --求每种券在每一天的支付次数 select coupon_id, date_format(used_time,'yyyy-MM-dd') dt, --使用时间(支付) 0 get_count, 0 order_count, count(*) payment_count, --使用(支付)次数 0 expire_count from dwd_coupon_use where dt <= '2021-06-08' or dt='9999-99-99' and used_time is not null --dt: 优惠券的完成时间 group by coupon_id,date_format(used_time,'yyyy-MM-dd') union all --求每种券在每一天的过期次数 select coupon_id, date_format(expire_time,'yyyy-MM-dd') dt, --过期时间 0 get_count, 0 order_count, 0 payment_count, count(*) expire_count --过期次数 from dwd_coupon_use where dt <= '2021-06-08' or dt='9999-99-99' and expire_time is not null group by coupon_id,date_format(expire_time,'yyyy-MM-dd') )tmp group by coupon_id,dt ) tmp2 group by coupon_id, dt ) t4 on t3.coupon_id = t4.coupon_id and t3.dt = t4.dt;
(2)每日装载
insert overwrite table dws_coupon_info_daycount partition (dt = '2021-06-09') select t3.coupon_id, --优惠券id nvl(get_count,0), --领取次数 nvl(order_count,0), --使用(下单)次数 nvl(order_reduce_amount,0.0) , --使用某券的订单优惠金额 nvl(order_original_amount,0.0) , --使用某券的订单原价金额 nvl(order_final_amount,0.0) , --使用某券的订单总价金额 nvl(payment_count,0) , --使用(支付)次数 nvl(payment_reduce_amount,0.0) , --使用某券的支付优惠金额 nvl(payment_amount,0.0) , --使用某券支付的总金额 nvl(expire_count,0) --过期次数:为null,代表永久有效 from ( --理论上产生行为的 couponid和行为产生的日期 select id coupon_id --优惠券id from dim_coupon_info where dt = '2021-06-09' and dt >= nvl(start_time,create_time) and dt <= nvl(expire_time,'9999-99-99') ) t3 left join ( --实际上产生行为的 coupon的各种指标 select coupon_id, sum(get_count) get_count, --领取次数 sum(order_count) order_count, --使用(下单)次数 sum(order_reduce_amount) order_reduce_amount, --使用某券的订单优惠金额 sum(order_original_amount) order_original_amount, --使用某券的订单原价金额 sum(order_final_amount) order_final_amount, --使用某券的订单总价金额 sum(payment_count) payment_count, --使用(支付)次数 sum(payment_reduce_amount) payment_reduce_amount, --使用某券的支付优惠金额 sum(payment_amount) payment_amount, --使用某券支付的总金额 sum(expire_count) expire_count --过期次数 from ( select coupon_id, 0 get_count, 0 order_count, 0 expire_count, 0 payment_count, 0 order_reduce_amount, 0 order_original_amount, 0 order_final_amount, sum(split_coupon_amount) payment_reduce_amount, --使用某券的支付优惠金额 sum(split_final_amount) payment_amount --使用某券支付的总金额 from dwd_order_detail where (dt = '2021-06-09' or dt = date_sub('2021-06-09',1)) and coupon_id is not null and order_id in ( select order_id from dwd_payment_info where dt = '2021-06-09' ) group by coupon_id union all select coupon_id, 0 get_count, 0 order_count, 0 expire_count, 0 payment_count, sum(split_coupon_amount) order_reduce_amount, --使用某券的订单优惠金额 sum(original_amount) order_original_amount, --使用某券的订单原价金额 sum(split_final_amount) order_final_amount, --使用某券的订单总价金额 0 payment_reduce_amount, 0 payment_amount from dwd_order_detail where dt = '2021-06-09' and coupon_id is not null --只取使用了优惠券的订单 group by coupon_id union all select coupon_id, sum(`if`(date_format(get_time,'yyyy-MM-dd') = '2021-06-09',1,0)) get_count, --领取次数 sum(`if`(date_format(using_time,'yyyy-MM-dd') = '2021-06-09',1,0)) order_count, --使用(下单)次数 sum(`if`(date_format(used_time,'yyyy-MM-dd') = '2021-06-09',1,0)) payment_count, --使用(支付)次数 sum(`if`(date_format(expire_time,'yyyy-MM-dd') = '2021-06-09',1,0)) expire_count, ----过期次数 0 order_reduce_amount, 0 order_original_amount, 0 order_final_amount, 0 payment_reduce_amount, 0 payment_amount from dwd_coupon_use where dt = '2021-06-09' or dt = '9999-99-99' group by coupon_id ) tmp2 group by coupon_id ) t4 on t3.coupon_id = t4.coupon_id;
3)查询加载结果
7.2.5 活动主题
1)建表语句
drop table if exists dws_activity_info_daycount; create external table dws_activity_info_daycount( `activity_rule_id` string comment '活动规则ID', `activity_id` string comment '活动ID', -- dwd_order_detail `order_count` bigint comment '参与某活动某规则下单次数', --注意:针对的是某个活动的某个具体规则 `order_reduce_amount` decimal(16,2) comment '参与某活动某规则下单减免金额', `order_original_amount` decimal(16,2) comment '参与某活动某规则下单原始金额', --只统计参与活动的订单明细,未参与活动的订单明细不统计 `order_final_amount` decimal(16,2) comment '参与某活动某规则下单最终金额', -- dwd_order_detail join dwd_payment_info `payment_count` bigint comment '参与某活动某规则支付次数', `payment_reduce_amount` decimal(16,2) comment '参与某活动某规则支付减免金额', `payment_amount` decimal(16,2) comment '参与某活动某规则支付金额' ) comment '每日活动统计' partitioned by (`dt` string) stored as parquet location '/warehouse/gmall/dws/dws_activity_info_daycount/' tblproperties ("parquet.compression"="lzo");
2)数据装载
(1)首日装载
insert overwrite table dws_activity_info_daycount partition (dt) select t3.activity_rule_id, --活动id t3.activity_id, --活动规则id nvl(order_count,0), --参与某活动某规则下单次数 nvl(order_reduce_amount,0.0), --参与某活动某规则下单减免金额 nvl(order_original_amount,0.0), --参与某活动某规则下单原始金额 nvl(order_final_amount,0.0), --参与某活动某规则下单最终金额 nvl(payment_count,0), --参与某活动某规则支付次数 nvl(payment_reduce_amount,0.0), --参与某活动某规则支付减免金额 nvl(payment_amount,0.0), --参与某活动某规则支付金额 t3.dt from ( -- 求理论上 可能产生行为的活动id和日期 select activity_id, --活动id activity_rule_id, --活动规则id date_id dt from ( select activity_id, --活动id activity_rule_id, --活动规则id start_time, --活动开始时间 end_time --活动结束时间 from dim_activity_rule_info where dt = '2021-06-08' ) t1 join ( select date_id from dim_date_info where date_id between '2021-06-05' and '2021-06-08' ) t2 where date_id >= start_time and date_id <= end_time --商城APP上线时间应该在活动时间范围内 ) t3 left join ( -- 实际上 产生行为的活动统计的指标 select activity_rule_id, activity_id, dt, sum(order_count) order_count, --参与某活动某规则下单次数 sum(order_reduce_amount) order_reduce_amount, --参与某活动某规则下单减免金额 sum(order_original_amount) order_original_amount, --参与某活动某规则下单原始金额 sum(order_final_amount) order_final_amount, --参与某活动某规则下单最终金额 sum(payment_count) payment_count, --参与某活动某规则支付次数 sum(payment_reduce_amount) payment_reduce_amount, --参与某活动某规则支付减免金额 sum(payment_amount) payment_amount --参与某活动某规则支付金额 from ( select activity_rule_id, activity_id, dt, 0 order_count, --参与某活动某规则下单次数 0 order_reduce_amount, --参与某活动某规则下单减免金额 0 order_original_amount, --参与某活动某规则下单原始金额 0 order_final_amount, --参与某活动某规则下单最终金额 count(*) payment_count, --参与某活动某规则支付次数 sum(split_activity_amount) payment_reduce_amount, --参与某活动某规则支付减免金额 sum(split_final_amount) payment_amount --参与某活动某规则支付金额 from ( select split_final_amount, --最终价格分摊 order_id, --订单id activity_rule_id, --活动规则id activity_id, --活动id split_activity_amount --活动优惠分摊 from dwd_order_detail --订单明细事实表 where dt <= '2021-06-08' and activity_rule_id is not null -- dt是下单日期,不是支付日期 ) t1 join ( --求被支付的订单详请 select order_id, --订单id dt from dwd_payment_info where dt <= '2021-06-08' -- dt是支付日期 ) t2 on t1.order_id = t2.order_id group by activity_rule_id,activity_id,dt union all select activity_rule_id, activity_id, dt, count(*) order_count, --参与某活动某规则下单次数 sum(split_activity_amount) order_reduce_amount, --参与某活动某规则下单减免金额 sum(original_amount) order_original_amount, --参与某活动某规则下单原始金额 sum(split_final_amount) order_final_amount, --参与某活动某规则下单最终金额 0 payment_count, 0 payment_reduce_amount, 0 payment_amount from dwd_order_detail where dt <= '2021-06-08' and activity_rule_id is not null group by activity_rule_id,activity_id,dt ) tmp group by activity_rule_id,activity_id,dt ) t4 on t3.activity_rule_id = t4.activity_rule_id and t3.dt = t4.dt;
(2)每日装载
insert overwrite table dws_activity_info_daycount partition (dt='2021-06-09') select t3.activity_rule_id, --活动id t3.activity_id, --活动规则id nvl(order_count,0), --参与某活动某规则下单次数 nvl(order_reduce_amount,0.0), --参与某活动某规则下单减免金额 nvl(order_original_amount,0.0), --参与某活动某规则下单原始金额 nvl(order_final_amount,0.0), --参与某活动某规则下单最终金额 nvl(payment_count,0), --参与某活动某规则支付次数 nvl(payment_reduce_amount,0.0), --参与某活动某规则支付减免金额 nvl(payment_amount,0.0) --参与某活动某规则支付金额 from ( -- 求理论上 可能产生行为的活动id和日期 select activity_id, --活动id activity_rule_id --活动规则id from dim_activity_rule_info where dt = '2021-06-09' and dt >= start_time and dt <= end_time --商城APP上线时间应该在活动时间范围内 ) t3 left join ( -- 实际上 产生行为的活动统计的指标 select activity_rule_id, activity_id, sum(order_count) order_count, --参与某活动某规则下单次数 sum(order_reduce_amount) order_reduce_amount, --参与某活动某规则下单减免金额 sum(order_original_amount) order_original_amount, --参与某活动某规则下单原始金额 sum(order_final_amount) order_final_amount, --参与某活动某规则下单最终金额 sum(payment_count) payment_count, --参与某活动某规则支付次数 sum(payment_reduce_amount) payment_reduce_amount, --参与某活动某规则支付减免金额 sum(payment_amount) payment_amount --参与某活动某规则支付金额 from ( select activity_rule_id, activity_id, 0 order_count, --参与某活动某规则下单次数 0 order_reduce_amount, --参与某活动某规则下单减免金额 0 order_original_amount, --参与某活动某规则下单原始金额 0 order_final_amount, --参与某活动某规则下单最终金额 count(*) payment_count, --参与某活动某规则支付次数 sum(split_activity_amount) payment_reduce_amount, --参与某活动某规则支付减免金额 sum(split_final_amount) payment_amount --参与某活动某规则支付金额 from dwd_order_detail where (dt = '2021-06-09' or dt=date_sub('2021-06-09',1)) and activity_rule_id is not null and order_id in ( --求被支付的订单详请 select order_id --订单id from dwd_payment_info where dt = '2021-06-09' -- dt是支付日期 ) group by activity_rule_id, activity_id union all select activity_rule_id, activity_id, count(*) order_count, --参与某活动某规则下单次数 sum(split_activity_amount) order_reduce_amount, --参与某活动某规则下单减免金额 sum(original_amount) order_original_amount, --参与某活动某规则下单原始金额 sum(split_final_amount) order_final_amount, --参与某活动某规则下单最终金额 0 payment_count, 0 payment_reduce_amount, 0 payment_amount from dwd_order_detail where dt = '2021-06-09' and activity_rule_id is not null --只取参与了活动的订单 group by activity_rule_id,activity_id ) tmp group by activity_rule_id,activity_id ) t4 on t3.activity_rule_id = t4.activity_rule_id;
3)查询加载结果
7.2.6 地区主题
1)建表语句
drop table if exists dws_area_stats_daycount; create external table dws_area_stats_daycount( --dwd_page_log `province_id` string comment '地区编号', `visit_count` bigint comment '访客访问次数', `login_count` bigint comment '用户访问次数', `visitor_count` bigint comment '访客人数', `user_count` bigint comment '用户人数', --dwd_order_info `order_count` bigint comment '下单次数', `order_original_amount` decimal(16,2) comment '下单原始金额', `order_final_amount` decimal(16,2) comment '下单最终金额', --dwd_payment_info `payment_count` bigint comment '支付次数', `payment_amount` decimal(16,2) comment '支付金额', --dwd_order_refund_info `refund_order_count` bigint comment '退单次数', `refund_order_amount` decimal(16,2) comment '退单金额', --dwd_refund_payment `refund_payment_count` bigint comment '退款次数', `refund_payment_amount` decimal(16,2) comment '退款金额' ) comment '每日地区统计表' partitioned by (`dt` string) stored as parquet location '/warehouse/gmall/dws/dws_area_stats_daycount/' tblproperties ("parquet.compression"="lzo");
2)数据装载
(1)首日装载
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table dws_area_stats_daycount partition (dt)
select province_id, --地区编号
sum(visit_count), --访客访问次数
sum(login_count), --用户访问次数
sum(visitor_count), --访客人数
sum(user_count), --用户人数
sum(order_count), --下单次数
sum(order_original_amount), --下单原始金额
sum(order_final_amount), --下单最终金额
sum(payment_count), --支付次数
sum(payment_amount), --支付金额
sum(refund_order_count), --退单次数
sum(refund_order_amount), --退单金额
sum(refund_payment_count), --退款次数
sum(refund_payment_amount), --退款金额
dt
from
(
select id province_id, --地区编号
visit_count, --访客访问次数
login_count, --用户访问次数
visitor_count, --访客人数
user_count, --用户人数
0 order_count,
0 order_original_amount,
0 order_final_amount,
0 payment_count,
0 payment_amount,
0 refund_order_count,
0 refund_order_amount,
0 refund_payment_count,
0 refund_payment_amount,
dt
from
(
select area_code, --地区编号
count(*) visit_count, --访客访问次数
count(user_id) login_count, --用户访问次数
count(distinct mid_id) visitor_count, --访客人数
count(distinct user_id) user_count, --用户人数
dt
from dwd_page_log --页面日志表
where dt <= '2021-06-08' and last_page_id is null
group by area_code,dt
) t1
left join dim_base_province area on t1.area_code = area.area_code
union all
select province_id, --地区id
0 visit_count,
0 login_count,
0 visitor_count,
0 user_count,
count(*) order_count, --下单次数
sum(original_amount) order_original_amount, --下单原始金额
sum(final_amount) order_final_amount, --下单最终金额
0 payment_count,
0 payment_amount,
0 refund_order_count,
0 refund_order_amount,
0 refund_payment_count,
0 refund_payment_amount,
date_format(create_time,'yyyy-MM-dd') dt --以订单创建时间作为分区时间字段
from dwd_order_info
where dt <= '2021-06-08' or dt = '9999-99-99'
group by province_id,date_format(create_time,'yyyy-MM-dd')
union all
select province_id,
0 visit_count,
0 login_count,
0 visitor_count,
0 user_count,
0 order_count,
0 order_original_amount,
0 order_final_amount,
count(*) payment_count, --支付次数
sum(payment_amount) payment_amount, --支付金额
0 refund_order_count,
0 refund_order_amount,
0 refund_payment_count,
0 refund_payment_amount,
dt
from dwd_payment_info
where dt <= '2021-06-08'
group by province_id,dt
union all
select province_id,
0 visit_count,
0 login_count,
0 visitor_count,
0 user_count,
0 order_count,
0 order_original_amount,
0 order_final_amount,
0 payment_count,
0 payment_amount,
count(*) refund_order_count, --退单次数
sum(refund_amount) refund_order_amount, --退单金额
0 refund_payment_count,
0 refund_payment_amount,
dt
from dwd_order_refund_info
where dt <= '2021-06-08'
group by province_id,dt
union all
select province_id,
0 visit_count,
0 login_count,
0 visitor_count,
0 user_count,
0 order_count,
0 order_original_amount,
0 order_final_amount,
0 payment_count,
0 payment_amount,
0 refund_order_count,
0 refund_order_amount,
count(*) refund_payment_count, --退款次数
sum(refund_amount) refund_payment_amount, --退款金额
dt
from dwd_refund_payment
where dt <= '2021-06-08'
group by province_id,dt
) tmp
group by province_id,dt;
(2)每日装载
insert overwrite table dws_area_stats_daycount partition (dt = '2021-06-09')
select province_id, --地区编号
sum(visit_count), --访客访问次数
sum(login_count), --用户访问次数
sum(visitor_count), --访客人数
sum(user_count), --用户人数
sum(order_count), --下单次数
sum(order_original_amount), --下单原始金额
sum(order_final_amount), --下单最终金额
sum(payment_count), --支付次数
sum(payment_amount), --支付金额
sum(refund_order_count), --退单次数
sum(refund_order_amount), --退单金额
sum(refund_payment_count), --退款次数
sum(refund_payment_amount) --退款金额
from
(
select id province_id, --地区编号
visit_count, --访客访问次数
login_count, --用户访问次数
visitor_count, --访客人数
user_count, --用户人数
0 order_count,
0 order_original_amount,
0 order_final_amount,
0 payment_count,
0 payment_amount,
0 refund_order_count,
0 refund_order_amount,
0 refund_payment_count,
0 refund_payment_amount
from
(
select area_code, --地区编号
count(*) visit_count, --访客访问次数
count(user_id) login_count, --用户访问次数
count(distinct mid_id) visitor_count, --访客人数
count(distinct user_id) user_count --用户人数
from dwd_page_log --页面日志表
where dt = '2021-06-09' and last_page_id is null
group by area_code,dt
) t1
left join dim_base_province area on t1.area_code = area.area_code
union all
select province_id, --地区id
0 visit_count,
0 login_count,
0 visitor_count,
0 user_count,
count(*) order_count, --下单次数
sum(original_amount) order_original_amount, --下单原始金额
sum(final_amount) order_final_amount, --下单最终金额
0 payment_count,
0 payment_amount,
0 refund_order_count,
0 refund_order_amount,
0 refund_payment_count,
0 refund_payment_amount
from dwd_order_info
where dt = '2021-06-09' or dt = '9999-99-99'
group by province_id
union all
select province_id,
0 visit_count,
0 login_count,
0 visitor_count,
0 user_count,
0 order_count,
0 order_original_amount,
0 order_final_amount,
count(*) payment_count, --支付次数
sum(payment_amount) payment_amount, --支付金额
0 refund_order_count,
0 refund_order_amount,
0 refund_payment_count,
0 refund_payment_amount
from dwd_payment_info
where dt = '2021-06-09'
group by province_id
union all
select province_id,
0 visit_count,
0 login_count,
0 visitor_count,
0 user_count,
0 order_count,
0 order_original_amount,
0 order_final_amount,
0 payment_count,
0 payment_amount,
count(*) refund_order_count, --退单次数
sum(refund_amount) refund_order_amount, --退单金额
0 refund_payment_count,
0 refund_payment_amount
from dwd_order_refund_info
where dt = '2021-06-09'
group by province_id
union all
select province_id,
0 visit_count,
0 login_count,
0 visitor_count,
0 user_count,
0 order_count,
0 order_original_amount,
0 order_final_amount,
0 payment_count,
0 payment_amount,
0 refund_order_count,
0 refund_order_amount,
count(*) refund_payment_count, --退款次数
sum(refund_amount) refund_payment_amount --退款金额
from dwd_refund_payment
where dt = '2021-06-09'
group by province_id
) tmp
group by province_id;
3)查询加载结果
7.2.7 DWS层首日数据装载脚本
1)编写脚本
(1)在/home/atguigu/bin目录下创建脚本dwd_to_dws_init.sh
vim dwd_to_dws_init.sh
#!/bin/bash APP=gmall if [ -n "$2" ] ;then do_date=$2 else echo "请传入日期参数" exit fi dws_visitor_action_daycount=" insert overwrite table ${APP}.dws_visitor_action_daycount partition (dt = '$do_date') select t1.mid_id, --设备id brand, --设备品牌 model, --设备型号 is_new, --是否首次访问 channel, --渠道 os, --操作系统 area_code, --地区ID version_code, --应用版本 visit_count, --访问次数 page_stats --每个设备访问所有页面的统计信息聚合的结果(数组里装载的是一个结构体) from ( select mid_id, --设备id concat_ws('|',collect_set(brand)) brand, --设备品牌 concat_ws('|',collect_set(model)) model, --设备型号 if (array_contains(collect_set(is_new),'0'),'0','1') is_new, --是否首次访问 collect_set(channel) channel, --渠道 collect_set(os) os, --操作系统 collect_set(area_code) area_code, --地区ID collect_set(version_code) version_code, --应用版本 sum( if (last_page_id is null,1,0)) visit_count --访问次数 from ${APP}.dwd_page_log where dt = '$do_date' group by mid_id ) t1 join ( --在当天中,每个设备访问所有页面的统计信息聚合的结果 select mid_id,collect_list(page_struct) page_stats from ( --在当天中,每个设备访问每个页面的统计信息 select mid_id,named_struct('page_id',page_id,'page_count',count(*),'during_time',sum(during_time)) page_struct from ${APP}.dwd_page_log where dt = '$do_date' group by mid_id,page_id ) tmp group by mid_id ) t2 on t1.mid_id = t2.mid_id; " dws_area_stats_daycount=" set hive.exec.dynamic.partition.mode=nonstrict; insert overwrite table ${APP}.dws_area_stats_daycount partition (dt) select province_id, --地区编号 sum(visit_count), --访客访问次数 sum(login_count), --用户访问次数 sum(visitor_count), --访客人数 sum(user_count), --用户人数 sum(order_count), --下单次数 sum(order_original_amount), --下单原始金额 sum(order_final_amount), --下单最终金额 sum(payment_count), --支付次数 sum(payment_amount), --支付金额 sum(refund_order_count), --退单次数 sum(refund_order_amount), --退单金额 sum(refund_payment_count), --退款次数 sum(refund_payment_amount), --退款金额 dt from ( select id province_id, --地区编号 visit_count, --访客访问次数 login_count, --用户访问次数 visitor_count, --访客人数 user_count, --用户人数 0 order_count, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_amount, 0 refund_order_count, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_amount, dt from ( select area_code, --地区编号 count(*) visit_count, --访客访问次数 count(user_id) login_count, --用户访问次数 count(distinct mid_id) visitor_count, --访客人数 count(distinct user_id) user_count, --用户人数 dt from ${APP}.dwd_page_log --页面日志表 where dt <= '$do_date' and last_page_id is null group by area_code,dt ) t1 left join ${APP}.dim_base_province area on t1.area_code = area.area_code union all select province_id, --地区id 0 visit_count, 0 login_count, 0 visitor_count, 0 user_count, count(*) order_count, --下单次数 sum(original_amount) order_original_amount, --下单原始金额 sum(final_amount) order_final_amount, --下单最终金额 0 payment_count, 0 payment_amount, 0 refund_order_count, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_amount, date_format(create_time,'yyyy-MM-dd') dt --以订单创建时间作为分区时间字段 from ${APP}.dwd_order_info where dt <= '$do_date' or dt = '9999-99-99' group by province_id,date_format(create_time,'yyyy-MM-dd') union all select province_id, 0 visit_count, 0 login_count, 0 visitor_count, 0 user_count, 0 order_count, 0 order_original_amount, 0 order_final_amount, count(*) payment_count, --支付次数 sum(payment_amount) payment_amount, --支付金额 0 refund_order_count, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_amount, dt from ${APP}.dwd_payment_info where dt <= '$do_date' group by province_id,dt union all select province_id, 0 visit_count, 0 login_count, 0 visitor_count, 0 user_count, 0 order_count, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_amount, count(*) refund_order_count, --退单次数 sum(refund_amount) refund_order_amount, --退单金额 0 refund_payment_count, 0 refund_payment_amount, dt from ${APP}.dwd_order_refund_info where dt <= '$do_date' group by province_id,dt union all select province_id, 0 visit_count, 0 login_count, 0 visitor_count, 0 user_count, 0 order_count, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_amount, 0 refund_order_count, 0 refund_order_amount, count(*) refund_payment_count, --退款次数 sum(refund_amount) refund_payment_amount, --退款金额 dt from ${APP}.dwd_refund_payment where dt <= '$do_date' group by province_id,dt ) tmp group by province_id,dt; " dws_user_action_daycount=" set hive.exec.dynamic.partition.mode=nonstrict; insert overwrite table ${APP}.dws_user_action_daycount partition (dt) select user_id, sum(login_count) , sum(cart_count), sum(favor_count), sum(order_count), sum(order_activity_count), sum(order_activity_reduce_amount), sum(order_coupon_count), sum(order_coupon_reduce_amount), sum(order_original_amount), sum(order_final_amount), sum(payment_count), sum(payment_amount), sum(refund_order_count), sum(refund_order_num), sum(refund_order_amount), sum(refund_payment_count), sum(refund_payment_num), sum(refund_payment_amount), sum(coupon_get_count), sum(coupon_using_count), sum(coupon_used_count), sum(appraise_good_count), sum(appraise_mid_count), sum(appraise_bad_count), sum(appraise_default_count), max(if( order_detail_stats[0].sku_id = '-1' , null ,order_detail_stats )) order_detail_stats, dt from ( select user_id, dt, 0 login_count, 0 cart_count, 0 favor_count, 0 order_count, 0 order_activity_count, 0 order_activity_reduce_amount, 0 order_coupon_count, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_amount, 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, 0 coupon_get_count, 0 coupon_using_count, 0 coupon_used_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count, collect_list(sku_struct) order_detail_stats from ( select user_id, dt, named_struct('sku_id',sku_id, --skuid 'sku_num',sum(sku_num), --商品数量 'order_count',count(*), --下单次数 'activity_reduce_amount',cast (sum(split_activity_amount) as decimal(16,2)), --活动优惠分摊 'coupon_reduce_amount',cast(sum(split_coupon_amount) as decimal(16,2)), --优惠券优惠分摊 'original_amount',cast(sum(original_amount) as decimal(16,2)), --原始价格 'final_amount',cast(sum(split_final_amount) as decimal(16,2)) --最终价格分摊 ) sku_struct --结构体 from ${APP}.dwd_order_detail where dt <='$do_date' group by user_id,dt,sku_id ) tmp group by user_id,dt union all select user_id, dt, sum( if (last_page_id is not null,1,0)) login_count, --登录次数 0 cart_count, 0 favor_count, 0 order_count, 0 order_activity_count, 0 order_activity_reduce_amount, 0 order_coupon_count, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_amount, 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, 0 coupon_get_count, 0 coupon_using_count, 0 coupon_used_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count, array(named_struct('sku_id','-1', --skuid 'sku_num',cast(0 as bigint), --商品数量 'order_count',cast(0 as bigint), --下单次数 'activity_reduce_amount',cast(0.0 as decimal(16,2)), --活动优惠分摊 'coupon_reduce_amount',cast(0.0 as decimal(16,2)), --优惠券优惠分摊 'original_amount',cast(0.0 as decimal(16,2)), --原始价格 'final_amount',cast(0.0 as decimal(16,2)) --最终价格分摊 )) order_detail_stats from ${APP}.dwd_page_log where dt = '$do_date' and user_id is not null group by user_id,dt union all select user_id, dt, 0 login_count, sum( if (action_id = 'cart_add',1,0)) cart_count, --加入购物车次数 sum( if (action_id = 'favor_add',1,0)) favor_count, --收藏次数 0 order_count, 0 order_activity_count, 0 order_activity_reduce_amount, 0 order_coupon_count, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_amount, 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, 0 coupon_get_count, 0 coupon_using_count, 0 coupon_used_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count, array(named_struct('sku_id','-1', --skuid 'sku_num',cast(0 as bigint), --商品数量 'order_count',cast(0 as bigint), --下单次数 'activity_reduce_amount',cast(0.0 as decimal(16,2)), --活动优惠分摊 'coupon_reduce_amount',cast(0.0 as decimal(16,2)), --优惠券优惠分摊 'original_amount',cast(0.0 as decimal(16,2)), --原始价格 'final_amount',cast(0.0 as decimal(16,2)) --最终价格分摊 )) order_detail_stats from ${APP}.dwd_action_log where dt = '$do_date' and user_id is not null and action_id in ('cart_add','favor_add') group by user_id,dt union all select user_id, dt, 0 login_count, 0 cart_count, 0 favor_count, count(distinct order_id) order_count, --下单次数 count(activity_rule_id) order_activity_count, --订单参与活动次数 sum(split_activity_amount) order_activity_reduce_amount, --订单减免金额(活动) count(coupon_id) order_coupon_count, --订单用券次数 sum(split_coupon_amount) order_coupon_reduce_amount, --订单减免金额(优惠券) sum(original_amount) order_original_amount, --订单单原始金额 sum(split_final_amount) order_final_amount, --订单总金额 0 payment_count, 0 payment_amount, 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, 0 coupon_get_count, 0 coupon_using_count, 0 coupon_used_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count, array(named_struct('sku_id','-1', --skuid 'sku_num',cast(0 as bigint), --商品数量 'order_count',cast(0 as bigint), --下单次数 'activity_reduce_amount',cast(0.0 as decimal(16,2)), --活动优惠分摊 'coupon_reduce_amount',cast(0.0 as decimal(16,2)), --优惠券优惠分摊 'original_amount',cast(0.0 as decimal(16,2)), --原始价格 'final_amount',cast(0.0 as decimal(16,2)) --最终价格分摊 )) order_detail_stats from ${APP}.dwd_order_detail where dt <= '$do_date' group by user_id,dt union all select user_id, dt, 0 login_count, 0 cart_count, 0 favor_count, 0 order_count, 0 order_activity_count, 0 order_activity_reduce_amount, 0 order_coupon_count, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, count(*) payment_count, --支付次数 sum(payment_amount) payment_amount, --支付金额 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, 0 coupon_get_count, 0 coupon_using_count, 0 coupon_used_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count, array(named_struct('sku_id','-1', --skuid 'sku_num',cast(0 as bigint), --商品数量 'order_count',cast(0 as bigint), --下单次数 'activity_reduce_amount',cast(0.0 as decimal(16,2)), --活动优惠分摊 'coupon_reduce_amount',cast(0.0 as decimal(16,2)), --优惠券优惠分摊 'original_amount',cast(0.0 as decimal(16,2)), --原始价格 'final_amount',cast(0.0 as decimal(16,2)) --最终价格分摊 )) order_detail_stats from ${APP}.dwd_payment_info where dt <= '$do_date' group by user_id,dt union all -- 退单次数: 用户申请退回的一笔订单的一种商品,算一次 select user_id, dt, 0 login_count, 0 cart_count, 0 favor_count, 0 order_count, 0 order_activity_count, 0 order_activity_reduce_amount, 0 order_coupon_count, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_amount, count(*) refund_order_count, --退单次数 sum(refund_num) refund_order_num, --退单件数 sum(refund_amount) refund_order_amount, --退单金额 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, 0 coupon_get_count, 0 coupon_using_count, 0 coupon_used_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count, array(named_struct('sku_id','-1', --skuid 'sku_num',cast(0 as bigint), --商品数量 'order_count',cast(0 as bigint), --下单次数 'activity_reduce_amount',cast(0.0 as decimal(16,2)), --活动优惠分摊 'coupon_reduce_amount',cast(0.0 as decimal(16,2)), --优惠券优惠分摊 'original_amount',cast(0.0 as decimal(16,2)), --原始价格 'final_amount',cast(0.0 as decimal(16,2)) --最终价格分摊 )) order_detail_stats from ${APP}.dwd_order_refund_info where dt <= '$do_date' group by user_id,dt union all select user_id, dt, 0 login_count, 0 cart_count, 0 favor_count, 0 order_count, 0 order_activity_count, 0 order_activity_reduce_amount, 0 order_coupon_count, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_amount, 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, count(*) refund_payment_count, --退款次数 sum(refund_num) refund_payment_num, --退款件数 sum(refund_amount) refund_payment_amount, --退款金额 0 coupon_get_count, 0 coupon_using_count, 0 coupon_used_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count, array(named_struct('sku_id','-1', --skuid 'sku_num',cast(0 as bigint), --商品数量 'order_count',cast(0 as bigint), --下单次数 'activity_reduce_amount',cast(0.0 as decimal(16,2)), --活动优惠分摊 'coupon_reduce_amount',cast(0.0 as decimal(16,2)), --优惠券优惠分摊 'original_amount',cast(0.0 as decimal(16,2)), --原始价格 'final_amount',cast(0.0 as decimal(16,2)) --最终价格分摊 )) order_detail_stats from ( select user_id, dt, refund_amount, --退款金额 sku_id, order_id from ${APP}.dwd_refund_payment where dt <= '$do_date' ) t1 left join ( select refund_num, --退款件数 sku_id, order_id from ${APP}.dwd_order_refund_info where dt <= '$do_date' ) t2 on t1.order_id = t2.order_id and t1.sku_id = t2.sku_id group by user_id,dt union all select user_id, dt, 0 login_count, 0 cart_count, 0 favor_count, 0 order_count, 0 order_activity_count, 0 order_activity_reduce_amount, 0 order_coupon_count, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_amount, 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, sum(coupon_get_count) coupon_get_count, --优惠券领取次数 sum(coupon_using_count) coupon_using_count, --优惠券使用(下单)次数 sum(coupon_used_count) coupon_used_count, --优惠券使用(支付)次数 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count, array(named_struct('sku_id','-1', --skuid 'sku_num',cast(0 as bigint), --商品数量 'order_count',cast(0 as bigint), --下单次数 'activity_reduce_amount',cast(0.0 as decimal(16,2)), --活动优惠分摊 'coupon_reduce_amount',cast(0.0 as decimal(16,2)), --优惠券优惠分摊 'original_amount',cast(0.0 as decimal(16,2)), --原始价格 'final_amount',cast(0.0 as decimal(16,2)) --最终价格分摊 )) order_detail_stats from ( select user_id, date_format(get_time,'yyyy-MM-dd') dt, --优惠券领取时间 count(*) coupon_get_count, --优惠券领取次数 0 coupon_using_count, --优惠券使用(下单)次数 0 coupon_used_count --优惠券使用(支付)次数 from ${APP}.dwd_coupon_use where dt <= '$do_date' or dt='9999-99-99' --dt: 优惠券的完成时间 group by user_id,date_format(get_time,'yyyy-MM-dd') union all select user_id, date_format(using_time,'yyyy-MM-dd') dt, 0 coupon_get_count, --优惠券领取次数 count(*) coupon_using_count, --优惠券使用(下单)次数 0 coupon_used_count --优惠券使用(支付)次数 from ${APP}.dwd_coupon_use where dt <= '$do_date' or dt='9999-99-99' and using_time is not null group by user_id,date_format(using_time,'yyyy-MM-dd') union all select user_id, date_format(used_time,'yyyy-MM-dd') dt, 0 coupon_get_count, --优惠券领取次数 0 coupon_using_count, --优惠券使用(下单)次数 count(*) coupon_used_count --优惠券使用(支付)次数 from ${APP}.dwd_coupon_use where dt <= '$do_date' and used_time is not null group by user_id, date_format(used_time,'yyyy-MM-dd') ) tmp2 group by user_id,dt union all select user_id, dt, 0 login_count, 0 cart_count, 0 favor_count, 0 order_count, 0 order_activity_count, 0 order_activity_reduce_amount, 0 order_coupon_count, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_amount, 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, 0 coupon_get_count, 0 coupon_using_count, 0 coupon_used_count, sum( if (appraise = '1201',1,0)) appraise_good_count, --好评数 sum( if (appraise = '1202',1,0)) appraise_mid_count, --中评数 sum( if (appraise = '1203',1,0)) appraise_bad_count, --差评数 sum( if (appraise = '1204',1,0)) appraise_default_count, --默认评价数 array(named_struct('sku_id','-1', --skuid 'sku_num',cast(0 as bigint), --商品数量 'order_count',cast(0 as bigint), --下单次数 'activity_reduce_amount',cast(0.0 as decimal(16,2)), --活动优惠分摊 'coupon_reduce_amount',cast(0.0 as decimal(16,2)), --优惠券优惠分摊 'original_amount',cast(0.0 as decimal(16,2)), --原始价格 'final_amount',cast(0.0 as decimal(16,2)) --最终价格分摊 )) order_detail_stats from ${APP}.dwd_comment_info where dt <= '$do_date' group by user_id,dt ) tmp3 group by user_id,dt; " dws_activity_info_daycount=" set hive.exec.dynamic.partition.mode=nonstrict; insert overwrite table ${APP}.dws_activity_info_daycount partition (dt) select t3.activity_rule_id, --活动id t3.activity_id, --活动规则id nvl(order_count,0), --参与某活动某规则下单次数 nvl(order_reduce_amount,0.0), --参与某活动某规则下单减免金额 nvl(order_original_amount,0.0), --参与某活动某规则下单原始金额 nvl(order_final_amount,0.0), --参与某活动某规则下单最终金额 nvl(payment_count,0), --参与某活动某规则支付次数 nvl(payment_reduce_amount,0.0), --参与某活动某规则支付减免金额 nvl(payment_amount,0.0), --参与某活动某规则支付金额 t3.dt from ( -- 求理论上 可能产生行为的活动id和日期 select activity_id, --活动id activity_rule_id, --活动规则id date_id dt from ( select activity_id, --活动id activity_rule_id, --活动规则id start_time, --活动开始时间 end_time --活动结束时间 from ${APP}.dim_activity_rule_info where dt = '$do_date' ) t1 join ( select date_id from ${APP}.dim_date_info where date_id between date_sub('$do_date',3) and '$do_date' ) t2 where date_id >= start_time and date_id <= end_time --商城APP上线时间应该在活动时间范围内 ) t3 left join ( -- 实际上 产生行为的活动统计的指标 select activity_rule_id, activity_id, dt, sum(order_count) order_count, --参与某活动某规则下单次数 sum(order_reduce_amount) order_reduce_amount, --参与某活动某规则下单减免金额 sum(order_original_amount) order_original_amount, --参与某活动某规则下单原始金额 sum(order_final_amount) order_final_amount, --参与某活动某规则下单最终金额 sum(payment_count) payment_count, --参与某活动某规则支付次数 sum(payment_reduce_amount) payment_reduce_amount, --参与某活动某规则支付减免金额 sum(payment_amount) payment_amount --参与某活动某规则支付金额 from ( select activity_rule_id, activity_id, dt, 0 order_count, --参与某活动某规则下单次数 0 order_reduce_amount, --参与某活动某规则下单减免金额 0 order_original_amount, --参与某活动某规则下单原始金额 0 order_final_amount, --参与某活动某规则下单最终金额 count(*) payment_count, --参与某活动某规则支付次数 sum(split_activity_amount) payment_reduce_amount, --参与某活动某规则支付减免金额 sum(split_final_amount) payment_amount --参与某活动某规则支付金额 from ( select split_final_amount, --最终价格分摊 order_id, --订单id activity_rule_id, --活动规则id activity_id, --活动id split_activity_amount --活动优惠分摊 from ${APP}.dwd_order_detail --订单明细事实表 where dt <= '$do_date' and activity_rule_id is not null -- dt是下单日期,不是支付日期 ) t1 join ( --求被支付的订单详请 select order_id, --订单id dt from ${APP}.dwd_payment_info where dt <= '$do_date' -- dt是支付日期 ) t2 on t1.order_id = t2.order_id group by activity_rule_id,activity_id,dt union all select activity_rule_id, activity_id, dt, count(*) order_count, --参与某活动某规则下单次数 sum(split_activity_amount) order_reduce_amount, --参与某活动某规则下单减免金额 sum(original_amount) order_original_amount, --参与某活动某规则下单原始金额 sum(split_final_amount) order_final_amount, --参与某活动某规则下单最终金额 0 payment_count, 0 payment_reduce_amount, 0 payment_amount from ${APP}.dwd_order_detail where dt <= '$do_date' and activity_rule_id is not null group by activity_rule_id,activity_id,dt ) tmp group by activity_rule_id,activity_id,dt ) t4 on t3.activity_rule_id = t4.activity_rule_id and t3.dt = t4.dt;" dws_sku_action_daycount=" set hive.exec.dynamic.partition.mode=nonstrict; --开启动态分区 insert overwrite table ${APP}.dws_sku_action_daycount partition (dt) select sku_id, sum(order_count), sum(order_num), sum(order_activity_count), sum(order_coupon_count), sum(order_activity_reduce_amount), sum(order_coupon_reduce_amount), sum(order_original_amount), sum(order_final_amount), sum(payment_count), sum(payment_num), sum(payment_amount), sum(refund_order_count), sum(refund_order_num), sum(refund_order_amount), sum(refund_payment_count), sum(refund_payment_num), sum(refund_payment_amount), sum(cart_count), sum(favor_count), sum(appraise_good_count), sum(appraise_mid_count), sum(appraise_bad_count), sum(appraise_default_count), dt from ( select sku_id, dt, count(*) order_count, --被下单次数 sum(sku_num) order_num, --被下单件数 count(activity_rule_id) order_activity_count, --参与活动被下单次数 count(coupon_id) order_coupon_count, --使用优惠券被下单次数 sum(split_activity_amount) order_activity_reduce_amount, --优惠金额(活动) sum(split_coupon_amount) order_coupon_reduce_amount, --优惠金额(优惠券) sum(original_amount) order_original_amount, --被下单原价金额 sum(split_final_amount) order_final_amount, --被下单最终金额 0 payment_count, 0 payment_num, 0 payment_amount, 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, 0 cart_count, 0 favor_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count from ${APP}.dwd_order_detail where dt <= '$do_date' group by sku_id,dt union all select sku_id, dt, 0 order_count, 0 order_num, 0 order_activity_count, 0 order_coupon_count, 0 order_activity_reduce_amount, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, count(*) payment_count, --被支付次数 sum(sku_num) payment_num, --被支付件数 sum(split_final_amount) payment_amount, --被支付金额 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, 0 cart_count, 0 favor_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count from ( select sku_id, split_final_amount, --被下单最终金额 order_id, --订单id sku_num --商品数量 from ${APP}.dwd_order_detail where dt <= '$do_date' -- dt是下单日期,不是支付日期 ) t1 join ( select order_id, dt from ${APP}.dwd_payment_info where dt <= '$do_date' -- dt是支付日期 ) t2 on t1.order_id = t2.order_id group by sku_id,dt union all select sku_id, dt, 0 order_count, 0 order_num, 0 order_activity_count, 0 order_coupon_count, 0 order_activity_reduce_amount, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_num, 0 payment_amount, count(*) refund_order_count, --被退单次数 sum(refund_num) refund_order_num, --被退单件数 sum(refund_amount) refund_order_amount, --被退单金额 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, 0 cart_count, 0 favor_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count from ${APP}.dwd_order_refund_info where dt <= '$do_date' group by sku_id,dt union all select t1.sku_id, dt, 0 order_count, 0 order_num, 0 order_activity_count, 0 order_coupon_count, 0 order_activity_reduce_amount, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_num, 0 payment_amount, 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, count(*) refund_payment_count, --被退款次数 sum(refund_num) refund_payment_num, --被退款件数 sum(refund_amount) refund_payment_amount, --被退款金额 0 cart_count, 0 favor_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count from ( select refund_amount, --退款金额 sku_id, order_id, dt from ${APP}.dwd_refund_payment --退款表 where dt <= '$do_date' ) t1 left join ( select refund_num, --退款件数 sku_id, order_id --退单表 from ${APP}.dwd_order_refund_info where dt <= '$do_date' ) t2 on t1.order_id = t2.order_id and t1.sku_id = t2.sku_id group by t1.sku_id,dt union all select item sku_id, dt, 0 order_count, 0 order_num, 0 order_activity_count, 0 order_coupon_count, 0 order_activity_reduce_amount, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_num, 0 payment_amount, 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, sum( if (action_id = 'cart_add',1,0)) cart_count, --被加入购物车次数 sum( if (action_id = 'favor_add',1,0)) favor_count, --被收藏次数 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count from ${APP}.dwd_action_log where dt <= '$do_date' and action_id in ('cart_add', 'favor_add') group by item,dt union all select sku_id, dt, 0 order_count, 0 order_num, 0 order_activity_count, 0 order_coupon_count, 0 order_activity_reduce_amount, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_num, 0 payment_amount, 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, 0 cart_count, 0 favor_count, sum( if (appraise = '1201',1,0)) appraise_good_count, --好评数 sum( if (appraise = '1202',1,0)) appraise_mid_count, --中评数 sum( if (appraise = '1203',1,0)) appraise_bad_count, --差评数 sum( if (appraise = '1204',1,0)) appraise_default_count --默认评价数 from ${APP}.dwd_comment_info where dt <= '$do_date' group by sku_id,dt ) tmp group by sku_id,dt; " dws_coupon_info_daycount=" set hive.exec.dynamic.partition.mode=nonstrict; insert overwrite table ${APP}.dws_coupon_info_daycount partition (dt) select t3.coupon_id, --优惠券id nvl(get_count,0), --领取次数 nvl(order_count,0), --使用(下单)次数 nvl(order_reduce_amount,0.0) , --使用某券的订单优惠金额 nvl(order_original_amount,0.0) , --使用某券的订单原价金额 nvl(order_final_amount,0.0) , --使用某券的订单总价金额 nvl(payment_count,0) , --使用(支付)次数 nvl(payment_reduce_amount,0.0) , --使用某券的支付优惠金额 nvl(payment_amount,0.0) , --使用某券支付的总金额 nvl(expire_count,0) , --过期次数:为null,代表永久有效 t3.dt from ( --理论上产生行为的 couponid和行为产生的日期 select id coupon_id, --优惠券id date_id dt from ( -- 求截至到2021-06-08日所有的优惠券 select id, --优惠券id start_time, --可以领取的开始日期 create_time, --创建时间 expire_time --过期时间 from ${APP}.dim_coupon_info where dt = '$do_date' ) t1 join ( --取商城app上线第一天 到 6月8日所有的日期 select date_id from ${APP}.dim_date_info where date_id between date_sub('$do_date',3) and '$do_date' ) t2 where date_id >= nvl(start_time,create_time) and date_id <= nvl(expire_time,'9999-99-99') ) t3 left join ( --实际上产生行为的 coupon的各种指标 select coupon_id, dt, sum(get_count) get_count, --领取次数 sum(order_count) order_count, --使用(下单)次数 sum(order_reduce_amount) order_reduce_amount, --使用某券的订单优惠金额 sum(order_original_amount) order_original_amount, --使用某券的订单原价金额 sum(order_final_amount) order_final_amount, --使用某券的订单总价金额 sum(payment_count) payment_count, --使用(支付)次数 sum(payment_reduce_amount) payment_reduce_amount, --使用某券的支付优惠金额 sum(payment_amount) payment_amount, --使用某券支付的总金额 sum(expire_count) expire_count --过期次数 from ( select coupon_id, dt, 0 get_count, 0 order_count, 0 expire_count, 0 payment_count, 0 order_reduce_amount, 0 order_original_amount, 0 order_final_amount, sum(split_coupon_amount) payment_reduce_amount, --使用某券的支付优惠金额 sum(split_final_amount) payment_amount --使用某券支付的总金额 from ( select split_final_amount, --最终价格分摊 order_id, --订单id split_coupon_amount, --优惠券优惠分摊 coupon_id --优惠券id from ${APP}.dwd_order_detail --订单明细事实表表 where dt <= '$do_date' and coupon_id is not null --dt是下单日期,不是支付日期 ) t1 join ( --求被支付的订单详请 select order_id, dt from ${APP}.dwd_payment_info --支付事实表表 where dt <= '$do_date' -- dt是支付日期 ) t2 on t1.order_id = t2.order_id group by coupon_id,dt union all select coupon_id, dt, 0 get_count, 0 order_count, 0 expire_count, 0 payment_count, sum(split_coupon_amount) order_reduce_amount, --使用某券的订单优惠金额 sum(original_amount) order_original_amount, --使用某券的订单原价金额 sum(split_final_amount) order_final_amount, --使用某券的订单总价金额 0 payment_reduce_amount, 0 payment_amount from ${APP}.dwd_order_detail where dt <= '$do_date' and coupon_id is not null --只取使用了优惠券的订单 group by coupon_id,dt union all select coupon_id, dt, sum(get_count) get_count, --领取次数 sum(order_count) order_count, --使用(下单)次数 sum(payment_count) payment_count, --使用(支付)次数 sum(expire_count) expire_count, ----过期次数 0 order_reduce_amount, 0 order_original_amount, 0 order_final_amount, 0 payment_reduce_amount, 0 payment_amount from ( --求每种券在每一天的领取次数 select coupon_id, date_format(get_time,'yyyy-MM-dd') dt, --优惠券领取时间 count(*) get_count, --领取次数 0 order_count, 0 payment_count, 0 expire_count from ${APP}.dwd_coupon_use --优惠券领用事实表 where dt <= '$do_date' or dt='9999-99-99' --dt: 优惠券的完成时间 group by coupon_id,date_format(get_time,'yyyy-MM-dd') union all --求每种券在每一天的下单次数 select coupon_id, date_format(using_time,'yyyy-MM-dd') dt, --使用时间(下单) 0 get_count, count(*) order_count, --使用(下单)次数 0 payment_count, 0 expire_count from ${APP}.dwd_coupon_use where dt <= '$do_date' or dt='9999-99-99' and using_time is not null group by coupon_id,date_format(using_time,'yyyy-MM-dd') union all --求每种券在每一天的支付次数 select coupon_id, date_format(used_time,'yyyy-MM-dd') dt, --使用时间(支付) 0 get_count, 0 order_count, count(*) payment_count, --使用(支付)次数 0 expire_count from ${APP}.dwd_coupon_use where dt <= '$do_date' or dt='9999-99-99' and used_time is not null --dt: 优惠券的完成时间 group by coupon_id,date_format(used_time,'yyyy-MM-dd') union all --求每种券在每一天的过期次数 select coupon_id, date_format(expire_time,'yyyy-MM-dd') dt, --过期时间 0 get_count, 0 order_count, 0 payment_count, count(*) expire_count --过期次数 from ${APP}.dwd_coupon_use where dt <= '$do_date' or dt='9999-99-99' and expire_time is not null group by coupon_id,date_format(expire_time,'yyyy-MM-dd') )tmp group by coupon_id,dt ) tmp2 group by coupon_id, dt ) t4 on t3.coupon_id = t4.coupon_id and t3.dt = t4.dt; " case $1 in "dws_visitor_action_daycount" ) hive -e "$dws_visitor_action_daycount" ;; "dws_user_action_daycount" ) hive -e "$dws_user_action_daycount" ;; "dws_activity_info_daycount" ) hive -e "$dws_activity_info_daycount" ;; "dws_area_stats_daycount" ) hive -e "$dws_area_stats_daycount" ;; "dws_sku_action_daycount" ) hive -e "$dws_sku_action_daycount" ;; "dws_coupon_info_daycount" ) hive -e "$dws_coupon_info_daycount" ;; "all" ) hive -e "$dws_visitor_action_daycount$dws_user_action_daycount$dws_activity_info_daycount$dws_area_stats_daycount$dws_sku_action_daycount$dws_coupon_info_daycount" ;; esac
(2)增加执行权限
chmod +x dwd_to_dws_init.sh
2)脚本使用
(1)执行脚本
dwd_to_dws_init.sh all 2021-06-08
(2)查看数据是否导入成功
7.2.8 DWS层每日数据装载脚本
1)编写脚本
(1)在/home/atguigu/bin目录下创建脚本dwd_to_dws.sh
vim dwd_to_dws.sh
#!/bin/bash APP=gmall # 如果是输入的日期按照取输入日期;如果没输入日期取当前时间的前一天 if [ -n "$2" ] ;then do_date=$2 else do_date= date -d "-1 day" +%F fi dws_visitor_action_daycount=" insert overwrite table ${APP}.dws_visitor_action_daycount partition (dt = '$do_date') select t1.mid_id, --设备id brand, --设备品牌 model, --设备型号 is_new, --是否首次访问 channel, --渠道 os, --操作系统 area_code, --地区ID version_code, --应用版本 visit_count, --访问次数 page_stats --每个设备访问所有页面的统计信息聚合的结果(数组里装载的是一个结构体) from ( select mid_id, --设备id concat_ws('|',collect_set(brand)) brand, --设备品牌 concat_ws('|',collect_set(model)) model, --设备型号 if (array_contains(collect_set(is_new),'0'),'0','1') is_new, --是否首次访问 collect_set(channel) channel, --渠道 collect_set(os) os, --操作系统 collect_set(area_code) area_code, --地区ID collect_set(version_code) version_code, --应用版本 sum( if (last_page_id is null,1,0)) visit_count --访问次数 from ${APP}.dwd_page_log where dt = '$do_date' group by mid_id ) t1 join ( --在当天中,每个设备访问所有页面的统计信息聚合的结果 select mid_id,collect_list(page_struct) page_stats from ( --在当天中,每个设备访问每个页面的统计信息 select mid_id,named_struct('page_id',page_id,'page_count',count(*),'during_time',sum(during_time)) page_struct from ${APP}.dwd_page_log where dt = '$do_date' group by mid_id,page_id ) tmp group by mid_id ) t2 on t1.mid_id = t2.mid_id; " dws_user_action_daycount=" insert overwrite table ${APP}.dws_user_action_daycount partition (dt='$do_date') select user_id, sum(login_count) , sum(cart_count), sum(favor_count), sum(order_count), sum(order_activity_count), sum(order_activity_reduce_amount), sum(order_coupon_count), sum(order_coupon_reduce_amount), sum(order_original_amount), sum(order_final_amount), sum(payment_count), sum(payment_amount), sum(refund_order_count), sum(refund_order_num), sum(refund_order_amount), sum(refund_payment_count), sum(refund_payment_num), sum(refund_payment_amount), sum(coupon_get_count), sum(coupon_using_count), sum(coupon_used_count), sum(appraise_good_count), sum(appraise_mid_count), sum(appraise_bad_count), sum(appraise_default_count), max(if( order_detail_stats[0].sku_id = '-1' , null ,order_detail_stats )) order_detail_stats from ( select user_id, dt, 0 login_count, 0 cart_count, 0 favor_count, 0 order_count, 0 order_activity_count, 0 order_activity_reduce_amount, 0 order_coupon_count, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_amount, 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, 0 coupon_get_count, 0 coupon_using_count, 0 coupon_used_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count, collect_list(sku_struct) order_detail_stats from ( select user_id, dt, named_struct('sku_id',sku_id, --skuid 'sku_num',sum(sku_num), --商品数量 'order_count',count(*), --下单次数 'activity_reduce_amount',cast (sum(split_activity_amount) as decimal(16,2)), --活动优惠分摊 'coupon_reduce_amount',cast(sum(split_coupon_amount) as decimal(16,2)), --优惠券优惠分摊 'original_amount',cast(sum(original_amount) as decimal(16,2)), --原始价格 'final_amount',cast(sum(split_final_amount) as decimal(16,2)) --最终价格分摊 ) sku_struct --结构体 from ${APP}.dwd_order_detail where dt <='$do_date' group by user_id,dt,sku_id ) tmp group by user_id,dt union all select user_id, dt, sum( if (last_page_id is not null,1,0)) login_count, --登录次数 0 cart_count, 0 favor_count, 0 order_count, 0 order_activity_count, 0 order_activity_reduce_amount, 0 order_coupon_count, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_amount, 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, 0 coupon_get_count, 0 coupon_using_count, 0 coupon_used_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count, array (named_struct('sku_id','-1', --skuid 'sku_num',cast(0 as bigint), --商品数量 'order_count',cast(0 as bigint), --下单次数 'activity_reduce_amount',cast(0.0 as decimal(16,2)), --活动优惠分摊 'coupon_reduce_amount',cast(0.0 as decimal(16,2)), --优惠券优惠分摊 'original_amount',cast(0.0 as decimal(16,2)), --原始价格 'final_amount',cast(0.0 as decimal(16,2)) --最终价格分摊 )) order_detail_stats from ${APP}.dwd_page_log where dt = '$do_date' and user_id is not null group by user_id,dt union all select user_id, dt, 0 login_count, sum( if (action_id = 'cart_add',1,0)) cart_count, --加入购物车次数 sum( if (action_id = 'favor_add',1,0)) favor_count, --收藏次数 0 order_count, 0 order_activity_count, 0 order_activity_reduce_amount, 0 order_coupon_count, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_amount, 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, 0 coupon_get_count, 0 coupon_using_count, 0 coupon_used_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count, array (named_struct('sku_id','-1', --skuid 'sku_num',cast(0 as bigint), --商品数量 'order_count',cast(0 as bigint), --下单次数 'activity_reduce_amount',cast(0.0 as decimal(16,2)), --活动优惠分摊 'coupon_reduce_amount',cast(0.0 as decimal(16,2)), --优惠券优惠分摊 'original_amount',cast(0.0 as decimal(16,2)), --原始价格 'final_amount',cast(0.0 as decimal(16,2)) --最终价格分摊 )) order_detail_stats from ${APP}.dwd_action_log where dt = '$do_date' and user_id is not null and action_id in ('cart_add','favor_add') group by user_id,dt union all select user_id, dt, 0 login_count, 0 cart_count, 0 favor_count, count(distinct order_id) order_count, --下单次数 count(activity_rule_id) order_activity_count, --订单参与活动次数 sum(split_activity_amount) order_activity_reduce_amount, --订单减免金额(活动) count(coupon_id) order_coupon_count, --订单用券次数 sum(split_coupon_amount) order_coupon_reduce_amount, --订单减免金额(优惠券) sum(original_amount) order_original_amount, --订单单原始金额 sum(split_final_amount) order_final_amount, --订单总金额 0 payment_count, 0 payment_amount, 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, 0 coupon_get_count, 0 coupon_using_count, 0 coupon_used_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count, array (named_struct('sku_id','-1', --skuid 'sku_num',cast(0 as bigint), --商品数量 'order_count',cast(0 as bigint), --下单次数 'activity_reduce_amount',cast(0.0 as decimal(16,2)), --活动优惠分摊 'coupon_reduce_amount',cast(0.0 as decimal(16,2)), --优惠券优惠分摊 'original_amount',cast(0.0 as decimal(16,2)), --原始价格 'final_amount',cast(0.0 as decimal(16,2)) --最终价格分摊 )) order_detail_stats from ${APP}.dwd_order_detail where dt <= '$do_date' group by user_id,dt union all select user_id, dt, 0 login_count, 0 cart_count, 0 favor_count, 0 order_count, 0 order_activity_count, 0 order_activity_reduce_amount, 0 order_coupon_count, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, count(*) payment_count, --支付次数 sum(payment_amount) payment_amount, --支付金额 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, 0 coupon_get_count, 0 coupon_using_count, 0 coupon_used_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count, array (named_struct('sku_id','-1', --skuid 'sku_num',cast(0 as bigint), --商品数量 'order_count',cast(0 as bigint), --下单次数 'activity_reduce_amount',cast(0.0 as decimal(16,2)), --活动优惠分摊 'coupon_reduce_amount',cast(0.0 as decimal(16,2)), --优惠券优惠分摊 'original_amount',cast(0.0 as decimal(16,2)), --原始价格 'final_amount',cast(0.0 as decimal(16,2)) --最终价格分摊 )) order_detail_stats from ${APP}.dwd_payment_info where dt <= '$do_date' group by user_id,dt union all -- 退单次数: 用户申请退回的一笔订单的一种商品,算一次 select user_id, dt, 0 login_count, 0 cart_count, 0 favor_count, 0 order_count, 0 order_activity_count, 0 order_activity_reduce_amount, 0 order_coupon_count, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_amount, count(*) refund_order_count, --退单次数 sum(refund_num) refund_order_num, --退单件数 sum(refund_amount) refund_order_amount, --退单金额 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, 0 coupon_get_count, 0 coupon_using_count, 0 coupon_used_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count, array (named_struct('sku_id','-1', --skuid 'sku_num',cast(0 as bigint), --商品数量 'order_count',cast(0 as bigint), --下单次数 'activity_reduce_amount',cast(0.0 as decimal(16,2)), --活动优惠分摊 'coupon_reduce_amount',cast(0.0 as decimal(16,2)), --优惠券优惠分摊 'original_amount',cast(0.0 as decimal(16,2)), --原始价格 'final_amount',cast(0.0 as decimal(16,2)) --最终价格分摊 )) order_detail_stats from ${APP}.dwd_order_refund_info where dt <= '$do_date' group by user_id,dt union all select user_id, dt, 0 login_count, 0 cart_count, 0 favor_count, 0 order_count, 0 order_activity_count, 0 order_activity_reduce_amount, 0 order_coupon_count, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_amount, 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, count(*) refund_payment_count, --退款次数 sum(refund_num) refund_payment_num, --退款件数 sum(refund_amount) refund_payment_amount, --退款金额 0 coupon_get_count, 0 coupon_using_count, 0 coupon_used_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count, array (named_struct('sku_id','-1', --skuid 'sku_num',cast(0 as bigint), --商品数量 'order_count',cast(0 as bigint), --下单次数 'activity_reduce_amount',cast(0.0 as decimal(16,2)), --活动优惠分摊 'coupon_reduce_amount',cast(0.0 as decimal(16,2)), --优惠券优惠分摊 'original_amount',cast(0.0 as decimal(16,2)), --原始价格 'final_amount',cast(0.0 as decimal(16,2)) --最终价格分摊 )) order_detail_stats from ( select user_id, dt, refund_amount, --退款金额 sku_id, order_id from ${APP}.dwd_refund_payment where dt <= '$do_date' ) t1 left join ( select refund_num, --退款件数 sku_id, order_id from ${APP}.dwd_order_refund_info where dt > date_sub('$do_date',15) ) t2 on t1.order_id = t2.order_id and t1.sku_id = t2.sku_id group by user_id,dt union all select user_id, dt, 0 login_count, 0 cart_count, 0 favor_count, 0 order_count, 0 order_activity_count, 0 order_activity_reduce_amount, 0 order_coupon_count, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_amount, 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, count(*) coupon_get_count, --优惠券领取次数 sum(if(date_format(using_time,'yyyy-MM-dd') = '$do_date',1,0 )) coupon_using_count, --优惠券使用(下单)次数 sum(if(date_format(used_time,'yyyy-MM-dd') = '$do_date',1,0 )) coupon_used_count, --优惠券使用(支付)次数 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count, array (named_struct('sku_id','-1', --skuid 'sku_num',cast(0 as bigint), --商品数量 'order_count',cast(0 as bigint), --下单次数 'activity_reduce_amount',cast(0.0 as decimal(16,2)), --活动优惠分摊 'coupon_reduce_amount',cast(0.0 as decimal(16,2)), --优惠券优惠分摊 'original_amount',cast(0.0 as decimal(16,2)), --原始价格 'final_amount',cast(0.0 as decimal(16,2)) --最终价格分摊 )) order_detail_stats from ${APP}.dwd_coupon_use where (dt = '$do_date' or dt='9999-99-99') and date_format(get_time,'yyyy-MM-dd')='$do_date' group by user_id,dt union all select user_id, dt, 0 login_count, 0 cart_count, 0 favor_count, 0 order_count, 0 order_activity_count, 0 order_activity_reduce_amount, 0 order_coupon_count, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_amount, 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, 0 coupon_get_count, 0 coupon_using_count, 0 coupon_used_count, sum( if (appraise = '1201',1,0)) appraise_good_count, --好评数 sum( if (appraise = '1202',1,0)) appraise_mid_count, --中评数 sum( if (appraise = '1203',1,0)) appraise_bad_count, --差评数 sum( if (appraise = '1204',1,0)) appraise_default_count, --默认评价数 array (named_struct('sku_id','-1', --skuid 'sku_num',cast(0 as bigint), --商品数量 'order_count',cast(0 as bigint), --下单次数 'activity_reduce_amount',cast(0.0 as decimal(16,2)), --活动优惠分摊 'coupon_reduce_amount',cast(0.0 as decimal(16,2)), --优惠券优惠分摊 'original_amount',cast(0.0 as decimal(16,2)), --原始价格 'final_amount',cast(0.0 as decimal(16,2)) --最终价格分摊 )) order_detail_stats from ${APP}.dwd_comment_info where dt <= '$do_date' group by user_id,dt ) tmp3 group by user_id,dt; " dws_activity_info_daycount=" insert overwrite table ${APP}.dws_activity_info_daycount partition (dt='$do_date') select t3.activity_rule_id, --活动id t3.activity_id, --活动规则id nvl(order_count,0), --参与某活动某规则下单次数 nvl(order_reduce_amount,0.0), --参与某活动某规则下单减免金额 nvl(order_original_amount,0.0), --参与某活动某规则下单原始金额 nvl(order_final_amount,0.0), --参与某活动某规则下单最终金额 nvl(payment_count,0), --参与某活动某规则支付次数 nvl(payment_reduce_amount,0.0), --参与某活动某规则支付减免金额 nvl(payment_amount,0.0) --参与某活动某规则支付金额 from ( -- 求理论上 可能产生行为的活动id和日期 select activity_id, --活动id activity_rule_id --活动规则id from ${APP}.dim_activity_rule_info where dt = '$do_date' and dt >= start_time and dt <= end_time --商城APP上线时间应该在活动时间范围内 ) t3 left join ( -- 实际上 产生行为的活动统计的指标 select activity_rule_id, activity_id, sum(order_count) order_count, --参与某活动某规则下单次数 sum(order_reduce_amount) order_reduce_amount, --参与某活动某规则下单减免金额 sum(order_original_amount) order_original_amount, --参与某活动某规则下单原始金额 sum(order_final_amount) order_final_amount, --参与某活动某规则下单最终金额 sum(payment_count) payment_count, --参与某活动某规则支付次数 sum(payment_reduce_amount) payment_reduce_amount, --参与某活动某规则支付减免金额 sum(payment_amount) payment_amount --参与某活动某规则支付金额 from ( select activity_rule_id, activity_id, 0 order_count, --参与某活动某规则下单次数 0 order_reduce_amount, --参与某活动某规则下单减免金额 0 order_original_amount, --参与某活动某规则下单原始金额 0 order_final_amount, --参与某活动某规则下单最终金额 count(*) payment_count, --参与某活动某规则支付次数 sum(split_activity_amount) payment_reduce_amount, --参与某活动某规则支付减免金额 sum(split_final_amount) payment_amount --参与某活动某规则支付金额 from ${APP}.dwd_order_detail where (dt = '$do_date' or dt=date_sub('$do_date',1)) and activity_rule_id is not null and order_id in ( --求被支付的订单详请 select order_id --订单id from ${APP}.dwd_payment_info where dt = '$do_date' -- dt是支付日期 ) group by activity_rule_id, activity_id union all select activity_rule_id, activity_id, count(*) order_count, --参与某活动某规则下单次数 sum(split_activity_amount) order_reduce_amount, --参与某活动某规则下单减免金额 sum(original_amount) order_original_amount, --参与某活动某规则下单原始金额 sum(split_final_amount) order_final_amount, --参与某活动某规则下单最终金额 0 payment_count, 0 payment_reduce_amount, 0 payment_amount from ${APP}.dwd_order_detail where dt = '$do_date' and activity_rule_id is not null --只取参与了活动的订单 group by activity_rule_id,activity_id ) tmp group by activity_rule_id,activity_id ) t4 on t3.activity_rule_id = t4.activity_rule_id; " dws_sku_action_daycount=" set hive.exec.dynamic.partition.mode=nonstrict; insert overwrite table ${APP}.dws_sku_action_daycount partition (dt) select sku_id, sum(order_count), sum(order_num), sum(order_activity_count), sum(order_coupon_count), sum(order_activity_reduce_amount), sum(order_coupon_reduce_amount), sum(order_original_amount), sum(order_final_amount), sum(payment_count), sum(payment_num), sum(payment_amount), sum(refund_order_count), sum(refund_order_num), sum(refund_order_amount), sum(refund_payment_count), sum(refund_payment_num), sum(refund_payment_amount), sum(cart_count), sum(favor_count), sum(appraise_good_count), sum(appraise_mid_count), sum(appraise_bad_count), sum(appraise_default_count), dt from ( select sku_id, dt, count(*) order_count, --被下单次数 sum(sku_num) order_num, --被下单件数 count(activity_rule_id) order_activity_count, --参与活动被下单次数 count(coupon_id) order_coupon_count, --使用优惠券被下单次数 sum(split_activity_amount) order_activity_reduce_amount, --优惠金额(活动) sum(split_coupon_amount) order_coupon_reduce_amount, --优惠金额(优惠券) sum(original_amount) order_original_amount, --被下单原价金额 sum(split_final_amount) order_final_amount, --被下单最终金额 0 payment_count, 0 payment_num, 0 payment_amount, 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, 0 cart_count, 0 favor_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count from ${APP}.dwd_order_detail where dt = '$do_date' group by sku_id,dt union all select sku_id, dt, 0 order_count, 0 order_num, 0 order_activity_count, 0 order_coupon_count, 0 order_activity_reduce_amount, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, count(*) payment_count, --被支付次数 sum(sku_num) payment_num, --被支付件数 sum(split_final_amount) payment_amount, --被支付金额 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, 0 cart_count, 0 favor_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count from ( select sku_id, split_final_amount, --被下单最终金额 order_id, --订单id sku_num --商品数量 from ${APP}.dwd_order_detail --订单事实表 -- 跨天支付的问题: $do_date日支付的订单,可能是 $do_date日下的单,也可能是 $do_date日下的单 where dt = '$do_date' or dt = date_sub('$do_date',1) -- dt是下单日期,不是支付日期 ) t1 join ( select order_id, dt from ${APP}.dwd_payment_info --支付事实表 where dt = '$do_date' -- dt是支付日期 ) t2 on t1.order_id = t2.order_id group by sku_id,dt union all select sku_id, dt, 0 order_count, 0 order_num, 0 order_activity_count, 0 order_coupon_count, 0 order_activity_reduce_amount, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_num, 0 payment_amount, count(*) refund_order_count, --被退单次数 sum(refund_num) refund_order_num, --被退单件数 sum(refund_amount) refund_order_amount, --被退单金额 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, 0 cart_count, 0 favor_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count from ${APP}.dwd_order_refund_info where dt = '$do_date' group by sku_id,dt union all select t1.sku_id, dt, 0 order_count, 0 order_num, 0 order_activity_count, 0 order_coupon_count, 0 order_activity_reduce_amount, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_num, 0 payment_amount, 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, count(*) refund_payment_count, --被退款次数 sum(refund_num) refund_payment_num, --被退款件数 sum(refund_amount) refund_payment_amount, --被退款金额 0 cart_count, 0 favor_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count from ( select refund_amount, --退款金额 sku_id, order_id, dt from ${APP}.dwd_refund_payment --退款表:只统计退款完成的 where dt = '$do_date' ) t1 left join ( select refund_num, --退款件数 sku_id, order_id --退单表 from ${APP}.dwd_order_refund_info --事务型事实表 dt:退单的发起时间 where dt > date_sub('$do_date',15) ) t2 on t1.order_id = t2.order_id and t1.sku_id = t2.sku_id group by t1.sku_id,dt union all select item sku_id, dt, 0 order_count, 0 order_num, 0 order_activity_count, 0 order_coupon_count, 0 order_activity_reduce_amount, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_num, 0 payment_amount, 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, sum( if (action_id = 'cart_add',1,0)) cart_count, --被加入购物车次数 sum( if (action_id = 'favor_add',1,0)) favor_count, --被收藏次数 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count from ${APP}.dwd_action_log where dt = '$do_date' and action_id in ('cart_add', 'favor_add') group by item,dt union all select sku_id, dt, 0 order_count, 0 order_num, 0 order_activity_count, 0 order_coupon_count, 0 order_activity_reduce_amount, 0 order_coupon_reduce_amount, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_num, 0 payment_amount, 0 refund_order_count, 0 refund_order_num, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_num, 0 refund_payment_amount, 0 cart_count, 0 favor_count, sum( if (appraise = '1201',1,0)) appraise_good_count, --好评数 sum( if (appraise = '1202',1,0)) appraise_mid_count, --中评数 sum( if (appraise = '1203',1,0)) appraise_bad_count, --差评数 sum( if (appraise = '1204',1,0)) appraise_default_count --默认评价数 from ${APP}.dwd_comment_info where dt = '$do_date' group by sku_id,dt ) tmp group by sku_id,dt; " dws_coupon_info_daycount=" insert overwrite table ${APP}.dws_coupon_info_daycount partition (dt = '$do_date') select t3.coupon_id, --优惠券id nvl(get_count,0), --领取次数 nvl(order_count,0), --使用(下单)次数 nvl(order_reduce_amount,0.0) , --使用某券的订单优惠金额 nvl(order_original_amount,0.0) , --使用某券的订单原价金额 nvl(order_final_amount,0.0) , --使用某券的订单总价金额 nvl(payment_count,0) , --使用(支付)次数 nvl(payment_reduce_amount,0.0) , --使用某券的支付优惠金额 nvl(payment_amount,0.0) , --使用某券支付的总金额 nvl(expire_count,0) --过期次数:为null,代表永久有效 from ( --理论上产生行为的 couponid和行为产生的日期 select id coupon_id --优惠券id from ${APP}.dim_coupon_info where dt = '$do_date' and dt >= nvl(start_time,create_time) and dt <= nvl(expire_time,'9999-99-99') ) t3 left join ( --实际上产生行为的 coupon的各种指标 select coupon_id, sum(get_count) get_count, --领取次数 sum(order_count) order_count, --使用(下单)次数 sum(order_reduce_amount) order_reduce_amount, --使用某券的订单优惠金额 sum(order_original_amount) order_original_amount, --使用某券的订单原价金额 sum(order_final_amount) order_final_amount, --使用某券的订单总价金额 sum(payment_count) payment_count, --使用(支付)次数 sum(payment_reduce_amount) payment_reduce_amount, --使用某券的支付优惠金额 sum(payment_amount) payment_amount, --使用某券支付的总金额 sum(expire_count) expire_count --过期次数 from ( select coupon_id, 0 get_count, 0 order_count, 0 expire_count, 0 payment_count, 0 order_reduce_amount, 0 order_original_amount, 0 order_final_amount, sum(split_coupon_amount) payment_reduce_amount, --使用某券的支付优惠金额 sum(split_final_amount) payment_amount --使用某券支付的总金额 from ${APP}.dwd_order_detail where (dt = '$do_date' or dt = date_sub('$do_date',1)) and coupon_id is not null and order_id in ( select order_id from ${APP}.dwd_payment_info where dt = '$do_date' ) group by coupon_id union all select coupon_id, 0 get_count, 0 order_count, 0 expire_count, 0 payment_count, sum(split_coupon_amount) order_reduce_amount, --使用某券的订单优惠金额 sum(original_amount) order_original_amount, --使用某券的订单原价金额 sum(split_final_amount) order_final_amount, --使用某券的订单总价金额 0 payment_reduce_amount, 0 payment_amount from ${APP}.dwd_order_detail where dt = '$do_date' and coupon_id is not null --只取使用了优惠券的订单 group by coupon_id union all select coupon_id, sum( if (date_format(get_time,'yyyy-MM-dd') = '$do_date',1,0)) get_count, --领取次数 sum( if (date_format(using_time,'yyyy-MM-dd') = '$do_date',1,0)) order_count, --使用(下单)次数 sum( if (date_format(used_time,'yyyy-MM-dd') = '$do_date',1,0)) payment_count, --使用(支付)次数 sum( if (date_format(expire_time,'yyyy-MM-dd') = '$do_date',1,0)) expire_count, ----过期次数 0 order_reduce_amount, 0 order_original_amount, 0 order_final_amount, 0 payment_reduce_amount, 0 payment_amount from ${APP}.dwd_coupon_use where dt = '$do_date' or dt = '9999-99-99' group by coupon_id ) tmp2 group by coupon_id ) t4 on t3.coupon_id = t4.coupon_id; " dws_area_stats_daycount=" insert overwrite table ${APP}.dws_area_stats_daycount partition (dt = '$do_date') select province_id, --地区编号 sum(visit_count), --访客访问次数 sum(login_count), --用户访问次数 sum(visitor_count), --访客人数 sum(user_count), --用户人数 sum(order_count), --下单次数 sum(order_original_amount), --下单原始金额 sum(order_final_amount), --下单最终金额 sum(payment_count), --支付次数 sum(payment_amount), --支付金额 sum(refund_order_count), --退单次数 sum(refund_order_amount), --退单金额 sum(refund_payment_count), --退款次数 sum(refund_payment_amount) --退款金额 from ( select id province_id, --地区编号 visit_count, --访客访问次数 login_count, --用户访问次数 visitor_count, --访客人数 user_count, --用户人数 0 order_count, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_amount, 0 refund_order_count, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_amount from ( select area_code, --地区编号 count(*) visit_count, --访客访问次数 count(user_id) login_count, --用户访问次数 count(distinct mid_id) visitor_count, --访客人数 count(distinct user_id) user_count --用户人数 from ${APP}.dwd_page_log --页面日志表 where dt = '$do_date' and last_page_id is null group by area_code,dt ) t1 left join ${APP}.dim_base_province area on t1.area_code = area.area_code union all select province_id, --地区id 0 visit_count, 0 login_count, 0 visitor_count, 0 user_count, count(*) order_count, --下单次数 sum(original_amount) order_original_amount, --下单原始金额 sum(final_amount) order_final_amount, --下单最终金额 0 payment_count, 0 payment_amount, 0 refund_order_count, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_amount from ${APP}.dwd_order_info where dt = '$do_date' or dt = '9999-99-99' group by province_id union all select province_id, 0 visit_count, 0 login_count, 0 visitor_count, 0 user_count, 0 order_count, 0 order_original_amount, 0 order_final_amount, count(*) payment_count, --支付次数 sum(payment_amount) payment_amount, --支付金额 0 refund_order_count, 0 refund_order_amount, 0 refund_payment_count, 0 refund_payment_amount from ${APP}.dwd_payment_info where dt = '$do_date' group by province_id union all select province_id, 0 visit_count, 0 login_count, 0 visitor_count, 0 user_count, 0 order_count, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_amount, count(*) refund_order_count, --退单次数 sum(refund_amount) refund_order_amount, --退单金额 0 refund_payment_count, 0 refund_payment_amount from ${APP}.dwd_order_refund_info where dt = '$do_date' group by province_id union all select province_id, 0 visit_count, 0 login_count, 0 visitor_count, 0 user_count, 0 order_count, 0 order_original_amount, 0 order_final_amount, 0 payment_count, 0 payment_amount, 0 refund_order_count, 0 refund_order_amount, count(*) refund_payment_count, --退款次数 sum(refund_amount) refund_payment_amount --退款金额 from ${APP}.dwd_refund_payment where dt = '$do_date' group by province_id ) tmp group by province_id; " case $1 in "dws_visitor_action_daycount" ) hive -e "$dws_visitor_action_daycount" ;; "dws_user_action_daycount" ) hive -e "$dws_user_action_daycount" ;; "dws_activity_info_daycount" ) hive -e "$dws_activity_info_daycount" ;; "dws_area_stats_daycount" ) hive -e "$dws_area_stats_daycount" ;; "dws_sku_action_daycount" ) hive -e "$dws_sku_action_daycount" ;; "dws_coupon_info_daycount" ) hive -e "$dws_coupon_info_daycount" ;; "all" ) hive -e "$dws_visitor_action_daycount$dws_user_action_daycount$dws_activity_info_daycount$dws_area_stats_daycount$dws_sku_action_daycount$dws_coupon_info_daycount" ;; esac
(2)增加执行权限
chmod +x dwd_to_dws.sh
2)脚本使用
(1)执行脚本
dwd_to_dws.sh all 2021-06-09
(2)查看数据是否导入成功