dws_user_action_daycount

每日会员行为

类似的有:每日商品行为

因为宽表要关联很多张表,但在hive中join操作会变得很慢

提供如下写法

检索多张表,采用union all + group by id 的形式累加数据,速度较快

drop table if exists dws_user_action_daycount; 
create external table dws_user_action_daycount 
( 
user_id string comment '用户 id', 
login_count bigint comment '登录次数', 
cart_count bigint comment '加入购物车次数', 
cart_amount double comment '加入购物车金额', 
order_count bigint comment '下单次数', 
order_amount decimal(16,2) comment '下单金额', 
payment_count bigint comment '支付次数', 
payment_amount decimal(16,2) comment '支付金额' 
) 
COMMENT '每日用户行为' 
PARTITIONED BY (`dt` string) 
stored as parquet 
location '/ecdw/dws/dws_user_action_daycount/' 
tblproperties ("parquet.compression"="lzo");


-- 插入
with tmp_login as 
( 
    select 
        user_id, 
        count(*) login_count 
    from dwd_start_log 
    where dt='2020-03-10' 
    and user_id is not null 
    group by user_id 
),

tmp_cart as ( 
    select 
        user_id, 
        count(*) cart_count,
        sum(cart_price*sku_num) cart_amount 
    from dwd_fact_cart_info 
    where dt='2020-03-10' 
    and user_id is not null 
    and date_format(create_time,'yyyy-MM-dd')='2020-03-10' 
    group by user_id 
),

tmp_order as 
(
    select 
        user_id, 
        count(*) order_count, 
        sum(final_total_amount) order_amount 
        from dwd_fact_order_info 
    where dt='2020-03-10' 
    and user_id is not null
    group by user_id 
) , 
tmp_payment as 
( 
    select 
        user_id, 
        count(*) payment_count, 
        sum(payment_amount) payment_amount 
    from dwd_fact_payment_info 
    where dt='2020-03-10'
    and user_id is not null
    group by user_id 
)

insert into table dws_user_action_daycount 
partition(dt='2020-03-10') 
select 
    user_actions.user_id, 
    sum(user_actions.login_count), 
    sum(user_actions.cart_count), 
    sum(user_actions.cart_amount), 
    sum(user_actions.order_count), 
    sum(user_actions.order_amount), 
    sum(user_actions.payment_count), 
    sum(user_actions.payment_amount) 
from 
( 
select 
    user_id, 
    login_count, 
    0 cart_count, 
    0 cart_amount, 
    0 order_count, 
    0 order_amount, 
    0 payment_count, 
    0 payment_amount 
from tmp_login 
union all 

select 
    user_id, 
    0 login_count, 
    cart_count, 
    cart_amount, 
    0 order_count, 
    0 order_amount, 
    0 payment_count, 
    0 payment_amount 
from tmp_cart
union all 

select 
    user_id, 
    0 login_count, 
    0 cart_count, 
    0 cart_amount, 
    order_count, 
    order_amount, 
    0 payment_count, 
    0 payment_amount 
from tmp_order 
union all 

select 
    user_id, 
    0 login_count, 
    0 cart_count, 
    0 cart_amount, 
    0 order_count, 
    0 order_amount, 
    payment_count, 
    payment_amount 
from tmp_payment 
) user_actions 
group by user_id;
原文地址:https://www.cnblogs.com/ldy233/p/14448579.html