hive 抽样方法

select *
from (
select *
from advert.dws_advert_order_model_sample_pcvr_v2_di
where dt>= date_sub('${date}',7) and dt< '${date}'
and ((label >=0.5 and rand()>(1-2*0.05)) or (label <0.5 and rand()>(1-2*0.05)))

union all

select *
from advert.dws_advert_order_model_sample_pcvr_v2_di
where dt='${date}'
and ((label >=0.5 and rand()>(1-2*0.05)) or (label <0.5 and rand()>(1-2*0.05)))
)  tmp

where rand()>0.3;

原文地址:https://www.cnblogs.com/zhangbojiangfeng/p/7278377.html