每个县里都抽100个户且至少来自3个镇

需求:每县100户,且至少来自3个镇。若是县里没有3个镇,则取所有镇,但只取100条,若是县里所有镇不足100户,则取所有户。

方法1:花了4个小时

分析: 少于3个镇(不含)的县 --6个

         少于100户(含)  的县 --2个

         总共多少个县        --26个 select count(distinct county_pac),count(distinct town_pac) from tw表 where stat_time=20180826 ;

select county_pac,count(distinct town_pac) 镇个数,count(1) 户个数 from tw表 where stat_time=20180826 group by county_pac order by 2       

COUNTY_PAC  镇个数  户个数

1  460204000000 1 535

2 460210000000 1 1030

3 460404000000 1 4

4 460202000000 1 87

5 469033000000 1 350

6 460203000000 2 628

7 460108000000 4 515

8 460106000000 4 473

9 460105000000 5 1506

-----1.先找出县至少有3个镇 ,且每个镇至少有 35户

drop table cs_thz_1;

create table cs_thz_1 as

select * from (   select t.*,row_number() over (partition by county_pac order by residence_id ) rid2 from (     select residence_id,city_name_s,county_name_s,town_name_s,country_name_s,city_pac,county_pac,town_pac,country_pac           

,row_number() over (partition by town_pac order by dbms_random.value) fam_rid           

,count(1) over (partition by town_pac) fam_cnt             

,count(distinct town_pac) over (partition by county_pac) town_cnt       

from tw表 where stat_time=20180826  

) t where town_cnt>=3 and fam_cnt>=35 and fam_rid<=35--大于15户的镇,每个镇取15个 ) where rid2<=100 ;

select count(distinct county_pac),count(1) from cs_thz_1 --已插入20个县,还有6个县需要单独处理

-----2.再找出县=3个镇 ,且至少一个镇< 35户-----这个没有,若有,每个镇先选1个,再随机选97个

-----3.再找出县=2个镇 ------------------------              每个镇先选1个,再随机选98个

-----4.再找出县=1个镇 ------------------------              每个镇先选1个,再随机选99个

select county_pac,count(distinct town_pac) 镇个数,count(1) 户个数 from tw表 t where stat_time=20180826 and not exists( select 1 from cs_thz_1 a where a.county_pac=t.county_pac ) group by county_pac order by 2

--2

insert into cs_thz_1(residence_id,city_name_s,county_name_s,town_name_s,country_name_s,city_pac,county_pac,town_pac,country_pac) select residence_id,city_name_s,county_name_s,town_name_s,country_name_s,city_pac,county_pac,town_pac,country_pac from (   select residence_id,city_name_s,county_name_s,town_name_s,country_name_s,city_pac,county_pac,town_pac,country_pac          ,row_number() over (partition by town_pac order by dbms_random.value) fam_rid    from tw表 t where stat_time=20180826   and not exists( select 1 from cs_thz_1 a where a.county_pac=t.county_pac ) ) where fam_rid=1;

--3

insert into cs_thz_1(residence_id,city_name_s,county_name_s,town_name_s,country_name_s,city_pac,county_pac,town_pac,country_pac) select residence_id,city_name_s,county_name_s,town_name_s,country_name_s,city_pac,county_pac,town_pac,country_pac from (   select residence_id,city_name_s,county_name_s,town_name_s,country_name_s,city_pac,county_pac,town_pac,country_pac          ,row_number() over (partition by county_pac order by dbms_random.value) fam_rid    from tw表 t where stat_time=20180826   and exists( select county_pac from cs_thz_1 a where a.county_pac=t.county_pac group by a.county_pac having count(1)<100 and count(distinct town_pac)=2 )   and not exists ( select * from cs_thz_1 a where a.residence_id=t.residence_id ) ) where fam_rid<=98;

--4

insert into cs_thz_1(residence_id,city_name_s,county_name_s,town_name_s,country_name_s,city_pac,county_pac,town_pac,country_pac) select residence_id,city_name_s,county_name_s,town_name_s,country_name_s,city_pac,county_pac,town_pac,country_pac from (   select residence_id,city_name_s,county_name_s,town_name_s,country_name_s,city_pac,county_pac,town_pac,country_pac          ,row_number() over (partition by county_pac order by dbms_random.value) fam_rid    from tw表 t where stat_time=20180826   and exists( select county_pac from cs_thz_1 a where a.county_pac=t.county_pac group by a.county_pac having count(1)<100 and count(distinct town_pac)=1 )   and not exists ( select * from cs_thz_1 a where a.residence_id=t.residence_id ) ) where fam_rid<=99;

-----5.验证:24个县都能取100条(其中 460203000000只涉及2个镇),1个县取4条(涉及1个镇),1个县取87条(涉及1个镇)--共2491条

select county_pac,count(distinct town_pac) town_cnt,count(1) fam_cnt  from cs_thz_1 group by county_pac order by 3,2 ;

select * from cs_thz_1 order by county_pac,town_pac select count(1) from cs_thz_1;

方法2:在方法1的基础上又花了2个小时

---1.每个县 的每个镇先拿一条,但只插3个镇(每个镇至少有1条,否则不会入tw表),若不足3个镇(应该要2个),也这样弄 ---2.剔除上面的记录,再每个县随机拿(100-已拿条数)条 drop table cs_thz_2; create table cs_thz_2 as with tmp_3_town as (   select residence_id,city_name_s,county_name_s,town_name_s,country_name_s,city_pac,county_pac,town_pac,country_pac from (     select residence_id,city_name_s,county_name_s,town_name_s,country_name_s,city_pac,county_pac,town_pac,country_pac           ,row_number() over (partition by county_pac order by residence_id) fam_rid2     from (       select residence_id,city_name_s,county_name_s,town_name_s,country_name_s,city_pac,county_pac,town_pac,country_pac              ,row_number() over (partition by town_pac order by dbms_random.value) fam_rid        from tw表 where stat_time=20180826     ) where fam_rid=1   ) where fam_rid2<=3 ) select residence_id,city_name_s,county_name_s,town_name_s,country_name_s,city_pac,county_pac,town_pac,country_pac from tmp_3_town union all select residence_id,city_name_s,county_name_s,town_name_s,country_name_s,city_pac,county_pac,town_pac,country_pac from (   select residence_id,city_name_s,county_name_s,town_name_s,country_name_s,city_pac,county_pac,town_pac,country_pac          ,row_number() over (partition by county_pac order by dbms_random.value) fam_rid          ,b.fam_cnt as yi_cnt   from tw表 a   ,(select county_pac as t_county_pac,count(1) fam_cnt from tmp_3_town group by county_pac ) b   where a.stat_time=20180826 and a.county_pac=b.t_county_pac and not exists(     select * from tmp_3_town t where t.residence_id=a.residence_id   ) ) t1 where t1.fam_rid<=(100-yi_cnt) ;

-----3.验证:24个县都能取100条(其中 460203000000 只涉及2个镇),1个县取4条(涉及1个镇),1个县取87条(涉及1个镇)--共2491条      COUNTY_PAC  镇个数  户个数 1  460204000000 1 535 2 460210000000 1 1030 3 460404000000 1 4 4 460202000000 1 87 5 469033000000 1 350 6 460203000000 2 628 7 460108000000 4 515 8 460106000000 4 473 9 460105000000 5 1506 10 469001000000 7 6846

select county_pac,count(distinct town_pac) town_cnt,count(1) fam_cnt  from cs_thz_2 group by county_pac order by 3,2 ;

select count(distinct county_pac),count(1) from cs_thz_2 ;--26 2424

-----4.结果取数一样 select county_pac,count(distinct town_pac) town_cnt,count(1) fam_cnt  from cs_thz_1 group by county_pac order by 3,2 ;

原文地址:https://www.cnblogs.com/jiangqingfeng/p/9544049.html