Hive 全表全字段对比

 -- Full-table, all-column comparison: list the rows that exist only on the
 -- source = 1 side of the union.
 --
 -- Both inputs are tagged with a source id and stacked, then grouped by every
 -- data column. The source tag must stay out of the group by: grouping on it
 -- keeps the two sides in separate groups, so the count would only say
 -- whether a row is unique within its own side.
 --
 -- A group of size 1 is a row seen exactly once across both sides, and
 -- max(source) tells which side it came from. Limitation: a row duplicated on
 -- one side and absent from the other forms a group of size 2 and is not
 -- reported.
 --
 -- NOTE(review): both branches read dev.astron_wangluochayi_1; presumably the
 -- second branch should read the other table being compared -- confirm.
 select
      t2.source
     ,t2.c1
     ,t2.c2
     ,t2.c3
     ,t2.cnt
 from (
     select
          max(t1.source) as source
         ,t1.c1
         ,t1.c2
         ,t1.c3
         ,count(*) as cnt
     from (
         select 1 as source, t11.* from dev.astron_wangluochayi_1 t11
         union all
         select 2 as source, t12.* from dev.astron_wangluochayi_1 t12
     ) t1
     -- group on the data columns only, so matching rows from both sides meet
     group by t1.c1, t1.c2, t1.c3
     having count(*) = 1
 ) t2
 where t2.source = 1
 ;
-- Quick consistency check between two row sets (inline sample data here).
-- Each row is collapsed into one '|'-joined string ("flag"), both sides are
-- stacked, and every flag that does not occur exactly twice is reported.
--
-- Probably the fastest approach, but it mainly answers "do the two sides
-- match?": the output carries only the flag and its count, not which side a
-- row came from, so pinpointing a difference takes extra work.
--
-- NULL is rendered as '<NULL>' rather than '' so that a NULL and an empty
-- string do not collapse into the same flag. Values are cast to string
-- explicitly before being handed to concat_ws.
-- Limitation: a value that itself contains '|' can still make two different
-- rows share a flag.
select
     t3.flag
    ,count(*) as cnt
from (
    select
        concat_ws('|'
            ,coalesce(cast(t1.id as string), '<NULL>')
            ,coalesce(t1.name, '<NULL>')
        ) as flag
    from (
        select 1 as id, 'a' as name union all
        select 2 as id, 'b' as name union all
        select 3 as id, 'c' as name
    ) t1
    union all
    select
        concat_ws('|'
            ,coalesce(cast(t2.id as string), '<NULL>')
            ,coalesce(t2.name, '<NULL>')
        ) as flag
    from (
        select 2 as id, 'b' as name union all
        select 3 as id, 'c' as name union all
        select 4 as id, 'd' as name
    ) t2
) t3
group by t3.flag
-- a flag present once on each side occurs exactly twice; anything else differs
having count(*) <> 2
;
-- 其他待补充的方案:
--   1. full join
--   2. row_number(支持无主键表)

未完待续

原文地址:https://www.cnblogs.com/chenzechao/p/9444191.html