删除表中重复的行数据

#如下测试表b，如何删除重复值字段：

SQL> select * from b1;

ID NAME

---------- ----------

1 a

1 b

2 a

1 a
2 a

#小结：重复值多：则重建表更好、重复值少，表大，则delete更好

#方法一：

group by 分组，找到有效数据，创建一个新表，数据插入，随后删除原表，随后rename 该名称：

SQL> create table b2 as select * from b1 group by id,name;

SQL> select * from b2;

1 a 1 b 2 a

SQL> drop table b1 purge;

SQL> alter table b2 rename to b1;

#方法二：

找出不符合规则的数据，delete删除

SQL> select rowid,id,name from b1 where (id,name) in(select id,name from b1 having(count(*))>1 group by id,name);

ROWID                      ID NAME
------------------ ---------- ----------
AAAV3EAAEAAAAKTAAA          1 a
AAAV3EAAEAAAAKTAAB          1 a
AAAV3EAAEAAAAKTAAD          2 a
AAAV3EAAEAAAAKWAAE          1 a
AAAV3EAAEAAAAKWAAF          2 a

#本想通过rownum,最后还是选择rowid 唯一

#查询符合条件的ROWID：重复行数据中，最小的rowid的value

SQL> select min(rowid) from b1 group by id,name having(count(*))>1;

MIN(ROWID) ------------------

AAAV3EAAEAAAAKTAAA

AAAV3EAAEAAAAKTAAD

#删除语法：因为如果表没有任何列是唯一的，那么最好直接使用rowid,稳定唯一：

找到所有重复记录的value, 排除重复记录中，最小的rowid 保留一行

SQL> delete from b1

where rowid in

(select rowid from b1 where (id,name)

in(select id,name from b1 having(count(*))>1 group by id,name))

and rowid not in

(select min(rowid) from b1 having(count(*))>1 group by id,name);

3 rows deleted.

SQL> select * from b1;

ID NAME ---------- ----------

1 a 1 b 2 a

SQL> commit;