(高难度SQL)从产品表中找出相同前缀 (都云作者痴 谁解其中味)

--期盼值 找出AA,3;PDST,3;QPL-,3;TP-,2;

--基本表
create table tb_product(
    id number(9,0) primary key,
    name nvarchar2(20) not null);
--基本表充值   
insert into tb_product(id,name) values('1','AA');
insert into tb_product(id,name) values('2','AA款');
insert into tb_product(id,name) values('3','AA屏风');
insert into tb_product(id,name) values('4','PDST');
insert into tb_product(id,name) values('5','PDST款');
insert into tb_product(id,name) values('6','PDST-TJ');
insert into tb_product(id,name) values('7','QPL-TJ');
insert into tb_product(id,name) values('8','QPL-1');
insert into tb_product(id,name) values('9','QPL-2');
insert into tb_product(id,name) values('10','TP-1');
insert into tb_product(id,name) values('11','TP-2');
--序列表
create table tb_seq(
    id number(9,0) primary key);
--序列表充值
insert into tb_seq
select rownum from dual
connect by level<10
order by dbms_random.random;

--求所有名称及其长度
select name,length(name) as len from tb_product

--求最大长度
select max(len) as maxlen from (select name,length(name) as len from tb_product)

--求截取序列
select id from tb_seq where id<=(select max(len) as maxlen from (select name,length(name) as len from tb_product)) order by id

--tb_product表和截取序列表求笛卡儿积
select p.name,length(p.name) as namelen,seq.id as cutlen from tb_product p,(select id from tb_seq where id<=(select max(len) as maxlen from (select name,length(name) as len from tb_product)) order by id) seq

--看name长度和length的关系
select (case when a.namelen<a.cutlen then 'extra' else to_char(substr(a.name,1,a.cutlen)) end) as sery from
(select p.name,length(p.name) as namelen,seq.id as cutlen from tb_product p,(select id from tb_seq where id<=(select max(len) as maxlen from (select name,length(name) as len from tb_product)) order by id) seq) a

--清除掉extra
select b.sery from
(select (case when a.namelen<a.cutlen then 'extra' else to_char(substr(a.name,1,a.cutlen)) end) as sery from
(select p.name,length(p.name) as namelen,seq.id as cutlen from tb_product p,(select id from tb_seq where id<=(select max(len) as maxlen from (select name,length(name) as len from tb_product)) order by id) seq) a) b
where b.sery<>'extra' 

--求种类个数
select c.sery,count(*) as cnt from 
(select b.sery from
(select (case when a.namelen<a.cutlen then 'extra' else to_char(substr(a.name,1,a.cutlen)) end) as sery from
(select p.name,length(p.name) as namelen,seq.id as cutlen from tb_product p,(select id from tb_seq where id<=(select max(len) as maxlen from (select name,length(name) as len from tb_product)) order by id) seq) a) b
where b.sery<>'extra') c
group by c.sery

--按种类个数排序
select d.sery,d.cnt from
(select c.sery,count(*) as cnt from 
(select b.sery from
(select (case when a.namelen<a.cutlen then 'extra' else to_char(substr(a.name,1,a.cutlen)) end) as sery from
(select p.name,length(p.name) as namelen,seq.id as cutlen from tb_product p,(select id from tb_seq where id<=(select max(len) as maxlen from (select name,length(name) as len from tb_product)) order by id) seq) a) b
where b.sery<>'extra') c
group by c.sery) d
where length(d.sery)>1 and d.cnt>1
order by d.cnt desc,d.sery

--SQL实在太长了,为了简化,将上面的结果放入新表
create table tb_tmp1 as select d.sery,d.cnt from
(select c.sery,count(*) as cnt from 
(select b.sery from
(select (case when a.namelen<a.cutlen then 'extra' else to_char(substr(a.name,1,a.cutlen)) end) as sery from
(select p.name,length(p.name) as namelen,seq.id as cutlen from tb_product p,(select id from tb_seq where id<=(select max(len) as maxlen from (select name,length(name) as len from tb_product)) order by id) seq) a) b
where b.sery<>'extra') c
group by c.sery) d
where length(d.sery)>1 and d.cnt>1
order by d.cnt desc,d.sery

--看看结果
select * from tb_tmp1

--把唯一的sery值做成字符串
select listagg(sery,',') within group(order by sery) from tb_tmp1 

--查出的结果只能在唯一sery序列中出现一次
select *
from tb_tmp1
where LENGTH(REGEXP_REPLACE(REPLACE((select listagg(sery,',') within group(order by sery) from tb_tmp1 ), sery, '@'),  '[^@]+', ''))=1

--最后结果与最开始的预期(AA,3;PDST,3;QPL-,3;TP-,2;)一致
SQL> select *
  2  from tb_tmp1
  3  where LENGTH(REGEXP_REPLACE(REPLACE((select listagg(sery,',') within group(order by sery) from tb_tmp1 ), sery, '@'),  '[^@]+', ''))=1;

SERY
----------------------------------------------------------------------------------------------------
       CNT
----------
AA
         3

PDST
         3

QPL-
         3

TP-
         2

用带变量循环函数类的程序语言弄出来不难,但全靠SQL做出来也别有一番风味。

--2020年4月15日8:40 到 2020年4月16日 0:28--

原文地址:https://www.cnblogs.com/heyang78/p/12709690.html