字符串(AC自动机):HDU 5129 Yong Zheng's Death

Yong Zheng's Death

Time Limit: 20000/10000 MS (Java/Others)    Memory Limit: 512000/512000 K (Java/Others)
Total Submission(s): 224    Accepted Submission(s): 37


Problem Description
Some Chinese emperors ended up with a mysterious death. Many historians like to do researches about this. For example, the 5th Qing dynasty emperor Yong Zheng's death is often argued among historians. Someone say that he was killed by the top assassin Lu Siniang whose family were almost wiped out by Yong Zheng. Someone say that Yong Zheng was poisoned to death because he liked to eat all kinds of Chinese medicines which were said to have the functions of prolonging human lives. Recently, a new document was discovered in Gu Gong(the Forbidden City). It is a secret document written by Yong Zheng's most trusted eunuch. It reads on the cover: "This document tells how Yong Zheng died. But I can't write this in plain text. Whether people will understand this is depend on Gods." Historians finally found out that there are totally n strings in the document making a set S = { s1,s2,... sn}. You can make some death ciphers from the set. A string is called a DEATH CIPHER if and only if it can be divide into two substrings u and v, and u and v are both a prefix of a string from S (Note that u and v can't be empty strings, and u and v can be prefixes of a same string or two different strings).

When all DEATH CIPHERs are put together, a readable article revealing Yong Zheng's death will appear.

Please help historians to figure out the number of different death ciphers.
 
Input
The input consists of no more than 20 test cases.

For each case, the first line contains an integer n(1 <= n <= 10000), indicating the number of strings in S.

The following n lines contain n strings, indicating the strings in S. Every string only consists of lower case letters and its length is between 1 and 30(inclusive).

The input ends by n = 0.
 
Output
For each test case, print a number denoting the number of death ciphers.
 
Sample Input
2
ab
ac
0
 
Sample Output
9
Hint
For the sample , the death ciphers are {aa, aba, aca, aab, aac, abac, acab, abab, acac}

  神题啊!在此我会介绍两种方法,都是构造法,构造是很跳脱的,这也是此题难点,写题解时有些小激动。

  考虑答案的构成,会是两个前缀,我们叫它们A,B;网上很多WA的代码,就是建trie后输出cnt²,当然是错的,考虑当前枚举的A+B答案串,可能有多断点可以使得断点左边是A,右边是B,比如数据:3 abc bc c,这里abc答案串会被枚举两次,所以错误的原因是算重复了。

  先建一个AC自动机。

 

两种解决思路:

  第一种是补集转换,用总的-不合法的,总的就是cnt²,不合法的我们来分析,下图是某个答案串的情形。

  这里已经假设A,B,C三个断点都是可以的,这表示[1~A],[1~B],[1~C],[A+1~len],[B+1~len],[C+1~len]都是前缀,总的方案数是3,不合法的方案数是2,现在来构造方法(我有三种方法):

  1.求中间没有红线的两头都是红线的段数(A~B,B~C)

  2.求左端点是最左边的红线的段数

  3.求右端点是最右边的红线的段数

  我们采用第一种方法,枚举前缀B,假设与某个A构成的A+B答案串中有当前B位置后面的断点,fail[B]处一定是最近的那个。

  p1,p2为合法分割,fail[x]=p2,所以当前枚举的前缀B,我们可以接的A前缀只需要满足后缀是[p1~p2]即可。(这句话很重要)

代码很好理解:

 1 #include <iostream>
 2 #include <cstring>
 3 #include <cstdio>
 4 #include <queue>
 5 using namespace std;
 6 const int N=300010;
 7 typedef long long LL;
 8 int fa[N],len[N],deg[N];
 9 int cnt,ch[N][26],fail[N];
10 LL sum[N];queue<int>q;
11 char s[N];
12 struct ACM{
13     void Init(){
14         memset(fa,0,sizeof(fa));
15         memset(sum,0,sizeof(sum));
16         memset(deg,0,sizeof(deg));
17         memset(fail,0,sizeof(fail));
18         memset(ch,0,sizeof(ch));cnt=0;
19     }
20     void Insert(char *s){
21         int l=strlen(s);
22         for(int i=0,p=0;i<l;i++){
23             int c=s[i]-'a';
24             if(ch[p][c])p=ch[p][c];
25             else{
26                 fa[++cnt]=p;
27                 len[cnt]=len[p]+1;
28                 p=ch[p][c]=cnt;
29             }
30         }
31     }
32     void Build(){
33         for(int i=0;i<26;i++)
34             if(ch[0][i])q.push(ch[0][i]);
35         while(!q.empty()){
36             int x=q.front();q.pop();
37             for(int i=0;i<26;i++)
38                 if(ch[x][i]){
39                     fail[ch[x][i]]=ch[fail[x]][i];
40                     q.push(ch[x][i]);
41                 }
42                 else ch[x][i]=ch[fail[x]][i];
43         }
44         for(int i=1;i<=cnt;i++)
45             sum[i]=1;
46         for(int i=1;i<=cnt;i++)
47             if(fail[i])deg[fail[i]]++;
48         for(int i=1;i<=cnt;i++)
49             if(!deg[i])q.push(i);
50         while(!q.empty()){
51             int x=q.front();q.pop();
52             sum[fail[x]]+=sum[x];
53             if(!--deg[fail[x]])
54                 q.push(fail[x]);
55         }        
56     }
57     LL Solve(){
58         LL ret=0;
59         for(int i=1;i<=cnt;i++){
60             int tmp=len[fail[i]],p=i;
61             while(tmp--)p=fa[p];
62             ret+=(fail[i]!=0)*(sum[p]-1);
63         }
64         return 1ll*cnt*cnt-ret;    
65     }
66 }ac;
67 int n;
68 int main(){
69     while(true){
70         scanf("%d",&n);
71         if(!n)break;ac.Init();
72         for(int i=1;i<=n;i++){
73             scanf("%s",s);
74             ac.Insert(s);
75         }
76         ac.Build();
77         printf("%lld
",ac.Solve());
78     }
79     return 0;
80 }

第二种方法是直接正着搞,DP出合法的即可(膜的poursoul大大的代码)

  这也是构造,我先跑A串,使其尽量长,直到没有某个转移,不得不跳fail时开始DP,先上图。

   有时候构造没有理由,不好理解不好发现,不过可以证明是对的。

  现在于x计数,后面接的B串都是上图中第一二行的形式(上面应有省略号),我用我的感受引入一个乱yy的名词,"潜力",这里枚举出的最左断点是fail[x],fail[fail[x]],fail[fail[fail[x]]]……最开始时1~fail[x]那么长的一段就是ch[x][i](注意这里跳了fail)状态的"潜力",枚举B串,如果不能转移就跳fail,最后"潜力"没了,没了就不能跳fail了,即不能匹配了。

  可以发现枚举的过程中"潜力"一直是尽可能的大,就是指断点在最左边。

  dp[i][x]表示原x(和这里的x无关)位置后面匹配i个长度,在状态x的方案数,发现是可以转移的。

  哎,讲不清讲不清,建议看代码,然后仔细地感受。

 1 #include <iostream>
 2 #include <cstring>
 3 #include <cstdio>
 4 #include <queue>
 5 using namespace std;
 6 const int N=300010,M=32;
 7 typedef long long LL;
 8 int ch[N][26],fail[N],fa[N],sln[N];
 9 int n,cnt;LL ans,dp[M][N];
10 char s[N];queue<int>q;
11 
12 struct AC{
13     void Init(){
14         memset(fa,0,sizeof(fa));
15         memset(ch,0,sizeof(ch));
16         memset(dp,0,sizeof(dp));
17         memset(sln,0,sizeof(sln));
18         memset(fail,0,sizeof(fail));
19         ans=cnt=0;
20     }
21     void Insert(char *s){
22         int len=strlen(s);
23         for(int i=0,p=0;i<len;i++){
24             int c=s[i]-'a';
25             if(ch[p][c])p=ch[p][c];
26             else{
27                 sln[++cnt]=sln[p]+1;
28                 fa[cnt]=p;p=ch[p][c]=cnt;
29             }
30         }
31     }
32     void Build(){
33         for(int i=0;i<26;i++)
34             if(ch[0][i])
35                 q.push(ch[0][i]);
36         
37         while(!q.empty()){
38             int x=q.front();q.pop();
39             if(fail[x]!=0)ans=ans+1;
40             for(int i=0;i<26;i++)
41                 if(ch[x][i]){
42                     fail[ch[x][i]]=ch[fail[x]][i];
43                     q.push(ch[x][i]);
44                 }
45                 else{
46                     ch[x][i]=ch[fail[x]][i];
47                     if(ch[x][i]){
48                         dp[1][ch[x][i]]+=1;
49                         ans+=1;
50                     }
51                 }
52         }    
53     }
54     void Solve(){
55         bool flag=1;
56         for(int i=2;flag;i++){
57             flag=0;
58             for(int j=1;j<=cnt;j++)if(dp[i-1][j]){
59                 for(int k=0,t;k<26;k++){
60                     if(sln[t=ch[j][k]]<i)continue;
61                     dp[i][t]+=dp[i-1][j];flag=1;
62                 }
63             }
64             for(int j=1;j<=cnt;j++)ans+=dp[i][j];
65         }
66         printf("%lld
",ans);
67     }
68 }ac;
69 int main(){
70     while(~scanf("%d",&n)&&n){
71         ac.Init();
72         for(int i=1;i<=n;i++){
73             scanf("%s",s);
74             ac.Insert(s);                
75         }
76         ac.Build();ac.Solve();
77     }
78     return 0;
79 }
原文地址:https://www.cnblogs.com/TenderRun/p/5964542.html