ACdream1430SETI(后缀自动机)

 问题:

Amateur astronomers Tom and Bob try to find radio broadcasts of extraterrestrial civilizations in the air. Recently they received some strange signal and represented it as a word consisting of small letters of the English alphabet. Now they wish to decode the signal. But they do not know what to start with.
      They think that the extraterrestrial message consists of words, but they cannot identify them. Tom and Bob call a subword of the message a potential word if it has at least two non-overlapping occurrences in the message.

      For example, if the message is “abacabacaba”, “abac” is a potential word, but “acaba” is not because two of its occurrences overlap.
      Given a message m help Tom and Bob to find the number of potential words in it.

Input

      Input file contains one string that consists of small letters of the English alphabet. The length of the message doesn’t exceed 10 000.

Output

      Output one integer number — the number of potential words in a message.

Sample Input

abacabacaba

Sample Output

15

题意:

求字符串中至少有两次互不重叠出现的不同子串的个数。

思路:

后缀自动机:记录每个状态的最先出现和最后一次出现的位置,就可以判断是否出现了多次,以及是否相交。(116ms)

后缀数组:。。。

#include<cstdio>
#include<cstdlib>
#include<cstring>
#include<iostream>
#include<memory>
#include<algorithm>
#include<vector>
using namespace std;
// Capacity of every per-state table. add() creates at most two states per
// appended character, so this comfortably covers the 10 000-character bound
// given in the problem statement.
const int maxn=401000;
char chr[maxn];   // the input message
// Head[s]  : end index (0-based) of the earliest occurrence of state s
// Lates[s] : end index of the latest occurrence of state s
// Now      : index of the character being appended; the caller sets it before add()
int Head[maxn],Lates[maxn],Now;

// Suffix automaton that counts the distinct substrings having at least two
// non-overlapping occurrences. State 1 is the root; index 0 means "no state".
struct SAM
{
    // sz: number of states, Last: state of the whole prefix read so far,
    // ch: transitions, slink: suffix links, maxlen: longest substring length
    // of each state, ans: result produced by solve().
    int sz,Last,ch[maxn][26],slink[maxn],maxlen[maxn],ans;
    // c and pos are not used by this variant; num is written but never read.
    int c[maxn],pos[maxn],num[maxn];

    // Resets the automaton to the single root state and clears the
    // first/last occurrence tables.
    void init()
    {
        Last=sz=1;
        ans=0;
        std::memset(ch[1],0,sizeof(ch[1]));
        std::memset(Lates,0,sizeof(Lates));
        std::memset(Head,0,sizeof(Head));
    }

    // Appends letter x (0..25) located at index Now of the message.
    void add(int x)
    {
        int np=++sz,p=Last;
        Last=np;
        num[np]=1;
        Head[np]=Now;
        Lates[np]=Now;
        std::memset(ch[np],0,sizeof(ch[np]));
        maxlen[np]=maxlen[p]+1;
        while(p&&!ch[p][x]) ch[p][x]=np,p=slink[p];
        if(!p) slink[np]=1;
        else {
            int q=ch[p][x];
            if(maxlen[q]==maxlen[p]+1) slink[np]=q;
            else {
                // Split q: the clone nq takes over the shorter substrings and
                // inherits q's transitions and occurrence positions.
                int nq=++sz;
                num[nq]=0;
                std::memcpy(ch[nq],ch[q],sizeof(ch[q]));
                Head[nq]=Head[q];
                Lates[nq]=Lates[q];
                slink[nq]=slink[q];
                slink[np]=slink[q]=nq;
                maxlen[nq]=maxlen[p]+1;
                while(p&&ch[p][x]==q) ch[p][x]=nq,p=slink[p];
            }
        }
        // Every state on the suffix-link chain of the new state now also ends
        // at index Now, so refresh its latest occurrence. This walk runs on
        // every call and its length is the depth of the suffix-link chain.
        while(np>1) Lates[np]=Now,np=slink[np];
    }

    // Computes the answer into ans and prints it followed by a newline.
    // A state holds the lengths maxlen[slink]+1 .. maxlen; a length fits two
    // non-overlapping occurrences when it does not exceed Lates-Head.
    void solve()
    {
        ans=0;   // start from zero so that repeated calls do not accumulate
        for(int i=1;i<=sz;i++) {
            int dis=Lates[i]-Head[i];
            int mi=std::min(dis,maxlen[i]);
            if(mi>=maxlen[slink[i]]) ans+=mi-maxlen[slink[i]];
        }
        std::printf("%d\n",ans);
    }
};
SAM sam;
// Reads one lowercase word from standard input, feeds it to the automaton
// character by character and prints the number of potential words.
int main()
{
    sam.init();

    scanf("%s",chr);
    const int len=static_cast<int>(strlen(chr));

    // add() reads the global Now as the index of the character it appends,
    // so the loop counter has to be that global.
    Now=0;
    while(Now<len) {
        sam.add(chr[Now]-'a');
        ++Now;
    }

    sam.solve();
    return 0;
}

 也可以先把自动机建完,再按拓扑序(maxlen 从大到小)沿后缀链接更新最后一次出现的位置(拓扑排序)(4ms)(优化一下居然时间排名第一了。。。)

#include<cstdio>
#include<cstdlib>
#include<cstring>
#include<iostream>
#include<memory>
#include<algorithm>
#include<vector>
using namespace std;
// Capacity of every per-state table. add() creates at most two states per
// appended character, so this comfortably covers the 10 000-character bound
// given in the problem statement.
const int maxn=401000;
char chr[maxn];   // the input message
// Head[s]  : end index (0-based) of the earliest occurrence of state s
// Lates[s] : end index of the latest occurrence of state s (final after sort())
// Now      : index of the character being appended; the caller sets it before add()
int Head[maxn],Lates[maxn],Now;

// Suffix automaton that counts the distinct substrings having at least two
// non-overlapping occurrences. State 1 is the root; index 0 means "no state".
// Call order: init(), add() for each character, sort(), solve().
struct SAM
{
    // sz: number of states, Last: state of the whole prefix read so far,
    // ch: transitions, slink: suffix links, maxlen: longest substring length
    // of each state, ans: result produced by solve().
    int sz,Last,ch[maxn][26],slink[maxn],maxlen[maxn],ans;
    // c: counting-sort buckets, pos: states ordered by maxlen,
    // num: written by add() but never read.
    int c[maxn],pos[maxn],num[maxn];

    // Resets the automaton to the single root state and clears the
    // first/last occurrence tables.
    void init()
    {
        Last=sz=1;
        ans=0;
        std::memset(ch[1],0,sizeof(ch[1]));
        std::memset(Lates,0,sizeof(Lates));
        std::memset(Head,0,sizeof(Head));
    }

    // Appends letter x (0..25) located at index Now of the message.
    // Lates is only seeded here; sort() propagates it along suffix links.
    void add(int x)
    {
        int np=++sz,p=Last;
        Last=np;
        num[np]=1;
        Head[np]=Now;
        Lates[np]=Now;
        std::memset(ch[np],0,sizeof(ch[np]));
        maxlen[np]=maxlen[p]+1;
        while(p&&!ch[p][x]) ch[p][x]=np,p=slink[p];
        if(!p) slink[np]=1;
        else {
            int q=ch[p][x];
            if(maxlen[q]==maxlen[p]+1) slink[np]=q;
            else {
                // Split q: the clone nq takes over the shorter substrings and
                // inherits q's transitions and occurrence positions.
                int nq=++sz;
                num[nq]=0;
                std::memcpy(ch[nq],ch[q],sizeof(ch[q]));
                Head[nq]=Head[q];
                Lates[nq]=Lates[q];
                slink[nq]=slink[q];
                slink[np]=slink[q]=nq;
                maxlen[nq]=maxlen[p]+1;
                while(p&&ch[p][x]==q) ch[p][x]=nq,p=slink[p];
            }
        }
    }

    // Orders the states by maxlen with a counting sort, then visits them from
    // longest to shortest so each state passes its latest end position up to
    // its suffix-link parent.
    void sort()
    {
        for(int i=0;i<=sz;i++) c[i]=0;
        for(int i=1;i<=sz;i++) c[maxlen[i]]++;
        for(int i=1;i<=sz;i++) c[i]+=c[i-1];
        for(int i=1;i<=sz;i++) pos[c[maxlen[i]]--]=i;
        for(int i=sz;i>=1;i--) {
            int s=pos[i];
            // For the root this writes slot 0, which no state uses.
            Lates[slink[s]]=std::max(Lates[s],Lates[slink[s]]);
        }
    }

    // Computes the answer into ans and prints it followed by a newline.
    // A state holds the lengths maxlen[slink]+1 .. maxlen; a length fits two
    // non-overlapping occurrences when it does not exceed Lates-Head.
    void solve()
    {
        ans=0;
        for(int i=2;i<=sz;i++) {
            int dis=Lates[i]-Head[i];
            int mi=std::min(dis,maxlen[i]);
            if(mi>=maxlen[slink[i]]) ans+=mi-maxlen[slink[i]];
        }
        std::printf("%d\n",ans);
    }
};
SAM sam;
// Reads one lowercase word from standard input, builds the automaton,
// propagates the last-occurrence positions and prints the number of
// potential words.
int main()
{
    sam.init();

    scanf("%s",chr);
    const int len=static_cast<int>(strlen(chr));

    // add() reads the global Now as the index of the character it appends,
    // so the loop counter has to be that global.
    Now=0;
    while(Now<len) {
        sam.add(chr[Now]-'a');
        ++Now;
    }

    // sort() must run before solve(): it finalises Lates for every state.
    sam.sort();
    sam.solve();
    return 0;
}
View Code
原文地址:https://www.cnblogs.com/hua-dong/p/8034565.html