后缀数组的应用

height[i] 表示排名第i的后缀和排名第i-1的后缀的最长公共前缀,也即sa[i]与sa[i-1]的最长公共前缀。

1、给定一个字符串,询问任意两个后缀的最长公共前缀,假设询问suffix(i)和suffix(j)的最长公共前缀,且rank[i] < rank[j],那么suffix(i)和suffix(j)的最长公共前缀的

长度为height[rank[i]+1],height[rank[i]+2]....height[rank[j]]的最小值,即一次RMQ(rank[i]+1,rank[j]);

RMQ的初始化为O(nlogn),询问为O(1)

2、给定一个字符串,问最长的重复子串的长度是多少(子串可重叠),重复字串的长度可转化为某两个后缀的最长公共前缀,任意两个后缀的最长公共前缀

相当于height数组中某一段的最小值,那么这个值一定不大于height数组中的最大值,所以最长重复子串的长度就是height数组的最大值

时间复杂度为O(n)

3、给定一个字符串,问最长的重复子串的长度是多少(子串不可重叠)。重复子串的长度肯定在[0,n]之间,所以我们可以二分枚举可能的答案,然后判定是否可行。

设答案为k,那么将连续的height[i]>=k的分为一组,这样组内,任意两个后缀重复长度才会>=k,然后判断组内最大的sa[i]与最小的sa[i]的差值是否>=k

时间复杂度为O(nlogn)

poj1743

  1 #include <stdio.h>
  2 #include <string.h>
  3 const int N = 20000 + 10;
  4 int rank[N],wb[N],bucket[N],height[N],count[N];
  5 int max(const int &a, const int &b)
  6 {
  7     return a > b ? a : b;
  8 }
  9 int min(const int &a, const int &b)
 10 {
 11     return a < b ? a : b;
 12 }
 13 void build_sa(int *r, int *sa, int n, int m)
 14 {
 15     memset(wb,0,sizeof(0));
 16     memset(rank,0,sizeof(rank));
 17     int *x=rank,*y=wb,*t,i,k,p;
 18     for(i=0; i<m; ++i) count[i] = 0;
 19     for(i=0; i<n; ++i) count[x[i] = r[i]]++;
 20     for(i=1; i<m; ++i) count[i] += count[i-1];
 21     for(i=n-1; i>=0; --i) sa[--count[x[i]]] = i;
 22     for(k=1; k<=n; k<<=1)
 23     {
 24         for(p=0,i=n-k; i<n; ++i) y[p++] = i;
 25         for(i=0; i<n; ++i) if(sa[i]>=k) y[p++] = sa[i] - k;
 26         for(i=0; i<n; ++i) bucket[i] = x[y[i]];
 27         for(i=0; i<m; ++i) count[i] = 0;
 28         for(i=0; i<n; ++i) count[bucket[i]]++;
 29         for(i=1; i<m; ++i) count[i] += count[i-1];
 30         for(i=n-1; i>=0; --i) sa[--count[bucket[i]]] = y[i];
 31         t = x; x = y; y = t;
 32         p = 1; x[sa[0]] = 0;
 33         for(i=1; i<n; ++i)
 34             x[sa[i]] = y[sa[i-1]]==y[sa[i]] && y[sa[i-1]+k]==y[sa[i]+k] ? p-1:p++;
 35         m = p;
 36         
 37     }
 38 }
 39 void getHeight(int *r, int *sa, int n)
 40 {
 41     int i,j,k=0;
 42     for(i=1;i<=n; ++i) rank[sa[i]] = i;
 43     for(i=0; i<n;height[rank[i++]]=k)
 44         for(k?k--:0,j=sa[rank[i]-1];r[i+k]==r[j+k];k++);
 45         
 46 }
 47 
 48 bool check(int k, int *sa, int n)
 49 {
 50     int i,minStart = n+1,maxStart = -1;
 51     for(i=1; i<=n; ++i)
 52     {
 53         if(height[i] < k)
 54         {
 55             minStart = n+1;
 56             maxStart = -1;
 57         }
 58         else
 59         {
 60             minStart = min(minStart,min(sa[i],sa[i-1]));
 61             maxStart = max(maxStart,max(sa[i],sa[i-1]));
 62             if(maxStart - minStart >=k)
 63                 return true;
 64         }
 65     }
 66     return false;
 67 }
 68 int main()
 69 {
 70     int s[N],r[N],sa[N],n,i;
 71     while(scanf("%d",&n)!=EOF&&n)
 72     {
 73         memset(r,0,sizeof(r));
 74         for(i=0; i<n; ++i)
 75             scanf("%d",&s[i]);
 76         if(n<10)
 77         {
 78             printf("0
");
 79             continue;
 80         }
 81         for(i=1; i<n; ++i)
 82             r[i-1] = s[i] - s[i-1] + 88;
 83         r[n--] = 0;
 84         build_sa(r, sa, n+1, 200);
 85         getHeight(r, sa, n);
 86         
 87         int L = 4, R = n/2+1,ans=0;
 88         while(L<=R)
 89         {
 90             int mid = (L+R)/2;
 91             if(check(mid,sa,n))
 92             {
 93                 ans = mid;
 94                 L = mid + 1;
 95             }
 96             else
 97                 R = mid - 1;
 98         }
 99         if(ans>=4)
100             printf("%d
",ans+1);
101         else
102             printf("0
");
103     }
104     
105     return 0;
106 }
107 
108 
109 //二分答案是指答案不好直接确定,是一个判定性的问题,就可以在满足条件的范围内二分枚举可能的答案,然后就答案进行判定
View Code

4、给定一个字符串,问重复k次的最长重复子串的长度,重复子串的长度肯定在[0,n]之间,所以我们可以二分枚举可能的答案(设为len),然后判定是否可行

可行的因素是存在k个连续height[i]>=len。

时间复杂度为O(nlogn)

 poj3261

 1 #include <stdio.h>
 2 #include <string.h>
 3 const int N = 20000 + 10;
 4 int bucket[N],wa[N],wb[N],rank[N],height[N],sa[N],r[N];
 5 int count[N];
 6 
 7 void build_sa(int n, int m)
 8 {
 9     int *x=wa,*y=wb,*t,i,k,p;
10     for(i=0; i<m; ++i) count[i] = 0;
11     for(i=0; i<n; ++i) count[x[i] = r[i]]++;
12     for(i=1; i<m; ++i) count[i] += count[i-1];
13     for(i=n-1; i>=0; --i) sa[--count[x[i]]] = i;
14     for(k=1; k<=n; k<<=1)
15     {
16         for(p=0,i=n-k;i<n; ++i) y[p++] = i;
17         for(i=0; i<n; ++i) if(sa[i]>=k) y[p++] = sa[i] - k;
18         for(i=0; i<n; ++i) bucket[i] = x[y[i]];
19         for(i=0; i<m; ++i) count[i] = 0;
20         for(i=0; i<n; ++i) count[bucket[i]]++;
21         for(i=1; i<m; ++i) count[i] += count[i-1];
22         for(i=n-1; i>=0; --i) sa[--count[bucket[i]]] = y[i];
23         t = x; x = y; y = t;
24         p = 1; x[sa[0]] = 0;
25         for(i=1; i<n; ++i)
26             x[sa[i]] = y[sa[i-1]]==y[sa[i]] && y[sa[i-1]+k]==y[sa[i]+k] ? p-1:p++;
27         if(p>=n) break;
28         m = p;
29     }
30 }
31 void getHeight(int n)
32 {
33     int i,j,k=0;
34     for(i=0; i<=n; ++i) rank[sa[i]] = i;
35     for(i=0; i<n; height[rank[i++]]=k)
36         for(k?k--:0,j=sa[rank[i]-1];r[i+k]==r[j+k];k++);
37 }
38 
39 bool check(int len, int n, int k)
40 {
41     int cnt = 0,i;
42     for(i=1; i<=n; ++i)
43     {
44         if(height[i] < len)
45         {
46             cnt = 0;
47         }
48         else
49         {
50             cnt ++;
51             if(cnt == k)
52                 return true;
53         }
54     }
55     return false;
56 }
57 int main()
58 {
59     int n,k,i;
60     scanf("%d%d",&n,&k);
61     for(i=0; i<n; ++i)
62         scanf("%d",&r[i]);
63     r[n] = 0;
64     build_sa(n+1,N+1);
65     getHeight(n);
66     int ans = 0,L=0,R=n;
67     while(L<=R)
68     {
69         int mid = (L+R)>>1;
70         if(check(mid,n,k-1))
71         {
72             ans = mid;
73             L = mid+1;
74         }
75         else
76             R = mid - 1;
77     }
78     printf("%d
",ans);
79     return 0;
80 }
View Code

5、给定一个字符串,问最长的回文子串的长度,枚举每一位,然后计算以这一位为中心的最长回文子串,但是要注意奇数和偶数的情况。两种情况都可以转化为

求一个后缀和一个反过来写的后缀的最长公共前缀,即将aaab变为aaab1baaa0,求以i为中心最长回文串,当回文串长度为奇数时,计算suffix(i+1)和和suffix(n-i)的长度*2+1,即为回文串的长度,当回文串的长度为偶数时,计算suffix(i)和suffix(n-i)的长度*2,即为回文串的长度,枚举所有的i,即可计算出所有的回文字串的长度,选择最长的一个即可。

 ural1297

  1 #include <stdio.h>
  2 #include <string.h>
  3 const int N = 110000+110000 + 10;
  4 int rank[N],wa[N],wb[N],height[N],count[N],bucket[N],sa[N],r[N],dp[N][33];
  5 char str[N];
  6 
  7 int min(const int &a, const int &b)
  8 {
  9     return a < b ? a : b;
 10 }
 11 int max(const int &a, const int &b)
 12 {
 13     return a > b ? a : b;
 14 }
 15 void build_sa(int n, int m)
 16 {
 17     int *x=wa,*y=wb,*t,i,k,p;
 18     for(i=0; i<m; ++i) count[i] = 0;
 19     for(i=0; i<n; ++i) count[x[i]=r[i]]++;
 20     for(i=1; i<m; ++i) count[i] += count[i-1];
 21     for(i=n-1; i>=0; --i) sa[--count[x[i]]] = i;
 22     for(k=1; k<=n; k<<=1)
 23     {
 24         for(p=0,i=n-k;i<n; ++i) y[p++] = i;
 25         for(i=0; i<n; ++i) if(sa[i]>=k) y[p++] = sa[i] - k;
 26         for(i=0; i<n; ++i) bucket[i] = x[y[i]];
 27         for(i=0; i<m; ++i) count[i] = 0;
 28         for(i=0; i<n; ++i) count[bucket[i]]++;
 29         for(i=1; i<m; ++i) count[i] += count[i-1];
 30         for(i=n-1; i>=0; --i) sa[--count[bucket[i]]] = y[i];
 31         t = x; x = y; y = t;
 32         p = 1;x[sa[0]] = 0;
 33         for(i=1; i<n; ++i)
 34             x[sa[i]] = y[sa[i-1]]==y[sa[i]] && y[sa[i-1]+k]==y[sa[i]+k] ? p-1:p++;
 35         if(p>=n) break;
 36         m = p;//没这句就会runtime error,想不通
 37     }
 38 }
 39 void getHeight(int n)
 40 {
 41     int i,j,k=0;
 42     for(i=1; i<=n; ++i) rank[sa[i]] = i;
 43     for(i=0; i<n; height[rank[i++]]=k)
 44         for(k?k--:0,j=sa[rank[i]-1];r[i+k]==r[j+k];k++);
 45 }
 46 void RMQ_init(int n)
 47 {
 48     int i,j;
 49     for(i=1; i<=n; ++i) dp[i][0] = height[i];
 50     for(j=1; (1<<j)<=n; ++j)
 51         for(i=1; i+(1<<j)-1<=n; ++i)
 52             dp[i][j] = min(dp[i][j-1],dp[i+(1<<(j-1))][j-1]);
 53 }
 54 
 55 int lcp(int i, int j)
 56 {
 57     int L = rank[i] + 1,R = rank[j];
 58     if(rank[i] > rank[j])
 59     {
 60         L = rank[j]+1;
 61         R = rank[i];
 62     }
 63     int k =0;
 64     while(1<<(k+1)<=R-L+1) k++;
 65     return min(dp[L][k],dp[R-(1<<k)+1][k]);
 66 }
 67 int main()
 68 {
 69     int n,i,j,nn;
 70     while(scanf("%s",str)!=EOF)
 71     {
 72         nn = n = strlen(str);
 73     
 74         for(i=0; i<n; ++i)
 75             r[i] = str[i] ;
 76         j = n - 1;
 77         i = n + 1;
 78         r[n] = 1;
 79         //n = 2*n+1;
 80         r[n*2+1] = 0;
 81         
 82         for(;j>=0;--j,++i)
 83             r[i] = str[j];
 84         build_sa(2*n+2,200);
 85         getHeight(2*n+1);
 86         int maxLen = -1;
 87         int ret = -1;
 88         int index;
 89         RMQ_init(n*2+1);
 90 
 91         
 92         for(i=0; i<n; ++i)
 93         {
 94             j = 2 * n - i + 1 ;
 95             ret = lcp(i+1,j) * 2 + 1;
 96             if(ret > maxLen)
 97             {
 98                 maxLen = ret;
 99                 index = i+1;
100             }
101             if(i>0)
102             {
103                 ret = lcp(i,j) * 2;
104                 if(ret > maxLen)
105                 {
106                     maxLen = ret;
107                     index = i;
108                 }
109             }
110         }
111         printf("%d
",maxLen);
112     }
113     
114     /*
115     if(maxLen % 2 == 1)
116     {
117         for(i=index-maxLen/2-1;i<=index-1+maxLen/2;++i)
118             printf("%c",str[i]);
119     }
120     else
121     {
122         for(i=index-maxLen/2; i<=index-1+maxLen/2;++i)
123             printf("%c",str[i]);
124     }
125     puts("");*/
126         
127     
128 }
View Code

6、给定一个字符串,求不相同的子串的个数(distinct substrings),每个子串都是某个后缀的前缀,如果按照suffix(sa[1]),suffix(sa[2])...suffix(sa[n])的顺序计算,可以发现

对于加进来的每个后缀,它将产生n-sa[i]个前缀,但是其中后height[i]个前缀是重复的,所以对于suffix(sa[i])将贡献n-sa[i]+height[i]个子串,累加之后就是答案。

 spoj694

 1 #include <stdio.h>
 2 #include <string.h>
 3 const int N = 50000 + 10;
 4 int rank[N],sa[N],height[N],wa[N],wb[N],count[N],bucket[N],r[N];
 5 char str[N];
 6 
 7 void build_sa(int n, int m)
 8 {
 9     int *x=wa,*y=wb,*t,i,k,p;
10     for(i=0; i<m; ++i) count[i] = 0;
11     for(i=0; i<n; ++i) count[x[i] = r[i]]++;
12     for(i=1; i<m; ++i) count[i] += count[i-1];
13     for(i=n-1; i>=0; --i) sa[--count[x[i]]] = i;
14     for(k=1; k<=n; k<<=1)
15     {
16         for(p=0,i=n-k; i<n; ++i) y[p++] = i;
17         for(i=0; i<n; ++i) if(sa[i] >= k) y[p++] = sa[i] - k;
18         for(i=0; i<n; ++i) bucket[i] = x[y[i]];
19         for(i=0; i<m; ++i) count[i] = 0;
20         for(i=0; i<n; ++i) count[bucket[i]]++;
21         for(i=1; i<m; ++i) count[i] += count[i-1];
22         for(i=n-1; i>=0; --i) sa[--count[bucket[i]]] = y[i];
23         t = x; x = y; y = t;
24         p =1 ;x[sa[0]] = 0;
25         for(i=1; i<n; ++i)
26             x[sa[i]] = y[sa[i-1]]==y[sa[i]] && y[sa[i-1]+k]==y[sa[i]+k] ? p-1 : p++;
27         m = p;
28     }
29 }
30 void getHeight(int n)
31 {
32     int i,j,k = 0;
33     for(i=1; i<=n; ++i) rank[sa[i]] = i;
34     for(i=0; i<n; height[rank[i++]]=k)
35         for(k?k--:0,j=sa[rank[i]-1];r[i+k]==r[j+k];k++);
36 }
37 
38 int solve(int n)
39 {
40     int ans = 0,i;
41     for(i=1; i<=n; ++i)
42         ans += n - sa[i]  - height[i];
43     return ans;
44 }
45 int main()
46 {
47     int t,n,i;
48     scanf("%d",&t);
49     while(t--)
50     {
51         scanf("%s",str);
52         n = strlen(str);
53         for(i=0; i<n; ++i)
54             r[i] = str[i] ;
55         r[n] = 0;
56         build_sa(n+1,150);
57         getHeight(n);
58         printf("%d
",solve(n));
59     }
60     return 0;
61 }
View Code

以下是两个字符串相关的问题:

7、给定2个字符串,求2个字符串的最长公共子串(longest  common substrings),可转换为求2个字符串后缀的最长公共前缀,将两个字符串连起来,中间用个没出现过的字符分割

求出height数组后,扫描出height数组中的最大值即可,当然,要求组sa[i]属于一个串,sa[i-1]属于另一个串。

 poj2774

 1 #include <stdio.h>
 2 #include <string.h>
 3 const int N = 100000+100000 + 10;
 4 int rank[N],wa[N],wb[N],height[N],count[N],bucket[N],sa[N],r[N];
 5 char str[N];
 6 
 7 
 8 void build_sa(int n, int m)
 9 {
10     int *x=wa,*y=wb,*t,i,k,p;
11     for(i=0; i<m; ++i) count[i] = 0;
12     for(i=0; i<n; ++i) count[x[i]=r[i]]++;
13     for(i=1; i<m; ++i) count[i] += count[i-1];
14     for(i=n-1; i>=0; --i) sa[--count[x[i]]] = i;
15     for(k=1; k<=n; k<<=1)
16     {
17         for(p=0,i=n-k;i<n; ++i) y[p++] = i;
18         for(i=0; i<n; ++i) if(sa[i]>=k) y[p++] = sa[i] - k;
19         for(i=0; i<n; ++i) bucket[i] = x[y[i]];
20         for(i=0; i<m; ++i) count[i] = 0;
21         for(i=0; i<n; ++i) count[bucket[i]]++;
22         for(i=1; i<m; ++i) count[i] += count[i-1];
23         for(i=n-1; i>=0; --i) sa[--count[bucket[i]]] = y[i];
24         t = x; x = y; y = t;
25         p = 1;x[sa[0]] = 0;
26         for(i=1; i<n; ++i)
27             x[sa[i]] = y[sa[i-1]]==y[sa[i]] && y[sa[i-1]+k]==y[sa[i]+k] ? p-1:p++;
28         if(p>=n) break;
29         m = p;//没这句就会runtime error,想不通
30     }
31 }
32 void getHeight(int n)
33 {
34     int i,j,k=0;
35     for(i=1; i<=n; ++i) rank[sa[i]] = i;
36     for(i=0; i<n; height[rank[i++]]=k)
37         for(k?k--:0,j=sa[rank[i]-1];r[i+k]==r[j+k];k++);
38 }
39 
40 int solve(int n, int m)
41 {
42     int max = 0;
43     int i;
44     for(i=1; i<=n+m+1; ++i)
45     {
46         if(height[i] > max)
47         {
48             if(sa[i]>=n+1 && sa[i-1]<n)
49                 max = height[i];
50             else if(sa[i]<n && sa[i-1]>=n+1)
51                 max = height[i];
52         }
53     }
54     return max;
55 }
56 int main()
57 {
58     int n,m,j,i;
59     while(scanf("%s",str)!=EOF)
60     {
61         n = strlen(str);
62         for(i=0; i<n; ++i)
63             r[i] = str[i] - 'a' + 2;
64         r[n] = 1;
65         scanf("%s",str);
66         m = strlen(str);
67         for(j=0,i=n+1; i<=m+n; ++i,++j)
68             r[i] = str[j] - 'a' + 2;
69         r[n+m+1] = 0;
70         build_sa(n+m+2,30);
71         getHeight(n+m+1);
72         printf("%d
",solve(n,m));
73     }
74 }
View Code

8、给定2个字符串,求出两个字符串所有公共子串中长度大于k的子串的个数,将两个字符串连起来,中间用个没出现过的字符分割

求出height数组后,将height[i]>k的分为一组,然后组内进行扫描,但是要用一个单调栈来维护后缀值。

 poj3415

  1 /*
  2 长度不小于k的公共子串的个数
  3 将连续的height[i]>=k的分为一组
  4 */#include <stdio.h>
  5 #include <string.h>
  6 typedef __int64 LL;
  7 const int N = 200005;
  8 int rank[N],r[N],height[N],sa[N],wa[N],wb[N],count[N];
  9 char str[N];
 10 int stack[N][2];
 11 int k;
 12 void build_sa(int n, int m)
 13 {
 14     int *x=wa,*y=wb,*t,i,p,k;
 15     for(i=0; i<m; ++i) count[i] = 0;
 16     for(i=0; i<n; ++i) count[x[i] = r[i]] ++;
 17     for(i=1; i<m; ++i) count[i] += count[i-1];
 18     for(i=n-1; i>=0; --i) sa[--count[x[i]]] = i;
 19     for(k=1; k<=n; k<<=1)
 20     {
 21         for(p=0,i=n-k; i<n; ++i) y[p++] = i;
 22         for(i=0; i<n; ++i) if(sa[i]>=k) y[p++] = sa[i] - k;
 23         for(i=0; i<m; ++i) count[i] = 0;
 24         for(i=0; i<n; ++i) count[x[y[i]]]++;
 25         for(i=1; i<m; ++i) count[i] += count[i-1];
 26         for(i=n-1; i>=0; --i) sa[--count[x[y[i]]]] = y[i];
 27         t = x; x = y; y = t;
 28         p = 1; x[sa[0]] = 0;
 29         for(i=1; i<n; ++i)
 30             x[sa[i]] = y[sa[i-1]]==y[sa[i]] && y[sa[i-1]+k]==y[sa[i]+k]?p-1:p++;
 31         m = p;
 32     }
 33 }
 34 void getHeight(int n)
 35 {
 36     int i,j,k = 0;
 37     for(i=1; i<=n; ++i) rank[sa[i]] = i;
 38     for(i=0; i<n; height[rank[i++]]=k)
 39         for(k?k--:0,j=sa[rank[i]-1];r[i+k]==r[j+k];k++);
 40 
 41 }
 42 
 43 
 44 int main()
 45 {
 46     int n ,m, i,j;
 47     LL total,top,sum;
 48     while(scanf("%d",&k)!=EOF)
 49     {
 50         if(k==0)
 51             break;
 52         sum = 0;
 53         scanf("%s",str);
 54         n = strlen(str);
 55         for(i=0; i<n; ++i)
 56             r[i] = str[i] ;
 57         r[n] = 1;
 58         scanf("%s",str);
 59         m = strlen(str);
 60         for(i=n+1,j=0;j<m; ++j,++i)
 61             r[i] = str[j] ;
 62         r[n+m+1] = 0;
 63         build_sa(n+m+2,130);
 64         getHeight(n+m+1);
 65         total = 0,top = 0;
 66         for(i=1; i<=n+m+1; ++i)
 67         {
 68             if(height[i] < k) total = 0,top = 0;
 69             else
 70             {
 71                 int cnt = 0;
 72                 if(sa[i-1] < n) cnt++,total += height[i] - k + 1;
 73                 while(top>0 && height[i]<=stack[top-1][0])
 74                 {
 75                     top--;
 76                     total -= (stack[top][0] - height[i]) * stack[top][1];
 77                     cnt += stack[top][1];
 78                 }
 79                 stack[top][0] = height[i];stack[top++][1] = cnt;
 80                 if(sa[i] > n) sum += total;    
 81             }
 82         }
 83         total = 0,top = 0;
 84         for(i=1; i<=n+m+1; ++i)
 85         {
 86             if(height[i] < k) total = 0,top = 0;
 87             else
 88             {
 89                 int cnt = 0;
 90                 if(sa[i-1] > n) cnt++,total += height[i] - k + 1;
 91                 while(top>0 && height[i]<=stack[top-1][0])
 92                 {
 93                     top--;
 94                     total -= (stack[top][0] - height[i]) * stack[top][1];
 95                     cnt += stack[top][1];
 96                 }
 97                 stack[top][0] = height[i];stack[top++][1] = cnt;
 98                 if(sa[i] < n) sum += total;    
 99             }
100         }
101         printf("%I64d
",sum);
102     }
103 }
View Code

9、给定n个字符串,求出最长公共子串,要求这个子串至少出现在k(k<=n)个字符串中,求出height数组后,二分答案,然后将height分组,然后判断组内的后缀是否属于k个以上的原字符串

 poj3294

  1 /*
  2 给定n个字符串,求最长子串的长度,要求该子串至少出现在k个字符串中
  3 二分答案,找到一组height[i]>=k且组中有k个以上的后缀属于不同的字符串
  4 */
  5 #include <stdio.h>
  6 #include <string.h>
  7 const int N = 200000+10;
  8 int rank[N],r[N],height[N],sa[N],wa[N],wb[N],count[N],mark[N],vis[111],index[111];
  9 char str[N];
 10 int k,c,cc;
 11 void build_sa(int n, int m)
 12 {
 13     int *x=wa,*y=wb,*t,i,p,k;
 14     for(i=0; i<m; ++i) 
 15         count[i] = 0;
 16 
 17     for(i=0; i<n; ++i) 
 18         count[x[i] = r[i]] ++;
 19     for(i=1; i<m; ++i) count[i] += count[i-1];
 20     for(i=n-1; i>=0; --i) sa[--count[x[i]]] = i;
 21     for(k=1; k<=n; k<<=1)
 22     {
 23         for(p=0,i=n-k; i<n; ++i) y[p++] = i;
 24         for(i=0; i<n; ++i) if(sa[i]>=k) y[p++] = sa[i] - k;
 25         for(i=0; i<m; ++i) count[i] = 0;
 26         for(i=0; i<n; ++i) count[x[y[i]]]++;
 27         for(i=1; i<m; ++i) count[i] += count[i-1];
 28         for(i=n-1; i>=0; --i) sa[--count[x[y[i]]]] = y[i];
 29         t = x; x = y; y = t;
 30         p = 1; x[sa[0]] = 0;
 31         for(i=1; i<n; ++i)
 32             x[sa[i]] = y[sa[i-1]]==y[sa[i]] && y[sa[i-1]+k]==y[sa[i]+k]?p-1:p++;
 33         m = p;
 34     }
 35 }
 36 void getHeight(int n)
 37 {
 38     int i,j,k = 0;
 39     for(i=1; i<=n; ++i) rank[sa[i]] = i;
 40     for(i=0; i<n; height[rank[i++]]=k)
 41         for(k?k--:0,j=sa[rank[i]-1];r[i+k]==r[j+k];k++);
 42 
 43 }
 44 bool check(int n, int len)
 45 {
 46     memset(vis,0,sizeof(vis));
 47     int i;
 48     int cnt = 0;
 49     bool flag = false;
 50     c = 0;
 51     bool first = true;
 52     for(i=1; i<=n; ++i)
 53     {
 54         if(height[i] < len)
 55         {
 56             memset(vis,0,sizeof(vis));
 57             first = true;
 58             cnt = 0;
 59         }
 60         else
 61         {
 62             if(!vis[mark[sa[i-1]]])
 63                 vis[mark[sa[i-1]]]++,cnt++;
 64             if(!vis[mark[sa[i]]])
 65                 vis[mark[sa[i]]]++,cnt++;
 66             if(cnt>=k)
 67             {
 68                 if(first)
 69                     index[c++] = sa[i-1];
 70                 first = false;
 71                 flag = true;
 72             }    
 73         }
 74     }
 75     if(flag)    
 76     {
 77         cc = c;
 78         return true;
 79     }    
 80     
 81     return false;
 82 }
 83 int solve(int n)
 84 {
 85     int l = 0,r = n,mid;
 86     int ans = 0,i;
 87     while(l<=r)
 88     {
 89         mid = (l + r) >> 1;
 90         if(check(n,mid))
 91         {
 92             ans = mid;
 93             l = mid + 1;
 94         }
 95         else
 96             r = mid - 1;
 97     }
 98     return ans;
 99 }
100 int main()
101 {
102     freopen("in.txt","r",stdin);
103     int n,m,i,j,len;
104     int t = 0;
105     while(scanf("%d",&n),n)
106     {
107         memset(height,0,sizeof(height));
108         len = c = cc = 0;
109         memset(mark,0,sizeof(mark));
110         for(j=1; j<=n; ++j)
111         {
112             scanf("%s",str);
113             m = strlen(str);
114             for(i=0;i<m; ++i)
115             {
116                 r[len] = str[i] + 100;
117                 mark[len++] = j;
118             }
119             r[len++] = n-j;
120         }
121         build_sa(len,300);
122         getHeight(len-1);
123         k = n/2+1;
124         t++;
125         if(t!=1)
126             puts("");
127         int max = solve(len);
128         if(max!=0)
129         {
130             for(i=0; i<cc; ++i)
131             {
132                 for(j=0; j<max; ++j)
133                     printf("%c",r[j+index[i]]-100);
134                 puts("");
135             }    
136         }
137         else
138             puts("?");
139         
140     }
141 }
View Code

10、每个字符串中至少出现2次且不重叠的最长子串,求出height数组中,二分答案,然后将height分组,然后判断是否符号要求,怎么判断,根据上面的经验就可以知道了,不多说了。

spoj220

  1 /*http://www.spoj.com/problems/PHRASES/
  2 给定n个字符串,求最长子串的长度,要求该子串至少出现在k个字符串中
  3 二分答案,找到一组height[i]>=k且组中有k个以上的后缀属于不同的字符串
  4 */
  5 #include <stdio.h>
  6 #include <string.h>
  7 const int N = 200000+10;
  8 const int INF = 1<<30;
  9 int rank[N],r[N],height[N],sa[N],wa[N],wb[N],count[N],mark[N],idx[N][11],cnt[11];
 10 char str[N];
 11 int m;
 12 void build_sa(int n, int m)
 13 {
 14     int *x=wa,*y=wb,*t,i,p,k;
 15     for(i=0; i<m; ++i) 
 16         count[i] = 0;
 17 
 18     for(i=0; i<n; ++i) 
 19         count[x[i] = r[i]] ++;
 20     for(i=1; i<m; ++i) count[i] += count[i-1];
 21     for(i=n-1; i>=0; --i) sa[--count[x[i]]] = i;
 22     for(k=1; k<=n; k<<=1)
 23     {
 24         for(p=0,i=n-k; i<n; ++i) y[p++] = i;
 25         for(i=0; i<n; ++i) if(sa[i]>=k) y[p++] = sa[i] - k;
 26         for(i=0; i<m; ++i) count[i] = 0;
 27         for(i=0; i<n; ++i) count[x[y[i]]]++;
 28         for(i=1; i<m; ++i) count[i] += count[i-1];
 29         for(i=n-1; i>=0; --i) sa[--count[x[y[i]]]] = y[i];
 30         t = x; x = y; y = t;
 31         p = 1; x[sa[0]] = 0;
 32         for(i=1; i<n; ++i)
 33             x[sa[i]] = y[sa[i-1]]==y[sa[i]] && y[sa[i-1]+k]==y[sa[i]+k]?p-1:p++;
 34         m = p;
 35     }
 36 }
 37 void getHeight(int n)
 38 {
 39     int i,j,k = 0;
 40     for(i=1; i<=n; ++i) rank[sa[i]] = i;
 41     for(i=0; i<n; height[rank[i++]]=k)
 42         for(k?k--:0,j=sa[rank[i]-1];r[i+k]==r[j+k];k++);
 43 
 44 }
 45 
 46 bool check(int n, int len)
 47 {
 48     int i,j,k,mn,mx;
 49     int is_ok = 0;
 50     memset(cnt,0,sizeof(cnt));
 51     for(k=1; k<=n; ++k)
 52     {
 53         if(height[k] < len)
 54         {
 55             
 56             for(i=1; i<=m; ++i)
 57             {
 58                 mn = INF;mx = 0;
 59                 for(j=0; j<cnt[i]; ++j)
 60                 {
 61                     if(idx[j][i] > mx)
 62                         mx = idx[j][i];
 63                     if(idx[j][i] < mn)
 64                         mn = idx[j][i];
 65                 }
 66                 if(mx - mn >= len)
 67                     is_ok++;
 68                     
 69             }
 70             if(is_ok==m)
 71                 break;
 72             is_ok = 0;
 73             memset(cnt,0,sizeof(cnt));
 74         }
 75         else
 76         {
 77             idx[cnt[mark[sa[k]]]++][mark[sa[k]]] = sa[k];
 78             idx[cnt[mark[sa[k-1]]]++][mark[sa[k-1]]] = sa[k-1];
 79             
 80         }
 81     }
 82     if(is_ok==m)
 83         return true;
 84     return false;
 85 }
 86 int solve(int n)
 87 {
 88     int l = 0,r = n, mid;
 89     int ans = 0;
 90     while(l<=r)
 91     {
 92         mid = (l+r)>>1;
 93         if(check(n,mid))
 94         {
 95             ans = mid;
 96             l = mid + 1;
 97         }
 98         else
 99             r = mid - 1;
100     }
101     return ans;
102 }
103 int main()
104 {
105     
106     int t,n,i,j,len;
107     scanf("%d",&t);
108     while(t--)
109     {
110         n = 0;
111         memset(mark,0,sizeof(mark));
112         scanf("%d",&m);
113         for(i=1; i<=m; ++i)
114         {
115             scanf("%s",str);
116             len = strlen(str);
117             for(j=0; j<len; ++j)
118             {
119                 r[n] = str[j];
120                 mark[n++] = i;
121             }
122             r[n++] = m - i;
123         }
124         build_sa(n,130);
125         getHeight(n-1);
126         printf("%d
",solve(n));
127     }
128 }
View Code
原文地址:https://www.cnblogs.com/justPassBy/p/4084661.html