SPOJ Distinct Substrings【后缀数组】

Given a string, we need to find the total number of its distinct substrings.

Input

T- number of test cases. T<=20;
Each test case consists of one string, whose length is <= 1000

Output

For each test case output one number saying the number of distinct substrings.

Example

Sample Input:
2
CCCCC
ABABA

Sample Output:
5
9

Explanation for the testcase with string ABABA: 
len=1 : A,B
len=2 : AB,BA
len=3 : ABA,BAB
len=4 : ABAB,BABA
len=5 : ABABA
Thus, total number of distinct substrings is 9.

题意:

求一个字符串中所有的不相同的子串个数。

思路:

一个字符串的所有子串个数是n*(n+1)/2,关键就是有多少是重复的。

一个子串其实就是原字符串某个后缀的前缀,找到有多少重复的其实就是找后缀的lcp

比如我们遍历到了第k个后缀,本来他可以产生len个前缀,但是他的前面有一部分前缀是已经算过的了。

而且由于 LCP(i,j)=min{LCP(k-1,k)|i+1≤k≤j} ,且对 i≤j<k,LCP(j,k)≥LCP(i,k)

每加入一个后缀,应该减去max(LCP(i,j)), 也就是LCP(i-1,i),也就是height[i]

 1 #include <iostream>
 2 #include <set>
 3 #include <cmath>
 4 #include <stdio.h>
 5 #include <cstring>
 6 #include <algorithm>
 7 #include <vector>
 8 #include <queue>
 9 #include <map>
10 using namespace std;
11 typedef long long LL;
12 #define inf 0x7f7f7f7f
13 
14 const int maxn = 1005;
15 int t;
16 char str[maxn];
17 
18 int sa[maxn];
19 int t1[maxn], t2[maxn], c[maxn];
20 int rnk[maxn], height[maxn];
21 
22 void build_sa(int s[], int n, int m)
23 {
24     int i, j, p, *x = t1, *y = t2;
25     for(i = 0; i < m; i++)c[i] = 0;
26     for(i = 0; i < n; i++)c[x[i] = s[i]]++;
27     for(i = 1; i < m; i++)c[i] += c[i - 1];
28     for(i = n - 1; i >= 0; i--)sa[--c[x[i]]] = i;
29     for(j = 1; j <= n; j <<= 1){
30         p = 0;
31         for(i = n - j; i < n; i++)y[p++] = i;
32         for(i = 0; i < n; i++)if(sa[i] >= j)y[p++] = sa[i] - j;
33         for(i = 0; i < m; i++)c[i] = 0;
34         for(i = 0; i < n; i++)c[x[y[i]]]++;
35         for(i = 1; i < m; i++)c[i] += c[i - 1];
36         for(i = n - 1; i >= 0; i--)sa[--c[x[y[i]]]] = y[i];
37         swap(x, y);
38         p = 1;
39         x[sa[0]] = 0;
40         for(i = 1; i < n; i++)
41             x[sa[i]] = y[sa[i - 1]] == y[sa[i]] && y[sa[i - 1] + j] == y[sa[i] + j] ? p - 1:p++;
42         if(p >= n)break;
43         m = p;
44     }
45 }
46 
47 void get_height(int s[], int n)
48 {
49     int i, j, k = 0;
50     for(i = 0; i <= n; i++)rnk[sa[i]] = i;
51     for(i = 0; i < n; i++){
52         if(k) k--;
53         j = sa[rnk[i] - 1];
54         while(s[i + k] == s[j + k])k++;
55         height[rnk[i]] = k;
56     }
57 }
58 
59 int s[maxn];
60 int main()
61 {
62     scanf("%d", &t);
63     while(t--){
64         scanf("%s", str);
65         int n = strlen(str);
66         for(int i = 0; i <= n; i++)s[i] = str[i];
67         build_sa(s, n + 1, 200);
68         get_height(s, n);
69         LL ans = n * (n + 1) / 2;
70         //cout<<ans<<endl;
71         for(int i = 1; i <= n; i++){
72             ans -= height[i];
73         }
74         printf("%lld
", ans);
75 
76     }
77     return 0;
78 }
原文地址:https://www.cnblogs.com/wyboooo/p/9865280.html