CSU1632Repeated Substrings(后缀数组/最长公共前缀)

题意就是求一个字符串的重复出现(出现次数>=2)的不同子串的个数。

标准解法是后缀数组、最长公共前缀的应用,对于样例aabaab,先将所有后缀排序:

      aab

3    aabaab

1    ab

2    abaab

0    b

1    baab

每个后缀前面的数字代表这个后缀与排在它前一位的后缀(rank比它小1)的最长公共前缀(LCP)的长度,排在最前面的后缀aab没有前一位,所以没有数字。然后就可以这样理解这个最长公共前缀LCP:aabaab与aab的最长公共前缀长度是3,那说明子串a、aa、aab都至少出现了两次,那么这3个就是后缀aabaab与aab带来的重复出现的子串个数

然后我们考虑后缀ab与后缀aabaab的最长公共前缀=1,这时由于LCP(ab, aabaab) <= LCP(aabaab, aab),所以就说明ab与aabaab的所有公共前缀(只有LCP(ab, aabaab) = 1个)之前都已经计算过了,所以我们直接跳过

之后我们考虑后缀abaab与后缀ab:LCP(abaab, ab) = 2 > LCP(ab, aabaab) = 1。abaab与ab有两个公共前缀(重复出现的子串)a、ab,但由于LCP(ab, aabaab) = 1 != 0,其中长度不超过1的公共前缀a在前一步已经计算过了,所以这时新增的重复出现的子串的个数为LCP(abaab, ab) - LCP(ab, aabaab) = 1(即子串ab)

最后总结起来就是:

    if(height[i] <= height[i - 1]) continue;

    if(height[i] > height[i - 1] ) ans += height[i] - height[i - 1];

这就是最后的答案。

  1 #include <map>
  2 #include <set>
  3 #include <stack>
  4 #include <queue>
  5 #include <cmath>
  6 #include <ctime>
  7 #include <vector>
  8 #include <cstdio>
  9 #include <cctype>
 10 #include <cstring>
 11 #include <cstdlib>
 12 #include <iostream>
 13 #include <algorithm>
 14 using namespace std;
 15 #define INF 0x3f3f3f3f
 16 #define inf (-((LL)1<<40))
 17 #define lson k<<1, L, (L + R)>>1
 18 #define rson k<<1|1,  ((L + R)>>1) + 1, R
 19 #define mem0(a) memset(a,0,sizeof(a))
 20 #define mem1(a) memset(a,-1,sizeof(a))
 21 #define mem(a, b) memset(a, b, sizeof(a))
 22 #define FIN freopen("in.txt", "r", stdin)
 23 #define FOUT freopen("out.txt", "w", stdout)
 24 #define rep(i, a, b) for(int i = a; i <= b; i ++)
 25 #define dec(i, a, b) for(int i = a; i >= b; i --)
 26   
 27 template<class T> T CMP_MIN(T a, T b) { return a < b; }
 28 template<class T> T CMP_MAX(T a, T b) { return a > b; }
 29 template<class T> T MAX(T a, T b) { return a > b ? a : b; }
 30 template<class T> T MIN(T a, T b) { return a < b ? a : b; }
 31 template<class T> T GCD(T a, T b) { return b ? GCD(b, a%b) : a; }
 32 template<class T> T LCM(T a, T b) { return a / GCD(a,b) * b;    }
 33   
 34 //typedef __int64 LL;
 35 typedef long long LL;
 36 const int MAXN = 110000;
 37 const int MAXM = 110000;
 38 const double eps = 1e-4;
 39 LL MOD = 1000000007;
 40   
 41 struct SufArray {
 42     char s[MAXN];
 43     int sa[MAXN], t[MAXN], t2[MAXN], c[MAXN], n, m;
 44     int rnk[MAXN], height[MAXN];
 45     int mi[MAXN][20], idxK[MAXN];
 46   
 47     void init() {
 48         mem0(s);
 49         mem0(height);
 50     }
 51     void read_str() {
 52         gets(s);
 53         m = 128;
 54         n = strlen(s);
 55         s[n++] = ' ';
 56     }
 57     void build_sa() {
 58         int *x = t, *y = t2;
 59         rep (i, 0, m - 1) c[i] = 0;
 60         rep (i, 0, n - 1) c[x[i] = s[i]] ++;
 61         rep (i, 1, m - 1) c[i] += c[i - 1];
 62         dec (i, n - 1, 0) sa[--c[x[i]]] = i;
 63         for(int k = 1; k <= n; k <<= 1) {
 64             int p = 0;
 65             rep (i, n - k, n - 1) y[p++] = i;
 66             rep (i, 0, n - 1) if(sa[i] >= k) y[p++] = sa[i] - k;
 67             rep (i, 0, m - 1) c[i] = 0;
 68             rep (i, 0, n - 1) c[x[y[i]]] ++;
 69             rep (i, 0, m - 1) c[i] += c[i - 1];
 70             dec (i, n - 1, 0) sa[--c[x[y[i]]]] = y[i];
 71             swap(x, y);
 72             p = 1;
 73             x[sa[0]] = 0;
 74             rep (i, 1, n - 1) {
 75                 x[sa[i]] = y[sa[i - 1]] == y[sa[i]] && y[sa[i - 1] + k] == y[sa[i] + k] ? p - 1 : p++;
 76             }
 77             if(p >= n) break;
 78             m = p;
 79         }
 80     }
 81     void get_height() {
 82         int k = 0;
 83         rep (i, 0, n - 1) rnk[sa[i]] = i;
 84         rep (i, 0, n - 1) {
 85             if(k) k --;
 86             int j = sa[rnk[i] - 1];
 87             while(s[i + k] == s[j + k]) k ++;
 88             height[rnk[i]] = k;
 89         }
 90     }
 91     void rmq_init(int *a, int n) {
 92         rep (i, 0, n - 1) mi[i][0] = a[i];
 93         for(int j = 1; (1 << j) <= n; j ++) {
 94             for(int i = 0; i + (1<<j) - 1 < n; i ++) {
 95                 mi[i][j] = min(mi[i][j - 1], mi[i + (1 << (j - 1))][j - 1]);
 96             }
 97         }
 98         rep (len, 1, n) {
 99             idxK[len] = 0;
100             while((1 << (idxK[len] + 1)) <= len) idxK[len] ++;
101         }
102     }
103     int rmq_min(int l, int r) {
104         int len = r - l + 1, k = idxK[len];
105         return min(mi[l][k], mi[r - (1 << k) + 1][k]);
106     }
107     void lcp_init() {
108         get_height();
109         rmq_init(height, n);
110     }
111     int get_lcp(int a, int b) {
112         if(a == b) return n - a - 1;
113         return rmq_min(min(rnk[a], rnk[b]) + 1, max(rnk[a], rnk[b]));
114     }
115     void solve() {
116         get_height();
117         LL ans = 0, pre = 0;
118         rep (i, 1, n - 1) {
119             if(height[i] > pre) ans += height[i] - pre;
120             pre = height[i];
121         }
122         cout << ans << endl;
123     }
124 };
125   
126 int T;
127 SufArray sa;
128   
129 int main()
130 {
131     while(~scanf("%d%*c", &T)) while(T--){
132         sa.init();
133         sa.read_str();
134         sa.build_sa();
135         sa.solve();
136     }
137     return 0;
138 }
139 /**************************************************************
140     Problem: 1632
141     User: csust_Rush
142     Language: C++
143     Result: Accepted
144     Time:880 ms
145     Memory:13192 kb
146 ****************************************************************/
原文地址:https://www.cnblogs.com/gj-Acit/p/4527637.html