POJ3693 Maximum repetition substring

Maximum repetition substring
Time Limit: 1000MS   Memory Limit: 65536K
Total Submissions: 11130   Accepted: 3431

Description

The repetition number of a string is defined as the maximum number R such that the string can be partitioned into R same consecutive substrings. For example, the repetition number of "ababab" is 3 and "ababa" is 1.

Given a string containing lowercase letters, you are to find a substring of it with maximum repetition number.

Input

The input consists of multiple test cases. Each test case contains exactly one line, which
gives a non-empty string consisting of lowercase letters. The length of the string will not be greater than 100,000.

The last test case is followed by a line containing a '#'.

Output

For each test case, print a line containing the test case number( beginning with 1) followed by the substring of maximum repetition number. If there are multiple substrings of maximum repetition number, print the lexicographically smallest one.

Sample Input

ccabababc
daabbccaa
#

Sample Output

Case 1: ababab
Case 2: aa

Source

 

【题解】

枚举循环节长度 $L$。考虑采样位置 $1,\ 1+L,\ 1+2L,\ 1+3L,\ \dots,\ 1+kL \le n$:任何循环节长度为 $L$、循环节个数不少于 2 的子串长度至少为 $2L$,因此它一定覆盖其中两个相邻的采样位置,并且这两个位置上的字符分别落在两个相邻的、长度为 $L$ 的循环节里。注意,这里仅仅说“存在”,没有说存在几个:同一对相邻采样位置可能同时被多个互相重叠的不同重复子串覆盖。但我们只需求出最大可行的循环节个数及其对应的循环节长度即可。

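对于相邻的采样位置 $l = 1 + kL$ 和 $r = l + L$,求后缀 $l$ 与后缀 $r$ 的 LCP。若 $L \mid \mathrm{LCP}$,则 $L$ 满足要求,循环节个数为 $\mathrm{LCP}/L + 1$,循环节长度为 $L$;否则把 $l$ 和 $r$ 同时左移 $L - (\mathrm{LCP} \bmod L)$ 位(前提是左移后下标不小于 1),重新求 LCP,并对新的 LCP 做同样的判断。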

只需证明存在循环节长度为L的子串一定能被找到,比较显然,留给大家了

找的时候记录下所有可行的最大循环节个数的循环节长度,还需证明最大循环节个数所有的循环节长度都能被找到,相当于枚举循环节长度命中的位置,找到的是尽可能大的循环节个数,这也是显然的。

于是这个算法正确性就是显然的(显然了老半天我才显然出十分显然的证明来。。。。)

求出最大可行的循环节个数及其对应的循环节长度,枚举所有sa[i]和可行长度,判断即可

 

  1 #include <iostream>
  2 #include <cstdio>
  3 #include <cstring>
  4 #include <cstdlib>
  5 #include <algorithm>
  6 #include <queue>
  7 #include <vector>
  8 #include <cmath> 
  9 #define min(a, b) ((a) < (b) ? (a) : (b))
 10 #define max(a, b) ((a) > (b) ? (a) : (b))
 11 #define abs(a) ((a) < 0 ? (-1 * (a)) : (a))
 /* Exchange the values held by a and b, going through one temporary copy. */
 template <class T>
 inline void swap(T& a, T& b)
 {
     T held = a;
     a = b;
     b = held;
 }
 /*
  * Read the next decimal integer from standard input into x.
  *
  * Every character before the first digit is skipped. The value is negated
  * when the character immediately preceding the first digit is '-'. The one
  * character that terminates the digit run is consumed as well.
  *
  * If end of input is reached before any digit is seen, x is left at 0 and
  * the function returns instead of looping forever.
  */
 inline void read(int &x)
 {
     x = 0;
     /* getchar() returns int so that EOF stays distinguishable from every
        valid character; storing it in a char would lose that. */
     int ch = getchar();
     int prev = ch;
     while (ch != EOF && (ch < '0' || ch > '9'))
     {
         prev = ch;
         ch = getchar();
     }
     while (ch >= '0' && ch <= '9')
     {
         x = x * 10 + (ch - '0');
         ch = getchar();
     }
     if (prev == '-') x = -x;
 }
 24 
 const int INF = 0x3f3f3f3f;
 const int MAXN = 1000000 + 10;

 /* pow2[j] is expected to hold 2^j and lo2[v] to hold floor(log2(v)).
    Nothing in this block fills them: the caller must do so before calling
    build_st() / getmin() / LCP(). */
 int pow2[30], lo2[MAXN];

 /*
  * Suffix array over the 1-indexed string s[1..n], with an LCP (height)
  * array and a sparse table for range-minimum queries over it.
  *
  * Usage: store the text in s[1..n], set n, then call build_sa(m),
  * build_height() and build_st() in that order before any LCP() query.
  *
  *   sa[i]      start position of the i-th smallest suffix (i in 1..n)
  *   rank[p]    index of the suffix starting at p within sa
  *   height[i]  common-prefix length of suffixes sa[i-1] and sa[i];
  *              height[1] is 0
  *   stmi[i][j] minimum of height[i .. i + 2^j - 1], clipped to index n
  */
 struct SuffixArray
 {
     char s[MAXN];int sa[MAXN], rank[MAXN], height[MAXN], t1[MAXN], t2[MAXN], n, c[MAXN];

     int stmi[MAXN][30];

     /* Reset the length and zero the suffix array. */
     void clear(){n = 0;memset(sa, 0, sizeof(sa));}

     /*
      * Build sa[1..n] by prefix doubling with counting sort.
      * m must be strictly greater than every character code in s[1..n]
      * (the caller in this file passes 'z' + 1).
      */
     void build_sa(int m)
     {
         int i, *x = t1, *y = t2;
         for(i = 0;i <= m;++ i) c[i] = 0;
         /* Index buckets through unsigned char so a byte >= 0x80 can never
            become a negative array index. */
         for(i = 1;i <= n;++ i) ++ c[x[i] = (unsigned char)s[i]];
         for(i = 1;i <= m;++ i) c[i] += c[i - 1];
         for(i = n;i;-- i) sa[c[x[i]] --] = i;
         for(int k = 1;k <= n;k <<= 1)
         {
             int p = 0;
             /* y lists the suffixes ordered by their second half of length k;
                suffixes too short to have a second half come first. */
             for(i = n - k + 1;i <= n;++ i) y[++ p] = i;
             for(i = 1;i <= n;++ i) if(sa[i] > k) y[++ p] = sa[i] - k;
             /* Stable counting sort of that list by first-half rank. */
             for(i = 0;i <= m;++ i) c[i] = 0;
             for(i = 1;i <= n;++ i) ++ c[x[y[i]]];
             for(i = 1;i <= m;++ i) c[i] += c[i - 1];
             for(i = n;i;-- i) sa[c[x[y[i]]] --] = y[i];
             /* y now holds the old ranks, x receives the new ones. */
             int *tmp = x;x = y;y = tmp;
             p = 1;x[sa[1]] = p;
             for(i = 2;i <= n;++ i)
             {
                 const bool same = sa[i] + k <= n && sa[i - 1] + k <= n
                     && y[sa[i]] == y[sa[i - 1]]
                     && y[sa[i] + k] == y[sa[i - 1] + k];
                 x[sa[i]] = same ? p : ++ p;
             }
             if(p >= n) break;   /* all ranks distinct: sa is final */
             m = p;
         }
     }

     /* Fill rank[] and height[] from sa[]; requires build_sa() first. */
     void build_height()
     {
         int i,j,k = 0;
         for(i = 1;i <= n;++ i) rank[sa[i]] = i;
         /* The smallest suffix has no predecessor. Write its entry explicitly
            so no stale value from an earlier string reaches stmi[1][0]. */
         height[1] = 0;
         for(i = 1;i <= n;++ i)
         {
             if(k) -- k;
             if(rank[i] == 1) continue;
             j = sa[rank[i] - 1];
             while(i + k <= n && j + k <= n && s[i + k] == s[j + k]) ++ k;
             height[rank[i]] = k;
         }
     }

     /* Build the sparse table over height[1..n]; requires pow2[] filled. */
     void build_st()
     {
         for(int i = 1;i <= n;++ i) stmi[i][0] = height[i];
         for(int j = 1;pow2[j] <= n;++ j)
         {
             const int half = pow2[j - 1];
             for(int i = 1;i <= n;++ i)
             {
                 int best = stmi[i][j - 1];
                 if(i + half <= n && stmi[i + half][j - 1] < best) best = stmi[i + half][j - 1];
                 stmi[i][j] = best;
             }
         }
     }

     /* Minimum of height[x..y]; requires 1 <= x <= y <= n and lo2[] filled. */
     int getmin(int x, int y)
     {
         const int lg = lo2[y - x + 1];
         const int left = stmi[x][lg];
         const int right = stmi[y - pow2[lg] + 1][lg];
         return left < right ? left : right;
     }

     /*
      * Length of the longest common prefix of the suffixes starting at
      * positions x and y.
      * Returns 0 when either position lies outside 1..n (an empty suffix),
      * and n - x + 1 (the whole suffix) when x == y.
      */
     int LCP(int x, int y)
     {
         if(x < 1 || y < 1 || x > n || y > n) return 0;
         if(x == y) return n - x + 1;
         int lo = rank[x], hi = rank[y];
         if(lo > hi)
         {
             const int t = lo;lo = hi;hi = t;
         }
         return getmin(lo + 1, hi);
     }
 }A;
 88 
 89 int ca, ma, ans[MAXN], tot;
 90 
 91 int main()
 92 {
 93     pow2[0] = 1;
 94     for(int i = 1;i < 30;++ i) pow2[i] = pow2[i - 1] << 1;
 95     lo2[1] = 0;
 96     for(int i = 2;i <= 200000;++ i) lo2[i] = lo2[i >> 1] + 1;
 97     while(scanf("%s", A.s + 1) != EOF && A.s[1] != '#')
 98     {
 99         ++ ca, A.n = strlen(A.s + 1);ma = tot = 0;
100         A.build_sa('z' + 1);A.build_height();A.build_st();
101         for(register int L = 1;L <= A.n >> 1;++ L)
102         {
103             for(int l = 1,r = L + 1;r <= A.n;l += L, r += L)
104             {
105                 int k = A.LCP(l, r), t = (L - k % L); 
106                 if(k % L == 0)
107                 {
108                     if(k / L + 1 > ma) ma = k / L + 1, ans[tot = 1] = L;
109                     else if(k / L + 1 == ma) ans[++ tot] = L;
110                 }
111                 else if(l - t >= 1 && r - t >= 1)
112                 {
113                     k = A.LCP(l - t, r - t);
114                     if(k && k % L == 0)
115                     {
116                         if(k / L + 1 > ma) ma = k / L + 1, ans[tot = 1] = L;
117                         else if(k / L + 1 == ma) ans[++ tot] = L;
118                     }
119                 }
120             }
121         }
122         printf("Case %d: ", ca);
123         int flag = 0, s = 0, t = 0;
124         for(int i = 1;i <= A.n;++ i)
125             if(flag) break;
126             else 
127                 for(int j = 1;j <= tot;++ j)
128                     if(A.sa[i] + ma * ans[j] - 1<= A.n && A.LCP(A.sa[i], A.sa[i] + ans[j]) >= (ma - 1) * ans[j])
129                     {
130                         flag = 1;s = A.sa[i];t = A.sa[i] + ma * ans[j] - 1;break;
131                     }
132         for(int i = s;i <= t;++ i) printf("%c", A.s[i]);
133         putchar('
');
134         A.clear();
135     }
136     return 0;
137 }
POJ3693

 

 

 

 

原文地址:https://www.cnblogs.com/huibixiaoxing/p/8337508.html