BZOJ 1396: 识别子串( 后缀数组 + 线段树 )

这道题各位大神好像都是用后缀自动机做的?.....蒟蒻就秀秀智商写一写后缀数组解法.....

求出Height数组后, 我们枚举每一位当做子串的开头.

如上图(x, y是height值), Height数组中相邻的3个后缀, 假如我们枚举s2的第一个字符为开头, 那我们发现, 长度至少为len = max(x, y)+1, 才能满足题意(仅出现一次). 这个很好脑补...因为s2和其他后缀的LCP是Height数组上的区间最小值(RMQ), 肯定会<=LCP(s1,s2)或<=LCP(s2,s3). 然后就用len去更新s2中前len个字符的答案, 线段树维护. 然后对于长度lth>len的也肯定是合法的, 他们对s2的前lth个字符都有贡献...但是事实上lth对前lth-1个字符c的贡献是没有卵用的....(因为它不小于同样字符开头的以c结尾的串的贡献, 或者不小于len的贡献), 所以lth>=len只对第lth个字符有贡献.

容易看出这样的贡献是成等差数列的。。。。线段树维护就OK了.

时间复杂度O(N log N), 空间复杂度O(N) 

-------------------------------------------------------------------------

#include<cstdio>
#include<cstring>
#include<algorithm>
 
using namespace std;
 
// Capacity of every fixed-size array below; also used as the "unset" value of the segment-tree tags.
const int maxn = 100009;
 
// Input string buffer; main appends a '$' sentinel after reading it.
char S[maxn];
// N: length of S (including the sentinel once main has appended it).
// L, R, Val: range and value of the pending segment-tree update, handed to Modify/Change through globals.
int N, L, R, Val;
// Sa: suffix array; Rank: inverse of Sa; Height[i]: LCP of the suffixes ranked i-1 and i; cnt: counting-sort buckets.
int Height[maxn], Rank[maxn], Sa[maxn], cnt[maxn];
 
// Lowers x to t when t is strictly smaller; otherwise x keeps its value.
inline void Min(int &x, int t) {
    x = (t < x) ? t : x;
}
// Raises x to t when t is strictly larger; otherwise x keeps its value.
inline void Max(int &x, int t) {
    x = (t > x) ? t : x;
}
 
// Builds the suffix array Sa of S[0..N-1] by prefix doubling with counting
// sort, then fills Rank (the inverse of Sa) and Height (LCP of each suffix with
// the one ranked directly before it).
//   m: size of the initial key range; every S[i] is used as an index into
//      cnt[0..m-1].
// Height and Rank serve as the two scratch key arrays (x and y) while sorting
// and are overwritten with their final contents at the end.
// NOTE(review): appears to rely on S ending in a unique smallest sentinel
// (main appends '$') so that the y[... + k] reads and the LCP scan stop inside
// the string -- confirm before reusing on other input.
void BuildSA(int m) {
int *x = Height, *y = Rank;
// Counting sort of all suffixes by their first character; x[i] is the key.
for(int i = 0; i < m; i++) cnt[i] = 0;
for(int i = 0; i < N; i++) cnt[x[i] = S[i]]++;
for(int i = 1; i < m; i++) cnt[i] += cnt[i - 1];
for(int i = N; i--; ) Sa[--cnt[x[i]]] = i;
// Each round extends the sorted prefix length from k to 2k characters.
for(int k = 1, p = 0; k <= N; k <<= 1, p = 0) {
// y = suffixes ordered by their second half: the last k suffixes have none
// and go first, the rest follow in the current Sa order shifted back by k.
for(int i = N - k; i < N; i++) y[p++] = i;
for(int i = 0; i < N; i++)
if(Sa[i] >= k) y[p++] = Sa[i] - k;
// Counting sort of that order by first-half key x; filling from the back
// keeps the second-half order among equal keys.
for(int i = 0; i < m; i++) cnt[i] = 0;
for(int i = 0; i < N; i++) cnt[x[y[i]]]++;
for(int i = 1; i < m; i++) cnt[i] += cnt[i - 1];
for(int i = N; i--; ) Sa[--cnt[x[y[i]]]] = y[i];
// y now holds the previous keys; x receives the new ones.
swap(x, y);
p = (x[Sa[0]] = 0) + 1;
for(int i = 1; i < N; i++) {
// A new key starts whenever either half differs from the previous suffix.
if(y[Sa[i]] != y[Sa[i - 1]] || y[Sa[i] + k] != y[Sa[i - 1] + k]) p++;
x[Sa[i]] = p - 1;
}
// p distinct keys; once every suffix has its own key the order is final.
if((m = p) >= N) break;
}
for(int i = 0; i < N; i++) Rank[Sa[i]] = i;
Height[0] = 0;
// LCP pass in text order: h starts from the previous suffix's value minus one
// and is extended by direct character comparison.
for(int i = 0, h = 0; i < N; i++) if(Rank[i]) {
if(h) h--;
while(S[i + h] == S[Sa[Rank[i] - 1] + h]) h++;
Height[Rank[i]] = h;
}
}
 
// Segment-tree node carrying two lazily pushed minimum tags.
//   n: constant candidate that applies to every position of the node's range.
//   d: candidate for the leftmost position of the range; the candidate grows
//      by one for each step to the right.
// maxn marks a tag as unset.
struct Node {
    Node *lc, *rc;
    int n, d;

    // Pushes both tags into the children; len is the size of this node's range.
    inline void pd(int len) {
        if (n != maxn) {
            Min(lc->n, n);
            Min(rc->n, n);
        }
        if (d == maxn) return;
        // The right child starts this many positions after the left one.
        const int leftSize = (len + 1) >> 1;
        Min(lc->d, d);
        Min(rc->d, d + leftSize);
    }
} pool[maxn << 1], *pt = pool, *Root;
 
// Initialises the subtree rooted at t for the index range [l, r]: both tags
// start unset (maxn) and child nodes are taken from the pool through pt.
void Build(Node* t, int l, int r) {
    t->d = maxn;
    t->n = maxn;
    if (l == r) return;
    const int mid = (l + r) >> 1;
    t->lc = pt++;
    Build(t->lc, l, mid);
    t->rc = pt++;
    Build(t->rc, mid + 1, r);
}
 
// Range update of the constant tag: n = min(n, Val) on every node of the
// subtree t (covering [l, r]) that lies fully inside the global range [L, R].
void Modify(Node* t, int l, int r) {
    if (l >= L && R >= r) {
        Min(t->n, Val);
        return;
    }
    const int mid = (l + r) >> 1;
    if (mid >= L) Modify(t->lc, l, mid);
    if (R > mid) Modify(t->rc, mid + 1, r);
}
 
// Range update of the increasing tag: position L receives candidate Val,
// L + 1 receives Val + 1, and so on up to R. Each fully covered node stores
// the candidate for its own leftmost position l.
void Change(Node* t, int l, int r) {
    if (l >= L && R >= r) {
        Min(t->d, Val + l - L);
        return;
    }
    const int mid = (l + r) >> 1;
    if (mid >= L) Change(t->lc, l, mid);
    if (R > mid) Change(t->rc, mid + 1, r);
}
 
// Walks the tree left to right, pushing tags down on the way, and prints the
// smaller of the two tags at each leaf followed by a space.
void DFS(Node* t, int l, int r) {
    if (l == r) {
        printf("%d ", min(t->d, t->n));
        return;
    }
    t->pd(r - l + 1);
    const int mid = (l + r) >> 1;
    DFS(t->lc, l, mid);
    DFS(t->rc, mid + 1, r);
}
 
// Entry point: reads S, appends a '$' sentinel, builds the suffix array and
// Height table, then feeds each suffix's shortest unique prefix length into
// the segment tree and prints the minimum for every position of S.
int main() {
    // Bounded read so over-long input cannot overrun S. The width (100000)
    // must stay at most maxn - 2 to leave room for the sentinel and the NUL.
    // A failed read (empty input) exits early: building the tree over the
    // empty range [1, 0] would otherwise recurse without end.
    if (scanf("%100000s", S) != 1) return 0;
    N = strlen(S);
    S[N++] = '$';
    BuildSA('z' + 1);
    const int n = N - 1;  // length of the string without the sentinel
    Build(Root = pt++, 1, n);
    Height[N] = 0;  // lets the last rank read Height[i + 1] below
    // Rank 0 is skipped: it is the '$' suffix as long as every input
    // character sorts after '$'.
    for (int i = 1; i < N; i++) {
        // One more than the LCP with either rank neighbour.
        Val = max(Height[i], Height[i + 1]) + 1;
        if (Val > 1) {
            // A prefix of that length would run past the end of the string.
            if (Sa[i] + Val > n) continue;
            // The first Val - 1 positions (1-based) all get candidate Val.
            L = Sa[i] + 1, R = L + Val - 2;
            Modify(Root, 1, n);
        }
        // From the Val-th position on, the candidate grows by one per step.
        L = Sa[i] + Val, R = n;
        if (L > R) continue;
        Change(Root, 1, n);
    }
    DFS(Root, 1, n);
    return 0;
}

-------------------------------------------------------------------------

1396: 识别子串

Time Limit: 10 Sec  Memory Limit: 162 MB
Submit: 201  Solved: 119
[Submit][Status][Discuss]

Description

Input

一行,一个由小写字母组成的字符串S,长度不超过10^5

Output

L行(L为字符串S的长度),每行一个整数,第i行的数据表示关于S的第i个元素的最短识别子串有多长.

Sample Input

agoodcookcooksgoodfood

Sample Output

1
2
3
3
2
2
3
3
2
2
3
3
2
1
2
3
3
2
1
2
3
4

HINT

Source

原文地址:https://www.cnblogs.com/JSZX11556/p/5177965.html