POJ3415--Common Substrings 后缀数组 +单调栈

题意: 求长度大于等于K的公共子串的个数。位置不同就算不同。

后缀数组求依次SA LCP, 然后就是统计答案了, 暴力统计n^2复杂度显然不可以, 我们可以利用lcp数组的"部分单调性", 用一个栈,栈中保存小于等于当前lcp的原数组的下标, 

两次统计, 第一次统计, 按B串统计, 把A串大于等于K的那一部分的长度加起来保存在tot中, 出栈时则需要从tot中减去多余的哪一步分,同时记录个数, 这样遇到一次B串 求一次ans

然后反过来 按A串 统计,这一步和上一步相同。

#include <cstdio>
#include <cstring>
#include <algorithm>
const int MAXN = 2e5+10;
char str[MAXN];
int step, rank[MAXN], tmp[MAXN], sa[MAXN],lcp[MAXN];
int len;
bool cmp(int i, int j) {
    if (rank[i] != rank[j]) {
        return rank[i] < rank[j];
    }
    int ri = i + step <= len ? rank[i+step] : -1;
    int rj = j + step <= len ? rank[j+step] : -1;
    return ri < rj;
}
void build_sa() {
    for (int i = 0; i <= len; i++) {
        sa[i] = i;
        rank[i] = i < len ? str[i] : -1;
    }
    for (step = 1; step <= len; step <<= 1) {
        std::sort (sa, sa+len+1, cmp);
        tmp[sa[0]] = 0;
        for (int i = 1; i <= len; i++) {
            tmp[sa[i]] = tmp[sa[i-1]] + (cmp(sa[i-1], sa[i]) ? 1 : 0);
        }
        for (int i = 0; i <= len; i++) {
            rank[i] = tmp[i];
        }
    }
}
void build_lcp() {
    for (int i = 0; i <= len; i++) {
        rank[sa[i]] = i;
    }
    int h = 0;
    lcp[0] = 0;
    for (int i = 0; i <= len; i++) {
        int j = sa[rank[i]-1];
        if (h > 0) {
            h--;
        }
        while (j+h < len && i + h < len && str[i+h] == str[j+h]) {
            h++;
        }
        lcp[rank[i]] = h;
    }
}
typedef long long LL;
LL ans;
int K, pt1, pt2, stack1[MAXN],stack2[MAXN], top;
void solve () {
    ans = 0;
    top = -1;
    LL tot = 0;
    for (int i = 0; i <= len; i++) {
        if (lcp[i] < K) {
            top = -1;
            tot = 0;
        } else {
            int cnt = 0;
            if (sa[i-1] < pt2) {
                tot += lcp[i] - K + 1;
                cnt++;
            }
            while (~top && lcp[stack1[top]] >= lcp[i]) {
                tot -= stack2[top] * (lcp[stack1[top]] - lcp[i]);
                cnt += stack2[top];
                top--;
            }
            stack1[++top] = i;
            stack2[top] = cnt;
            if (sa[i] >= pt2) {
                ans += tot;
            }
        }
    }
    top = -1;
    tot = 0;
    for (int i = 0; i <= len; i++) {
        if (lcp[i] < K) {
            top = -1;
            tot = 0;
        } else {
            int cnt = 0;
            if (sa[i-1] >= pt2) {
                tot += lcp[i] - K + 1;
                cnt++;
            }
            while (~top && lcp[stack1[top]] >= lcp[i]) {
                tot -= stack2[top] * (lcp[stack1[top]] - lcp[i]);
                cnt += stack2[top];
                top--;
            }
            stack1[++top] = i;
            stack2[top] = cnt;
            if (sa[i] < pt2) {
                ans += tot;
            }
        }
    }
    printf("%lld
", ans);
}
int main() {
    while (scanf ("%d", &K), K) {
        int len1;
        scanf ("%s", str);
        len1 = strlen(str);
        str[len1] = '#';
        scanf ("%s", str+len1+1);
        pt2 = len1 + 1;
        len = strlen(str);
        build_sa();
        build_lcp();
        solve();
    }
    return 0;
}
原文地址:https://www.cnblogs.com/oneshot/p/4711699.html