后缀数组

1. New Distinct Substrings

题目链接:spoj - SUBST1

题意:给你一个串,求他的子串个数

思路:每个子串都是某个后缀的前缀,所以计算每个后缀的贡献即可,按照后缀suffix(sa[1]),suffix(sa[2])...的顺序计算,当加入suffix(sa[k])时,本应产生n-sa[k]+1个前缀,但是有height[k]个前缀在前面已经计算过了,所以suffix(sa[k])对答案的贡献就是n-sa[k]+1-height[k]

#include <iostream>
#include <algorithm>
#include <cstdio>
#include <cstring>

using namespace std;

const int N = 50010;

int wa[N], wb[N], wv[N], wss[N];
int n, T, rak[N], height[N], cal[N], sa[N];
char s[N];

bool cmp(int *r, int a, int b, int l)
{
    return r[a] == r[b] && r[a + l] == r[b + l];
}

void da(int *r, int *sa, int n, int m)
{
    int i, j, p, *x = wa, *y = wb;
    for (i = 0; i < m; i++) wss[i] = 0;
    for (i = 0; i < n; i++) wss[x[i] = r[i]]++;
    for (i = 1; i < m; i++) wss[i] += wss[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--wss[x[i]]] = i;
    for (j = 1, p = 1; p < n; j *= 2, m = p) {
        for (p = 0, i = n - j; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++)
            if (sa[i] >= j) y[p++] = sa[i] - j;
        for (i = 0; i < n; i++) wv[i] = x[y[i]];
        for (i = 0; i < m; i++) wss[i] = 0;
        for (i = 0; i < n; i++) wss[wv[i]]++;
        for (i = 1; i < m; i++) wss[i] += wss[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--wss[wv[i]]] = y[i];
        for (swap(x, y), p = 1, x[sa[0]] = 0, i = 1; i < n; i++) {
            if (cmp(y, sa[i - 1], sa[i], j)) x[sa[i]] = p - 1;
            else x[sa[i]] = p++;
        }
    }
}

void calc(int *r, int *sa, int n)
{
    int i, j, k = 0;
    for (i = 1; i <= n; i++) rak[sa[i]] = i;
    for (i = 0; i < n; height[rak[i++]] = k)
        for (k ? k-- : 0, j = sa[rak[i] - 1]; r[i + k] == r[j + k]; k++);
    for (i = n; i; i--) {
        rak[i] = rak[i - 1];
        sa[i]++;
    }
}

int main()
{
    scanf("%d", &T);
    while (T--) {
        scanf("%s", s + 1);
        int n = strlen(s + 1);
        for (int i = 1; i <= n; i++) cal[i] = s[i];
        cal[n + 1] = 0;
        da(cal + 1, sa, n + 1, 150);
        calc(cal + 1, sa, n);
        int res = 0;
        for (int i = 1; i <= n; i++)
            res = res + n - sa[i] + 1 - height[i];
        printf("%d
", res);
    }
    return 0;
}
View Code

2. Substring

题目链接:hdu - 5769

题意:给你一个串和一个字符x,求包含字符x的子串个数

思路:和上题思路一样,计算每个后缀的贡献,不同的是子串中需要包含字符x,所以我们可以先预处理出每个位置后面离它最近的一个字符x的位置pos[],当加入suffix(sa[k])时,我们只能从pos[sa[k]]开始取前缀,所以suffix(sa[k])对答案的贡献就是n-sa[k]+1-max(height[k],pos[sa[k]]-sa[k])

#include <iostream>
#include <algorithm>
#include <cstdio>
#include <cstring>

using namespace std;

typedef long long ll;

const int N = 700010;

int wa[N], wb[N], wv[N], wss[N];
int rak[N], height[N], cal[N], sa[N];
int T, icas, pos[N];
char s[N], ch[N];

bool cmp(int *r, int a, int b, int l)
{
    return r[a] == r[b] && r[a + l] == r[b + l];
}

void da(int *r, int *sa, int n, int m)
{
    int i, j, p, *x = wa, *y = wb;
    for (i = 0; i < m; i++) wss[i] = 0;
    for (i = 0; i < n; i++) wss[x[i] = r[i]]++;
    for (i = 1; i < m; i++) wss[i] += wss[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--wss[x[i]]] = i;
    for (j = 1, p = 1; p < n; j *= 2, m = p) {
        for (p = 0, i = n - j; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++)
            if (sa[i] >= j) y[p++] = sa[i] - j;
        for (i = 0; i < n; i++) wv[i] = x[y[i]];
        for (i = 0; i < m; i++) wss[i] = 0;
        for (i = 0; i < n; i++) wss[wv[i]]++;
        for (i = 1; i < m; i++) wss[i] += wss[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--wss[wv[i]]] = y[i];
        for (swap(x, y), p = 1, x[sa[0]] = 0, i = 1; i < n; i++) {
            if (cmp(y, sa[i - 1], sa[i], j)) x[sa[i]] = p - 1;
            else x[sa[i]] = p++;
        }
    }
}

void calc(int *r, int *sa, int n)
{
    int i, j, k = 0;
    for (i = 1; i <= n; i++) rak[sa[i]] = i;
    for (i = 0; i < n; height[rak[i++]] = k)
        for (k ? k-- : 0, j = sa[rak[i] - 1]; r[i + k] == r[j + k]; k++);
    for (i = n; i; i--) {
        rak[i] = rak[i - 1];
        sa[i]++;
    }
}

int main()
{
    scanf("%d", &T);
    while (T--) {
        scanf("%s%s", ch + 1, s + 1);
        int n = strlen(s + 1);
        for (int i = 1; i <= n; i++) cal[i] = s[i];
        cal[n + 1] = 0;
        da(cal + 1, sa, n + 1, 200);
        calc(cal + 1, sa, n);
        int p = n + 1;
        for (int i = n; i >= 1; i--) {
            if (s[i] == ch[1]) p = i;
            pos[i] = p;
        }
        ll res = 0;
        for (int i = 1; i <= n; i++) {
            res = res + n - sa[i] + 1;
            res -= max(height[i], pos[sa[i]] - sa[i]);
        }
        printf("Case #%d: %lld
", ++icas, res);
    }
    return 0;
}
View Code

3. Musical Theme

题目链接:poj - 1743

题意:给你一个串,找出最长、不可重叠、重复的子串

思路:二分答案,将问题转换为判定型问题,判断是否存在两个子串长度为k且不重叠,将排序后的后缀分为若干组,每组内所有元素的height大于等于k,那么每组内任意两个后缀的最长公共前缀长度都大于等于k,此时如果有一组内sa的最大值与最小值之差大于等于k,那么就一定存在两个子串长度为k且不重叠

#include <iostream>
#include <algorithm>
#include <cstdio>
#include <cstring>

using namespace std;

const int N = 20010;
const int INF = 0x3f3f3f3f;

int wa[N], wb[N], wv[N], wss[N], a[N];
int n, rak[N], height[N], cal[N], sa[N];

bool cmp(int *r, int a, int b, int l)
{
    return r[a] == r[b] && r[a + l] == r[b + l];
}

void da(int *r, int *sa, int n, int m)
{
    int i, j, p, *x = wa, *y = wb;
    for (i = 0; i < m; i++) wss[i] = 0;
    for (i = 0; i < n; i++) wss[x[i] = r[i]]++;
    for (i = 1; i < m; i++) wss[i] += wss[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--wss[x[i]]] = i;
    for (j = 1, p = 1; p < n; j *= 2, m = p) {
        for (p = 0, i = n - j; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++)
            if (sa[i] >= j) y[p++] = sa[i] - j;
        for (i = 0; i < n; i++) wv[i] = x[y[i]];
        for (i = 0; i < m; i++) wss[i] = 0;
        for (i = 0; i < n; i++) wss[wv[i]]++;
        for (i = 1; i < m; i++) wss[i] += wss[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--wss[wv[i]]] = y[i];
        for (swap(x, y), p = 1, x[sa[0]] = 0, i = 1; i < n; i++) {
            if (cmp(y, sa[i - 1], sa[i], j)) x[sa[i]] = p - 1;
            else x[sa[i]] = p++;
        }
    }
}

void calc(int *r, int *sa, int n)
{
    int i, j, k = 0;
    for (i = 1; i <= n; i++) rak[sa[i]] = i;
    for (i = 0; i < n; height[rak[i++]] = k)
        for (k ? k-- : 0, j = sa[rak[i] - 1]; r[i + k] == r[j + k]; k++);
    for (i = n; i; i--) {
        rak[i] = rak[i - 1];
        sa[i]++;
    }
}

bool check(int mid)
{
    int imin = sa[1], imax = sa[1];
    for (int i = 2; i <= n; i++) {
        if (height[i] >= mid) {
            imin = min(imin, sa[i]);
            imax = max(imax, sa[i]);
        }
        else {
            if (imax - imin > mid) return true;
            imin = imax = sa[i];
        }
    }
    return imax - imin > mid;
}

int main()
{
    while (scanf("%d", &n) && 0 != n) {
        for (int i = 1; i <= n; i++) scanf("%d", &a[i]);
        for (int i = 1; i <= n; i++) {
            cal[i] = a[i] - a[i - 1];
            cal[i] += 90;
        }
        cal[n + 1] = 0;
        da(cal + 1, sa, n + 1, 200);
        calc(cal + 1, sa, n);
        int l = 0, r = n;
        while (l < r) {
            int mid = (l + r) / 2;
            if (check(mid)) l = mid + 1;
            else r = mid;
        }
        printf("%d
", l >= 5 ? l : 0);
    }
    return 0;
}
View Code

4. Milk Patterns

题目链接:poj - 3261

题意:给你一个数组和一个数k,求出最长、可以重叠、重复k次的子串

思路:二分答案,将问题转换为判定型问题,判断是否存在k个子串长度为L、可以重叠的子串,将排序后的后缀分为若干组,每组内所有元素的height大于等于L,此时如果存在一组组内后缀的数目大于等于k,那么就一定存在k个子串长度为L、可以重叠的子串

#include <iostream>
#include <algorithm>
#include <cstdio>
#include <cstring>

using namespace std;

const int N = 1000010;

int wa[N], wb[N], wv[N], wss[N], a[N];
int n, k, rak[N], height[N], cal[N], sa[N];

bool cmp(int *r, int a, int b, int l)
{
    return r[a] == r[b] && r[a + l] == r[b + l];
}

void da(int *r, int *sa, int n, int m)
{
    int i, j, p, *x = wa, *y = wb;
    for (i = 0; i < m; i++) wss[i] = 0;
    for (i = 0; i < n; i++) wss[x[i] = r[i]]++;
    for (i = 1; i < m; i++) wss[i] += wss[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--wss[x[i]]] = i;
    for (j = 1, p = 1; p < n; j *= 2, m = p) {
        for (p = 0, i = n - j; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++)
            if (sa[i] >= j) y[p++] = sa[i] - j;
        for (i = 0; i < n; i++) wv[i] = x[y[i]];
        for (i = 0; i < m; i++) wss[i] = 0;
        for (i = 0; i < n; i++) wss[wv[i]]++;
        for (i = 1; i < m; i++) wss[i] += wss[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--wss[wv[i]]] = y[i];
        for (swap(x, y), p = 1, x[sa[0]] = 0, i = 1; i < n; i++) {
            if (cmp(y, sa[i - 1], sa[i], j)) x[sa[i]] = p - 1;
            else x[sa[i]] = p++;
        }
    }
}

void calc(int *r, int *sa, int n)
{
    int i, j, k = 0;
    for (i = 1; i <= n; i++) rak[sa[i]] = i;
    for (i = 0; i < n; height[rak[i++]] = k)
        for (k ? k-- : 0, j = sa[rak[i] - 1]; r[i + k] == r[j + k]; k++);
    for (i = n; i; i--) {
        rak[i] = rak[i - 1];
        sa[i]++;
    }
}

bool check(int mid)
{
    int cnt = 1;
    for (int i = 2; i <= n; i++) {
        if (height[i] >= mid) cnt++;
        else cnt = 1;
        if (cnt >= k) return true;
    }
    return false;
}

int main()
{
    scanf("%d%d", &n, &k);
    for (int i = 1; i <= n; i++) {
        scanf("%d", &cal[i]);
        cal[i]++;
    }
    cal[n + 1] = 0;
    da(cal + 1, sa, n + 1, N);
    calc(cal + 1, sa, n);
    int l = 1, r = n;
    while (l < r) {
        int mid = (l + r + 1) / 2;
        if (check(mid)) l = mid;
        else r = mid - 1;
    }
    printf("%d
", l);
    return 0;
}
View Code

5. Life Forms

题目链接:poj - 3294

题意:给你n个字符串,求在大于n/2个字符串中含有的最长公共子串

思路:将n个字符串拼接起来,中间用一个从没出现过且不相同的字符隔开,求出后缀数组后二分答案,将排序后的后缀分为若干组,判断每一组内的后缀是否出现在大于n/2个不同的字符串中(标记某个字符串是否出现过,统计不同字符串的数量)

#include <iostream>
#include <algorithm>
#include <cstdio>
#include <cstring>

using namespace std;

typedef long long ll;

const int N = 101500;
const int M = 110;

int wa[N], wb[N], wv[N], wss[N];
int rak[N], height[N], cal[N], sa[N];
int n, len, belong[N];
bool f[M];

bool cmp(int *r, int a, int b, int l)
{
    return r[a] == r[b] && r[a + l] == r[b + l];
}

void da(int *r, int *sa, int n, int m)
{
    int i, j, p, *x = wa, *y = wb;
    for (i = 0; i < m; i++) wss[i] = 0;
    for (i = 0; i < n; i++) wss[x[i] = r[i]]++;
    for (i = 1; i < m; i++) wss[i] += wss[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--wss[x[i]]] = i;
    for (j = 1, p = 1; p < n; j *= 2, m = p) {
        for (p = 0, i = n - j; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++)
            if (sa[i] >= j) y[p++] = sa[i] - j;
        for (i = 0; i < n; i++) wv[i] = x[y[i]];
        for (i = 0; i < m; i++) wss[i] = 0;
        for (i = 0; i < n; i++) wss[wv[i]]++;
        for (i = 1; i < m; i++) wss[i] += wss[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--wss[wv[i]]] = y[i];
        for (swap(x, y), p = 1, x[sa[0]] = 0, i = 1; i < n; i++) {
            if (cmp(y, sa[i - 1], sa[i], j)) x[sa[i]] = p - 1;
            else x[sa[i]] = p++;
        }
    }
}

void calc(int *r, int *sa, int n)
{
    int i, j, k = 0;
    for (i = 1; i <= n; i++) rak[sa[i]] = i;
    for (i = 0; i < n; height[rak[i++]] = k)
        for (k ? k-- : 0, j = sa[rak[i] - 1]; r[i + k] == r[j + k]; k++);
    for (i = n; i; i--) {
        rak[i] = rak[i - 1];
        sa[i]++;
    }
}

bool check(int k)
{
    memset(f, false, sizeof(f));
    int cnt = 0;
    for (int i = 1; i <= len; i++) {
        if (height[i] >= k) {
            if (0 != belong[sa[i]] && false == f[belong[sa[i]]]) {
                cnt++;
                f[belong[sa[i]]] = true;
            }
        }
        else {
            memset(f, false, sizeof(f));
            if (0 != belong[sa[i]]) {
                cnt = 1;
                f[belong[sa[i]]] = true;
            }
            else cnt = 0;
        }
        if (cnt > n / 2) return true;
    }
    return false;
}

int main()
{
    while (scanf("%d", &n) && 0 != n) {
        int b = 125;
        len = 0;
        for (int i = 1; i <= n; i++) {
            char s[10 * M];
            scanf("%s", s + 1);
            for (int k = 1; k <= strlen(s + 1); k++) {
                cal[++len] = s[k];
                belong[len] = i;
            }
            if (i != n) {
                cal[++len] = b++;
                belong[len] = 0;
            }
        }
        cal[len + 1] = 0;
        belong[len + 1] = 0;
        da(cal + 1, sa, len + 1, 250);
        calc(cal + 1, sa, len);
        int l = 0, r = len;
        while (l < r) {
            int mid = (l + r + 1) / 2;
            if (check(mid)) l = mid;
            else r = mid - 1;
        }
        if (0 == l) {
            printf("?

");
            continue;
        }
        memset(f, false, sizeof(f));
        int cnt = 0;
        for (int i = 1; i <= len; i++) {
            if (height[i] >= l) {
                if (0 != belong[sa[i]] && false == f[belong[sa[i]]]) {
                    cnt++;
                    f[belong[sa[i]]] = true;
                }
            }
            else {
                if (cnt > n / 2) {
                    for (int j = 0; j < l; j++) printf("%c", char(cal[sa[i - 1] + j]));
                    printf("
");
                }
                memset(f, false, sizeof(f));
                if (0 != belong[sa[i]]) {
                    cnt = 1;
                    f[belong[sa[i]]] = true;
                }
                else cnt = 0;
            }
        }
        printf("
");
    }
    return 0;
}
View Code

6. Relevant Phrases of Annihilation

题目链接:spoj - PHRASES

题意:给你n个字符串,求在每个字符串中不重叠的至少出现2次的最长子串

思路:将n个字符串拼接起来,中间用一个从没出现过且不相同的字符隔开,求后缀数组后二分答案,将排序后的后缀分为若干组,判断是否有一组后缀在每个原来的字符串中至少出现两次,并且在每个原来的字符串中,后缀的起始位置的最大值与最小值之差是否不小于当前二分的长度

#include <iostream>
#include <algorithm>
#include <cstdio>
#include <cstring>

using namespace std;

const int N = 100500;
const int M = 10010;
const int INF = 0x3f3f3f3f;

int wa[N], wb[N], wv[N], wss[N];
int rak[N], height[N], cal[N], sa[N];
int n, T, len, belong[N];
int f[12], imin[12], imax[12];

bool cmp(int *r, int a, int b, int l)
{
    return r[a] == r[b] && r[a + l] == r[b + l];
}

void da(int *r, int *sa, int n, int m)
{
    int i, j, p, *x = wa, *y = wb;
    for (i = 0; i < m; i++) wss[i] = 0;
    for (i = 0; i < n; i++) wss[x[i] = r[i]]++;
    for (i = 1; i < m; i++) wss[i] += wss[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--wss[x[i]]] = i;
    for (j = 1, p = 1; p < n; j *= 2, m = p) {
        for (p = 0, i = n - j; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++)
            if (sa[i] >= j) y[p++] = sa[i] - j;
        for (i = 0; i < n; i++) wv[i] = x[y[i]];
        for (i = 0; i < m; i++) wss[i] = 0;
        for (i = 0; i < n; i++) wss[wv[i]]++;
        for (i = 1; i < m; i++) wss[i] += wss[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--wss[wv[i]]] = y[i];
        for (swap(x, y), p = 1, x[sa[0]] = 0, i = 1; i < n; i++) {
            if (cmp(y, sa[i - 1], sa[i], j)) x[sa[i]] = p - 1;
            else x[sa[i]] = p++;
        }
    }
}

void calc(int *r, int *sa, int n)
{
    int i, j, k = 0;
    for (i = 1; i <= n; i++) rak[sa[i]] = i;
    for (i = 0; i < n; height[rak[i++]] = k)
        for (k ? k-- : 0, j = sa[rak[i] - 1]; r[i + k] == r[j + k]; k++);
    for (i = n; i; i--) {
        rak[i] = rak[i - 1];
        sa[i]++;
    }
}

bool check(int k)
{
    memset(f, 0, sizeof(f));
    memset(imin, INF, sizeof(imin));
    memset(imax, 0, sizeof(imax));
    int cnt = 0;
    for (int i = 1; i <= len; i++) {
        if (height[i] >= k) {
            if (0 != belong[sa[i]] && 0 == f[belong[sa[i]]]) cnt++;
            f[belong[sa[i]]]++;
            imin[belong[sa[i]]] = min(imin[belong[sa[i]]], sa[i]);
            imax[belong[sa[i]]] = max(imax[belong[sa[i]]], sa[i]);
        }
        else {
            memset(f, 0, sizeof(f));
            memset(imin, INF, sizeof(imin));
            memset(imax, 0, sizeof(imax));
            if (0 != belong[sa[i]]) {
                cnt = 1;
                f[belong[sa[i]]]++;
                imin[belong[sa[i]]] = min(imin[belong[sa[i]]], sa[i]);
                imax[belong[sa[i]]] = max(imax[belong[sa[i]]], sa[i]);
            }
            else cnt = 0;
        }
        if (cnt == n) {
            bool flag = true;
            for (int i = 1; i <= n; i++) {
                if (f[belong[sa[i]]] < 2) flag = false;
                if (imax[i] - imin[i] < k) flag = false;
            }
            if (flag) return true;
        }
    }
    return false;
}

int main()
{
    scanf("%d", &T);
    while (T--) {
        scanf("%d", &n);
        len = 0;
        int b = 125;
        for (int i = 1; i <= n; i++) {
            char s[M];
            scanf("%s", s + 1);
            for (int k = 1; k <= strlen(s + 1); k++) {
                cal[++len] = s[k];
                belong[len] = i;
            }
            if (i != n) cal[++len] = ++b;
        }
        cal[len + 1] = 0;
        da(cal + 1, sa, len + 1, 150);
        calc(cal + 1, sa, len);
        int l = 0, r = len;
        while (l < r) {
            int mid = (l + r + 1) / 2;
            if (check(mid)) l = mid;
            else r = mid - 1;
        }
        printf("%d
", l);
    }
    return 0;
}
View Code

7. Long Long Message

题目链接:poj - 2774

题意:给你两个串,求两个串的最长公共子串

思路:将第二个串写在第一个串的后面,中间用一个没有出现过的字符隔开,求这个新字符串的后缀数组,当sa[i-1]和sa[i]不属于同一个字符串时,height[i]的最大值就是答案

#include <iostream>
#include <algorithm>
#include <cstdio>
#include <cstring>

using namespace std;

const int N = 200010;

int wa[N], wb[N], wv[N], wss[N];
int n, T, rak[N], height[N], cal[N], sa[N];
char a[N], b[N];

bool cmp(int *r, int a, int b, int l)
{
    return r[a] == r[b] && r[a + l] == r[b + l];
}

void da(int *r, int *sa, int n, int m)
{
    int i, j, p, *x = wa, *y = wb;
    for (i = 0; i < m; i++) wss[i] = 0;
    for (i = 0; i < n; i++) wss[x[i] = r[i]]++;
    for (i = 1; i < m; i++) wss[i] += wss[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--wss[x[i]]] = i;
    for (j = 1, p = 1; p < n; j *= 2, m = p) {
        for (p = 0, i = n - j; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++)
            if (sa[i] >= j) y[p++] = sa[i] - j;
        for (i = 0; i < n; i++) wv[i] = x[y[i]];
        for (i = 0; i < m; i++) wss[i] = 0;
        for (i = 0; i < n; i++) wss[wv[i]]++;
        for (i = 1; i < m; i++) wss[i] += wss[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--wss[wv[i]]] = y[i];
        for (swap(x, y), p = 1, x[sa[0]] = 0, i = 1; i < n; i++) {
            if (cmp(y, sa[i - 1], sa[i], j)) x[sa[i]] = p - 1;
            else x[sa[i]] = p++;
        }
    }
}

void calc(int *r, int *sa, int n)
{
    int i, j, k = 0;
    for (i = 1; i <= n; i++) rak[sa[i]] = i;
    for (i = 0; i < n; height[rak[i++]] = k)
        for (k ? k-- : 0, j = sa[rak[i] - 1]; r[i + k] == r[j + k]; k++);
    for (i = n; i; i--) {
        rak[i] = rak[i - 1];
        sa[i]++;
    }
}

int main()
{
    scanf("%s%s", a + 1, b + 1);
    int la = strlen(a + 1), lb = strlen(b + 1);
    for (int i = 1; i <= la; i++) cal[i] = a[i];
    cal[la + 1] = 0;
    for (int i = 1; i <= lb; i++) cal[i + la + 1] = b[i];
    int n = la + lb + 1;
    cal[n + 1] = 0;
    da(cal + 1, sa, n + 1, 150);
    calc(cal + 1, sa, n);
    int res = 0;
    for (int i = 2; i <= n; i++) {
        if (sa[i - 1] < la + 1 && sa[i] > la + 1) res = max(res, height[i]);
        else if (sa[i - 1] > la + 1 && sa[i] < la + 1) res = max(res, height[i]);
    }
    printf("%d
", res);
    return 0;
}
View Code

8. Substrings

题目链接:poj - 1226

题意:给你n个串,求顺序出现或者逆序出现在每个字符串的最长子串

思路:将每个字符串反过来写一遍,再将这2*n个字符串拼接起来,中间用一个从没出现过且不相同的字符隔开,求出后缀数组后二分答案,判断是否有一组后缀在每个原来的字符串或反转后

的字符串中出现,注意特判n=1的情况

#include <iostream>
#include <algorithm>
#include <cstdio>
#include <cstring>

using namespace std;

const int N = 20500;
const int M = 110;
const int INF = 0x3f3f3f3f;

int wa[N], wb[N], wv[N], wss[N];
int rak[N], height[N], cal[N], sa[N];
int n, T, len, belong[N];
bool f[M];

bool cmp(int *r, int a, int b, int l)
{
    return r[a] == r[b] && r[a + l] == r[b + l];
}

void da(int *r, int *sa, int n, int m)
{
    int i, j, p, *x = wa, *y = wb;
    for (i = 0; i < m; i++) wss[i] = 0;
    for (i = 0; i < n; i++) wss[x[i] = r[i]]++;
    for (i = 1; i < m; i++) wss[i] += wss[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--wss[x[i]]] = i;
    for (j = 1, p = 1; p < n; j *= 2, m = p) {
        for (p = 0, i = n - j; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++)
            if (sa[i] >= j) y[p++] = sa[i] - j;
        for (i = 0; i < n; i++) wv[i] = x[y[i]];
        for (i = 0; i < m; i++) wss[i] = 0;
        for (i = 0; i < n; i++) wss[wv[i]]++;
        for (i = 1; i < m; i++) wss[i] += wss[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--wss[wv[i]]] = y[i];
        for (swap(x, y), p = 1, x[sa[0]] = 0, i = 1; i < n; i++) {
            if (cmp(y, sa[i - 1], sa[i], j)) x[sa[i]] = p - 1;
            else x[sa[i]] = p++;
        }
    }
}

void calc(int *r, int *sa, int n)
{
    int i, j, k = 0;
    for (i = 1; i <= n; i++) rak[sa[i]] = i;
    for (i = 0; i < n; height[rak[i++]] = k)
        for (k ? k-- : 0, j = sa[rak[i] - 1]; r[i + k] == r[j + k]; k++);
    for (i = n; i; i--) {
        rak[i] = rak[i - 1];
        sa[i]++;
    }
}

bool check(int k)
{
    memset(f, false, sizeof(f));
    int cnt = 0;
    for (int i = 1; i <= len; i++) {
        if (height[i] >= k) {
            if (0 != belong[sa[i]] && false == f[belong[sa[i]]]) {
                cnt++;
                f[belong[sa[i]]] = true;
            }
        }
        else {
            memset(f, false, sizeof(f));
            if (0 != belong[sa[i]]) {
                cnt = 1;
                f[belong[sa[i]]] = true;
            }
            else cnt = 0;
        }
        if (cnt == n) return true;
    }
    return false;
}

int main()
{
    scanf("%d", &T);
    while (T--) {
        scanf("%d", &n);
        if (1 == n) {
            char s[M];
            scanf("%s", s + 1);
            printf("%d
", strlen(s + 1));
            continue;
        }
        len = 0;
        int b = 125;
        for (int i = 1; i <= n; i++) {
            char s[M];
            scanf("%s", s + 1);
            for (int k = 1; k <= strlen(s + 1); k++) {
                cal[++len] = s[k] + 1;
                belong[len] = i;
            }
            cal[++len] = ++b;
            for (int k = strlen(s + 1); k >= 1; k--) {
                cal[++len] = s[k] + 1;
                belong[len] = i;
            }
            if (i != n) cal[++len] = ++b;
        }
        cal[len + 1] = 0;
        da(cal + 1, sa, len + 1, 350);
        calc(cal + 1, sa, len);
        int l = 0, r = len;
        while (l < r) {
            int mid = (l + r + 1) / 2;
            if (check(mid)) l = mid;
            else r = mid - 1;
        }
        printf("%d
", l);
    }
    return 0;
}
View Code

9. Mr. Panda and Fantastic Beasts

题目链接:Gym - 101194F

题意:给你n个字符串,找到第一个字符串字典序最小的最短子串,使得这个子串不是其他字符串的子串

思路:将n个字符串拼接起来,中间用一个从没出现过且不相同的字符隔开,求出后缀数组后,对所有来自第一个字符串的后缀进行判断,找到它左边第一个不是来自第一个串的后缀,求lcp为x,找到它右边第一个不是来自第一个串的后缀,求lcp为y,那么显然该后缀至少取前max(x,y)+1个字符做为子串,对每个后缀都进行一次判断,取最小值即可,注意需要判断max(x,y)+1是否超过了第一个字符串后缀的长度

#include <iostream>
#include <algorithm>
#include <cstring>
#include <cstdio>
#include <cmath>

using namespace std;

const int N = 300010;
const int INF = 0x3f3f3f3f;

int wa[N], wb[N], wv[N], wss[N];
int rak[N], height[N], cal[N], sa[N];
int rmq[N], imin[N][30], len1, icas;
int n, len, T, belong[N], lef[N], rig[N];
char s[N];

bool cmp(int *r, int a, int b, int l)
{
    return r[a] == r[b] && r[a + l] == r[b + l];
}

void da(int *r, int *sa, int n, int m)
{
    int i, j, p, *x = wa, *y = wb;
    for (i = 0; i < m; i++) wss[i] = 0;
    for (i = 0; i < n; i++) wss[x[i] = r[i]]++;
    for (i = 1; i < m; i++) wss[i] += wss[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--wss[x[i]]] = i;
    for (j = 1, p = 1; p < n; j *= 2, m = p) {
        for (p = 0, i = n - j; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++)
            if (sa[i] >= j) y[p++] = sa[i] - j;
        for (i = 0; i < n; i++) wv[i] = x[y[i]];
        for (i = 0; i < m; i++) wss[i] = 0;
        for (i = 0; i < n; i++) wss[wv[i]]++;
        for (i = 1; i < m; i++) wss[i] += wss[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--wss[wv[i]]] = y[i];
        for (swap(x, y), p = 1, x[sa[0]] = 0, i = 1; i < n; i++) {
            if (cmp(y, sa[i - 1], sa[i], j)) x[sa[i]] = p - 1;
            else x[sa[i]] = p++;
        }
    }
}

void calc(int *r, int *sa, int n)
{
    int i, j, k = 0;
    for (i = 1; i <= n; i++) rak[sa[i]] = i;
    for (i = 0; i < n; height[rak[i++]] = k)
        for (k ? k-- : 0, j = sa[rak[i] - 1]; r[i + k] == r[j + k]; k++);
    for (i = n; i; i--) {
        rak[i] = rak[i - 1];
        sa[i]++;
    }
}

void init(int n)
{
    memset(imin, 0, sizeof(imin));
    for (int i = 1; i <= n; i++) imin[i][0] = height[i];
    int l = int(log((double)n) / log(2.0));
    for (int j = 1; j <= l; j++) {
        for (int i = 1; i + (1 << (j - 1)) <= n; i++) {
            imin[i][j] = min(imin[i][j - 1], imin[i + (1 << (j - 1))][j - 1]);
        }
    }
}

int ask(int l, int r)
{
    int k = int(log(double(r - l + 1)) / log(2.0));
    return min(imin[l][k], imin[r - (1 << k) + 1][k]);
}

int lcp(int a, int b)
{
    int ra = rak[a], rb = rak[b];
    if (ra > rb) swap(ra, rb);
    return ask(ra + 1, rb);
}

int main()
{
    scanf("%d", &T);
    while (T--) {
        scanf("%d", &n);
        int b = 125;
        len = 0;
        for (int i = 1; i <= n; i++) {
            scanf("%s", s + 1);
            if (1 == i) len1 = strlen(s + 1);
            int tl = strlen(s + 1);
            for (int k = 1; k <= tl; k++) {
                cal[++len] = s[k];
                belong[len] = i;
            }
            if (i != n) {
                cal[++len] = ++b;
                belong[len] = 0;
            }
        }
        cal[len + 1] = 0;
        da(cal + 1, sa, len + 1, 50200);
        calc(cal + 1, sa, len);
        int lst = -1;
        for (int i = 1; i <= len; i++) {
            if (0 == belong[sa[i]]) continue;
            if (1 == belong[sa[i]]) lef[i] = lst;
            else lst = i;
        }
        lst = -1;
        for (int i = len; i >= 1; i--) {
            if (0 == belong[sa[i]]) continue;
            if (1 == belong[sa[i]]) rig[i] = lst;
            else lst = i;
        }
        init(len);
        int rc = INF, rp = 0;
        for (int i = 1; i <= len; i++) {
            if (1 != belong[sa[i]]) continue;
            int L = lef[i], R = rig[i], tc = 0;
            if (-1 != L) tc = max(tc, lcp(sa[i], sa[L]));
            if (-1 != R) tc = max(tc, lcp(sa[i], sa[R]));
            if (len1 - sa[i] + 1 > tc) {
                if (tc + 1 < rc) {
                    rc = tc + 1;
                    rp = sa[i];
                }
            }
        }
        printf("Case #%d: ", ++icas);
        if (INF == rc) {
            printf("Impossible
");
            continue;
        }
        for (int i = 0; i < rc; i++) printf("%c", char(cal[rp + i]));
        printf("
");
    }
    return 0;
}
View Code

10. Good Article Good sentence

题目链接:hdu - 4416

题意:给出一个串A和若干串B,问A中有多少子串未在任一B串中出现过

思路:将所有的串拼接起来,中间用一个从没出现过且不相同的字符隔开,求出后缀数组,设A串的长度为lenA,对于A串中每个后缀,lenA-sa[i]+1-height[i]就是没有限制条件下第i个后缀的贡献,设j为i后面第一个来自B串的后缀,那么加上限制条件后,每个后缀的贡献就是lenA-sa[i]+1-max(height[i],lcp(i,j)),lcp(i,j)可以通过从后向前扫一遍求出来

#include <iostream>
#include <algorithm>
#include <cstdio>
#include <cstring>
#include <cmath>

using namespace std;

const int N = 300500;
const int INF = 0x3f3f3f3f;

typedef long long ll;

int wa[N], wb[N], wv[N], wss[N];
int rak[N], height[N], cal[N], sa[N];
int T, n, len, len1, icas, lcp[N];
char s[N];

bool cmp(int *r, int a, int b, int l)
{
    return r[a] == r[b] && r[a + l] == r[b + l];
}

void da(int *r, int *sa, int n, int m)
{
    int i, j, p, *x = wa, *y = wb;
    for (i = 0; i < m; i++) wss[i] = 0;
    for (i = 0; i < n; i++) wss[x[i] = r[i]]++;
    for (i = 1; i < m; i++) wss[i] += wss[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--wss[x[i]]] = i;
    for (j = 1, p = 1; p < n; j *= 2, m = p) {
        for (p = 0, i = n - j; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++)
            if (sa[i] >= j) y[p++] = sa[i] - j;
        for (i = 0; i < n; i++) wv[i] = x[y[i]];
        for (i = 0; i < m; i++) wss[i] = 0;
        for (i = 0; i < n; i++) wss[wv[i]]++;
        for (i = 1; i < m; i++) wss[i] += wss[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--wss[wv[i]]] = y[i];
        for (swap(x, y), p = 1, x[sa[0]] = 0, i = 1; i < n; i++) {
            if (cmp(y, sa[i - 1], sa[i], j)) x[sa[i]] = p - 1;
            else x[sa[i]] = p++;
        }
    }
}

void calc(int *r, int *sa, int n)
{
    int i, j, k = 0;
    for (i = 1; i <= n; i++) rak[sa[i]] = i;
    for (i = 0; i < n; height[rak[i++]] = k)
        for (k ? k-- : 0, j = sa[rak[i] - 1]; r[i + k] == r[j + k]; k++);
    for (i = n; i; i--) {
        rak[i] = rak[i - 1];
        sa[i]++;
    }
}

int main()
{
    scanf("%d", &T);
    while (T--) {
        scanf("%d", &n);
        len = 0;
        int b = 125;
        for (int i = 1; i <= n + 1; i++) {
            scanf("%s", s + 1);
            if (1 == i) len1 = strlen(s + 1);
            int len2 = strlen(s + 1);
            for (int k = 1; k <= len2; k++)
                cal[++len] = s[k];
            if (i != n + 1) cal[++len] = ++b;
        }
        cal[len + 1] = 0;
        da(cal + 1, sa, len + 1, 100500);
        calc(cal + 1, sa, len);
        int rl = 0;
        for (int i = len; i >= 1; i--) {
            if (sa[i] > len1) rl = height[i];
            else {
                lcp[i] = rl;
                rl = min(rl, height[i]);
            }
        }
        ll res = 0;
        for (int i = 1; i <= len; i++) {
            if (sa[i] <= len1) {
                ll t = ll(len1 - sa[i] + 1 - max(lcp[i], height[i]));
                res += t;
            }
        }
        printf("Case %d: %lld
", ++icas, res);
    }
    return 0;
}
View Code

11. string string string

题目链接:hdu - 6194

题意:给出一个字符串,求恰好出现k次的子串个数

思路:求出后缀数组后,枚举长度为k的段,一段一段的枚举,用rmq求出每一段的最长公共前缀长度lcp,即这段有lcp个子串是满足出现k次的,但又有可能子串过短而不止出现k次,所以要减去出现次数大于k次的子串个数,所以每段对答案的贡献就是lcp-max(height[i],height[i+k]),特判k=1的情况

#include <iostream>
#include <algorithm>
#include <cstring>
#include <cstdio>
#include <cmath>

using namespace std;

typedef long long ll;

const int N = 100010;
const int M = 30;
const int INF = 0x3f3f3f3f;

int wa[N], wb[N], wv[N], wss[N];
int rak[N], height[N], cal[N], sa[N];
int T, len, rmq[N], imin[N][M];
ll k;
char s[N];

bool cmp(int *r, int a, int b, int l)
{
    return r[a] == r[b] && r[a + l] == r[b + l];
}

void da(int *r, int *sa, int n, int m)
{
    int i, j, p, *x = wa, *y = wb;
    for (i = 0; i < m; i++) wss[i] = 0;
    for (i = 0; i < n; i++) wss[x[i] = r[i]]++;
    for (i = 1; i < m; i++) wss[i] += wss[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--wss[x[i]]] = i;
    for (j = 1, p = 1; p < n; j *= 2, m = p) {
        for (p = 0, i = n - j; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++)
            if (sa[i] >= j) y[p++] = sa[i] - j;
        for (i = 0; i < n; i++) wv[i] = x[y[i]];
        for (i = 0; i < m; i++) wss[i] = 0;
        for (i = 0; i < n; i++) wss[wv[i]]++;
        for (i = 1; i < m; i++) wss[i] += wss[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--wss[wv[i]]] = y[i];
        for (swap(x, y), p = 1, x[sa[0]] = 0, i = 1; i < n; i++) {
            if (cmp(y, sa[i - 1], sa[i], j)) x[sa[i]] = p - 1;
            else x[sa[i]] = p++;
        }
    }
}

void calc(int *r, int *sa, int n)
{
    int i, j, k = 0;
    for (i = 1; i <= n; i++) rak[sa[i]] = i;
    for (i = 0; i < n; height[rak[i++]] = k)
        for (k ? k-- : 0, j = sa[rak[i] - 1]; r[i + k] == r[j + k]; k++);
    for (i = n; i; i--) {
        rak[i] = rak[i - 1];
        sa[i]++;
    }
}

void init(int n)
{
    for (int i = 1; i <= n; i++) imin[i][0] = height[i];
    int l = int(log((double)n) / log(2.0));
    for (int j = 1; j <= l; j++) {
        for (int i = 1; i + (1 << (j - 1)) <= n; i++) {
            imin[i][j] = min(imin[i][j - 1], imin[i + (1 << (j - 1))][j - 1]);
        }
    }
}

int ask(int l, int r)
{
    int k = int(log(double(r - l + 1)) / log(2.0));
    return min(imin[l][k], imin[r - (1 << k) + 1][k]);
}

int lcp(int a, int b)
{
    int ra = rak[a], rb = rak[b];
    if (ra > rb) swap(ra, rb);
    return ask(ra + 1, rb);
}

int main()
{
    scanf("%d", &T);
    while (T--) {
        scanf("%lld%s", &k, s + 1);
        int ls = strlen(s + 1);
        len = 0;
        for (int i = 1; i <= ls; i++) cal[++len] = s[i];
        cal[len + 1] = 0;
        da(cal + 1, sa, len + 1, 150);
        calc(cal + 1, sa, len);
        init(len);
        ll res = 0;
        if (1 == k) {
            for (int i = 1; i <= len; i++)
                res += max(0, len - sa[i] + 1 - max(height[i], i + 1 > len ? 0 : height[i + 1]));
            printf("%lld
", res);
            continue;
        }
        for (int i = 1; i + k - 1 <= len; i++) {
            int l = i, r = i + k - 1;
            int t = lcp(sa[l], sa[r]);
            res += max(0, t - max(height[l], r + 1 > len ? 0 : height[r + 1]));
        }
        printf("%lld
", res);
        for (int i = 1; i <= ls; i++) s[i] = '';
    }
    return 0;
}
View Code

12. Boring String Problem

题目链接:hdu - 5008

题意:给出一个字符串,求出第k小的子串,并求出字符串的起止位置,如果有多个重复子串,求出位置最靠左的子串

思路:求出后缀数组,后缀数组本来就是按照字典序排序的,由于height数组的去重性质,我们就可以通过二分查找来定位第k小的子串来自哪个后缀,但由于可能有多个重复的子串,所以需要暴力向后面的后缀进行查找,找到位置最靠左的子串

#include <iostream>
#include <algorithm>
#include <cstring>
#include <cstdio>

using namespace std;

const int N = 100010;

typedef long long ll;

int wa[N], wb[N], wv[N], wss[N];
int rak[N], height[N], cal[N], sa[N];
int len, q;
ll b[N];
char s[N];

bool cmp(int *r, int a, int b, int l)
{
    return r[a] == r[b] && r[a + l] == r[b + l];
}

void da(int *r, int *sa, int n, int m)
{
    int i, j, p, *x = wa, *y = wb;
    for (i = 0; i < m; i++) wss[i] = 0;
    for (i = 0; i < n; i++) wss[x[i] = r[i]]++;
    for (i = 1; i < m; i++) wss[i] += wss[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--wss[x[i]]] = i;
    for (j = 1, p = 1; p < n; j *= 2, m = p) {
        for (p = 0, i = n - j; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++)
            if (sa[i] >= j) y[p++] = sa[i] - j;
        for (i = 0; i < n; i++) wv[i] = x[y[i]];
        for (i = 0; i < m; i++) wss[i] = 0;
        for (i = 0; i < n; i++) wss[wv[i]]++;
        for (i = 1; i < m; i++) wss[i] += wss[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--wss[wv[i]]] = y[i];
        for (swap(x, y), p = 1, x[sa[0]] = 0, i = 1; i < n; i++) {
            if (cmp(y, sa[i - 1], sa[i], j)) x[sa[i]] = p - 1;
            else x[sa[i]] = p++;
        }
    }
}

void calc(int *r, int *sa, int n)
{
    int i, j, k = 0;
    for (i = 1; i <= n; i++) rak[sa[i]] = i;
    for (i = 0; i < n; height[rak[i++]] = k)
        for (k ? k-- : 0, j = sa[rak[i] - 1]; r[i + k] == r[j + k]; k++);
    for (i = n; i; i--) {
        rak[i] = rak[i - 1];
        sa[i]++;
    }
}

int main()
{
    while (scanf("%s", s + 1) != EOF) {
        int ls = strlen(s + 1);
        len = 0;
        for (int i = 1; i <= ls; i++) cal[++len] = s[i];
        cal[len + 1] = 0;
        da(cal + 1, sa, len + 1, 150);
        calc(cal + 1, sa, len);
        for (int i = 1; i <= len; i++)
            b[i] = b[i - 1] + len - sa[i] + 1 - height[i];
        scanf("%d", &q);
        int l = 0, r = 0;
        while (q--) {
            ll k;
            scanf("%lld", &k);
            k = (l ^ r ^ k) + 1;
            if (k > b[len]) {
                l = r = 0;
                printf("%d %d
", l, r);
                continue;
            }
            int pos = lower_bound(b + 1, b + len + 1, k) - b;
            int t = k - b[pos - 1];
            l = sa[pos], r = sa[pos] + height[pos] + t - 1;
            int rd = r - l + 1;
            while (pos + 1 <= len && height[pos + 1] >= rd) {
                pos++;
                if (sa[pos] < l) {
                    l = sa[pos];
                    r = l + rd - 1;
                }
            }
            printf("%d %d
", l, r);
        }
        for (int i = 1; i <= ls; i++) s[i] = '';
    }
    return 0;
}
View Code
原文地址:https://www.cnblogs.com/zzzzzzy/p/12616006.html