SPOJ PHRASES 后缀数组

题目链接:http://www.spoj.com/problems/PHRASES/en/

题意:给定 n 个字符串,求一个最长的子串,使它在每个字符串中不重叠出现的次数都不小于 2。输出满足条件的最长子串的长度。

思路:根据《后缀数组——处理字符串的有力工具》的思路,先将 n 个字符串连起来,中间用互不相同且没有出现在字符串中的字符隔开,求后缀数组。然后二分答案,再将后缀按 height 值分组。判断的时候,要看是否存在一组后缀,在每个原来的字符串中至少出现两次,并且在每个原来的字符串中,后缀起始位置的最大值与最小值之差不小于当前答案(判断能否做到不重叠;如果题目中没有不重叠的要求,那么不用做此判断)。这个做法的时间复杂度为 O(n log n)。

#define _CRT_SECURE_NO_DEPRECATE
#include<iostream>
#include<cstdio>
#include<cstring>
#include<algorithm>
#include<string>
#include<queue>
#include<vector>
#include<time.h>
#include<cmath>
#include<set>
using namespace std;
// 64-bit integer alias; not referenced anywhere in the visible code.
typedef long long int LL;
// Capacity of every global work array (200000 entries).
// NOTE(review): presumably 10000 chars per string * 10 strings * 2 for headroom
// (separators and the unchecked reads at index a + l inside cmp) - confirm against the limits.
const int MAXN = 10000 * 10 * 2;
// Scratch storage for da(): wa and wb hold the two alternating rank arrays,
// wv holds the first-key rank of each suffix during a sorting pass,
// and WS holds the counting-sort bucket counters.
int wa[MAXN], wb[MAXN], wv[MAXN], WS[MAXN];
// Compares two rank pairs: returns 1 when (r[a], r[a + l]) and (r[b], r[b + l])
// are equal component-wise, and 0 otherwise. The second components are only
// inspected when the first ones already match.
int cmp(int *r, int a, int b, int l)
{
    if (r[a] != r[b]) {
        return 0;
    }
    return (r[a + l] == r[b + l]) ? 1 : 0;
}
// Builds the suffix array of r[0..n-1] by prefix doubling, sorting with counting sort.
//   r  - input symbols; each value is used as an index into WS, so it must lie in [0, m)
//   sa - output: sa[k] receives the start index of the suffix placed at position k
//   n  - number of symbols to process
//   m  - exclusive upper bound on the symbol values (bucket count of the first pass)
// Works in the global scratch arrays wa, wb, wv and WS.
// NOTE(review): cmp() reads y[sa[i] + j] without a bounds check; this appears to rely on
// the last symbol being unique so that equal first keys never reach past the end - confirm.
void da(int *r, int *sa, int n, int m)
{
    int i, j, p, *x = wa, *y = wb, *t;
    // First pass: counting sort of all suffixes by their first symbol (x[i] = r[i]).
    for (i = 0; i < m; i++) WS[i] = 0;
    for (i = 0; i < n; i++) WS[x[i] = r[i]]++;
    for (i = 1; i < m; i++) WS[i] += WS[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--WS[x[i]]] = i;
    // Doubling rounds: j is the length already ranked, p the number of distinct ranks
    // produced by the round. Stops once p reaches n; m becomes p for the next round.
    for (j = 1, p = 1; p < n; j *= 2, m = p)
    {
        // Order by second key: suffixes starting in the last j positions are listed first ...
        for (p = 0, i = n - j; i < n; i++) y[p++] = i;
        // ... followed by the others in the order their second half (sa[i]) has in sa.
        for (i = 0; i < n; i++) if (sa[i] >= j) y[p++] = sa[i] - j;
        // wv[i] is the first-key rank of the suffix at position i of that ordering.
        for (i = 0; i < n; i++) wv[i] = x[y[i]];
        // Counting sort by first key; filling from the back keeps the second-key order.
        for (i = 0; i < m; i++) WS[i] = 0;
        for (i = 0; i < n; i++) WS[wv[i]]++;
        for (i = 1; i < m; i++) WS[i] += WS[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--WS[wv[i]]] = y[i];
        // Swap x and y (y now holds the old ranks), then assign new ranks in sa order:
        // neighbours whose (first, second) rank pairs are equal share a rank.
        for (t = x, x = y, y = t, p = 1, x[sa[0]] = 0, i = 1; i < n; i++)
            x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
    }
    return;
}
// Suffix-array tables for the concatenated input:
//   Rank[p]   - position of the suffix starting at p within sa (written by calheight)
//   height[k] - length of the common prefix of suffixes sa[k - 1] and sa[k] (written by calheight)
//   sa[k]     - start index of the suffix at position k (written by da)
int Rank[MAXN], height[MAXN], sa[MAXN];
// Fills the global Rank[] and height[] tables for the n + 1 suffixes r[0..n].
//   r  - the symbol sequence that sa was built from (indices 0..n are read)
//   sa - suffix array with n + 1 entries, sa[0..n]
//   n  - index of the last symbol, i.e. the sequence length minus one
// After the call Rank[sa[k]] == k for every k in [0, n], height[0] == 0, and for
// k >= 1 height[k] is the length of the common prefix of suffixes sa[k - 1] and sa[k].
//
// The rank-0 suffix may start anywhere: it is detected and skipped rather than
// assumed to be the final symbol, so sa[Rank[i] - 1] is never read with Rank[i] == 0,
// and every rank gets a freshly written height (no values left over from an earlier call).
void calheight(int *r, int *sa, int n){
    if (n < 0) { return; }  // empty sequence: nothing to compute
    for (int i = 0; i <= n; i++) { Rank[sa[i]] = i; }
    height[0] = 0;
    int k = 0;
    for (int i = 0; i <= n; i++){
        if (Rank[i] == 0){
            // The smallest suffix has no predecessor; restart the running LCP bound.
            k = 0;
            continue;
        }
        // The LCP of suffix i with its predecessor is at least the previous one minus 1.
        if (k > 0) { k--; }
        const int j = sa[Rank[i] - 1];
        // Bounds are checked explicitly instead of relying on a unique terminator.
        while (i + k <= n && j + k <= n && r[i + k] == r[j + k]) { k++; }
        height[Rank[i]] = k;
    }
    return;
}
// Per-test-case state shared by main, check and solve:
//   r[]     - concatenated symbol sequence (letters shifted above the separator values)
//   len     - number of symbols stored in r
//   n       - number of input strings in the current test case
//   t       - number of test cases still to process
//   Index[] - Index[p] is the 1-based number of the input string that position p belongs to
//   vis[]   - scratch flag array
int r[MAXN], len, n, t, Index[MAXN],vis[MAXN];
// Read buffer for one input string.
char sub[10000+5];
// Per-string statistics for the suffix group currently being examined:
//   cnt   - how many suffixes of the group start inside this string
//   maxsa - largest start position among them (-1 when there are none)
//   minsa - smallest start position among them (MAXN when there are none)
// node[k] holds the statistics of input string number k.
struct Node{
    int cnt, maxsa, minsa;

    // Resets the record to the "no suffix seen" state.
    void init(){
        cnt = 0;
        maxsa = -1;
        minsa = MAXN;
    }
}node[20];
bool check(int x){
    int tot = 0,idx,Lidx;
    for (int i = 1; i <= n; i++){
        node[i].init();
    }
    memset(vis, 0, sizeof(vis));
    for (int i = 1; i < len; i++){
        //heigth[i]是sa[i]和sa[i-1]的LCP
        idx = Index[sa[i]], Lidx = Index[sa[i - 1]];
        if (i == len - 1){
            for (int k = 1; k <= n; k++){
                //判断每个字符串的出现次数和后缀的起始位置的最大值和最小值的差是否不小于x
                if (node[k].cnt >= 2 && node[k].maxsa - node[k].minsa >= x){
                    tot++;
                }
                node[k].init();
            }
            if (tot == n){ return true; }//n个串都满足要求,说明长度x存在
            tot = 0;
            break;
        }
        if (height[i] >= x){
            if (!vis[i]){//每个后缀只算一次
                vis[i] = 1; node[idx].cnt++; //记录后缀在该组出现的次数
                node[idx].maxsa = max(node[idx].maxsa, sa[i]);//最大值
                node[idx].minsa = min(node[idx].minsa, sa[i]);//最小值
            }
            if (!vis[i-1]){
                vis[i - 1] = 1; node[Lidx].cnt++;
                node[Lidx].maxsa = max(node[Lidx].maxsa, sa[i-1]);
                node[Lidx].minsa = min(node[Lidx].minsa, sa[i-1]);
            }
        }
        else{
            for (int k = 1; k <= n; k++){ 
                //判断每个字符串的出现次数和后缀的起始位置的最大值和最小值的差是否不小于x
                if (node[k].cnt >= 2&&node[k].maxsa-node[k].minsa>=x){
                    tot++;
                }
                node[k].init();
            }
            if (tot == n){ return true;} //n个串都满足要求,说明长度x存在
            tot = 0;
        }
    }
    return false;
}
// Binary-searches the largest length for which check() succeeds and prints it,
// followed by a newline (0 is printed when no length qualifies).
// The search is valid because a qualifying substring of length x yields a qualifying
// one of length x - 1 (drop its last character).
// NOTE(review): the upper bound 10000 / 2 presumably reflects that two non-overlapping
// occurrences must fit in one string of at most 10000 characters - confirm the limit.
void solve(){
    int lo = 1, hi = 10000 / 2, best = 0;
    while (lo <= hi){
        const int mid = lo + (hi - lo) / 2;
        if (check(mid)){
            best = mid;      // mid works: look for something longer
            lo = mid + 1;
        }
        else{
            hi = mid - 1;    // mid fails: every longer length fails too
        }
    }
    printf("%d\n", best);
}
// Reads t test cases. For each one: reads n strings, concatenates them into r[] with a
// distinct separator after every string, builds the suffix array and height table, and
// prints the answer through solve().
int main(){
    if (scanf("%d", &t) != 1){ return 0; }
    while (t--){
        if (scanf("%d", &n) != 1){ break; }
        len = 0;
        for (int i = 1; i <= n; i++){
            // Width-limited read into the 10005-byte buffer; a failed read counts as empty.
            if (scanf("%10004s", sub) != 1){ sub[0] = '\0'; }
            const int subLen = static_cast<int>(strlen(sub));  // computed once, not per iteration
            for (int j = 0; j < subLen; j++){
                Index[len] = i;  // position len belongs to input string i
                // Letters are shifted to n + 1 and above, so they never equal a separator.
                r[len++] = (sub[j] - 'a' + n + 1);
            }
            Index[len] = i;
            // Separators are n - 1, n - 2, ..., 0: all distinct, all below every letter,
            // and the very last symbol is 0, the unique minimum. The suffix-array
            // routines are called with that final symbol acting as the terminator.
            r[len++] = n - i;
        }
        da(r, sa, len, 128);
        calheight(r, sa, len - 1);
        solve();
    }
    return 0;
}
原文地址:https://www.cnblogs.com/kirito520/p/5774584.html