POJ 1226 Substrings (后缀数组)

Substrings

Time Limit: 1000MS		Memory Limit: 10000K
Total Submissions: 10244		Accepted: 3515

Description

You are given a number of case-sensitive strings of alphabetic characters, find the largest string X, such that either X, or its inverse can be found as a substring of any of the given strings.

Input

The first line of the input contains a single integer t (1 <= t <= 10), the number of test cases, followed by the input data for each test case. The first line of each test case contains a single integer n (1 <= n <= 100), the number of given strings, followed by n lines, each representing one string of minimum length 1 and maximum length 100. There is no extra white space before and after a string.

Output

There should be one line per test case containing the length of the largest string found.

Sample Input

2
3
ABCD
BCDFF
BRCD
2
rose
orchid

Sample Output

2
2

Source

Tehran 2002 Preliminary

#include<cstdio>
#include<cstring>

using namespace std;

const int maxn=20200;

int wa[maxn],wb[maxn],wv[maxn], ws[maxn];

int cmp(int *r,int a,int b,int l){
    return r[a]==r[b] && r[a+l]==r[b+l];
}

void da(int *r,int *sa,int n,int m){
    int i,j,p,*x=wa,*y=wb,*t;
    for(i=0;i<m;i++)    ws[i]=0;
    for(i=0;i<n;i++)    ws[x[i]=r[i]]++;
    for(i=1;i<m;i++)    ws[i]+=ws[i-1];
    for(i=n-1;i>=0;i--)     sa[--ws[x[i]]]=i;
    for(j=1,p=1;p<n;j<<=1,m=p){
        for(p=0,i=n-j;i<n;i++)  y[p++]=i;
        for(i=0;i<n;i++)    if(sa[i]>=j)    y[p++]=sa[i]-j;
        for(i=0;i<n;i++)    wv[i]=x[y[i]];
        for(i=0;i<m;i++)    ws[i]=0;
        for(i=0;i<n;i++)    ws[wv[i]]++;
        for(i=1;i<m;i++)    ws[i]+=ws[i-1];
        for(i=n-1;i>=0;i--)     sa[--ws[wv[i]]]=y[i];
        for(t=x,x=y,y=t,p=1,x[sa[0]]=0,i=1;i<n;i++)
            x[sa[i]]=cmp(y,sa[i-1],sa[i],j)?p-1:p++;
    }
}

int rank[maxn],height[maxn];

void calheight(int *r,int *sa,int n){
    int i,j,k=0;
    for(i=1;i<=n;i++)   rank[sa[i]]=i;
    for(i=0;i<n;height[rank[i++]]=k)
        for(k?k--:0,j=sa[rank[i]-1];r[i+k]==r[j+k];k++) ;
}

int r[maxn],sa[maxn];
int who[maxn],yes[110]={0},cnt=0;
int len,n;

int check(int mid){
    int i,j,k,t,s;
    for(i=2;i<=len;i=j+1){
        for(;height[i]<mid && i<=len;i++) ;
        for(j=i;height[j]>=mid;j++) ;
        if(j-i+1<n)
            continue;
        cnt++;
        s=0;
        for(k=i-1;k<j;k++)
            if((t=who[sa[k]])!=0 && yes[t]!=cnt)
                yes[t]=cnt,s++;
        if(s>=n)
            return 1;
    }
    return 0;
}

char st[110];
int main()
{
    //freopen("input.txt","r",stdin);

    int i,j,k,min,mid,max,nn;
    scanf("%d",&nn);
    while(nn-->0)
    {
      scanf("%d",&n);
      len=0;
      for(i=1;i<=n;i++)
      {
        scanf("%s",st);
        k=strlen(st);
        for(j=0;j<k;j++)
        {
          r[j+len]=st[j]+200;
          who[j+len]=i;
        }
        r[len+k]=2*i-1;
        who[len+k]=0;
        len+=k+1;
        for(j=0;j<k;j++)
        {
          r[j+len]=st[k-1-j]+200;
          who[j+len]=i;
        }
        r[len+k]=2*i;
        who[len+k]=0;
        len+=k+1;
      }
      len--;
      r[len]=0;
      da(r,sa,len+1,328);
      calheight(r,sa,len);
      height[len+1]=-1;
      min=1;max=100;
      while(min<=max)
      {
        mid=(min+max)>>1;
        if(check(mid)) min=mid+1;
        else max=mid-1;
      }
      if(n==1) max=len/2;
      printf("%d\n",max);
    }
    return 0;
}

2,标准库函数strncpy()，可以将一字符串的一部分拷贝到另一个字符串中。strncpy()函数有3个参数：第一个参数是目录字符串；第二个参数是源字符串；第三个参数是一个整数，代表要从源字符串拷贝到目标字符串中的字符数

题意：给定n个串，求一个最大子串长度，使得它或者它的逆向串在每个串中出现。

思路：先求出最短的串sho[]，然后枚举答案长度ans（len~1）。于是sho[]就被分成了len-ans+1个子串pos[],再分别求出这些字串的反串inv[],判断pos[]和inv[]是否都出现在所有的串中。

#include<iostream>
#include<cstdio>
#include<cstring>

using namespace std;

int n;
char str[110][110],sho[110];

int main(){

    //freopen("input.txt","r",stdin);

    int t;
    scanf("%d",&t);
    while(t--){
        scanf("%d",&n);
        int len=110,k;
        for(int i=0;i<n;i++){
            scanf("%s",str[i]);
            int tmp=strlen(str[i]);
            if(tmp<len){
                len=tmp;
                k=i;    // 找出最短的串。
            }
        }
        strcpy(sho,str[k]);
        int ans=len,flag=0;
        char pos[110],inv[110];
        while(ans>0){
            for(int i=0;i<=len-ans;i++){
                strncpy(pos,sho+i,ans); // 求出正序子串pos[]。 
                for(int j=0;j<ans;j++)
                    inv[j]=pos[ans-1-j];    // 求出反序子串inv[]。
                pos[ans]=inv[ans]='\0';
                flag=1;
                for(int j=0;j<n;j++)
                    if(!strstr(str[j],pos) && !strstr(str[j],inv)){ // 判断pos[]，inv[]是否都出现在n个串中。运用strstr函数。
                        flag=0;
                        break;
                    }
                if(flag)
                    break;
            }
            if(flag)
                break;
            else
                ans--;
        }
        printf("%d\n",ans);
    }
    return 0;
}

3,KMP

/*
 * 1226 KMP
 *
 * 用的KMP， O(n^3)
 * 由于数据规模小， 直接暴力枚举最短串的各个子串再依次与其他串KMP应该也能过 (复杂度也是O(n^3))
 *
 * 这里不直接枚举各个子串， 而是枚举子串的起点位置， 依次与其他串KMP， 记录和每个串及其反串的最大匹配长度，
 *     再以此计算这些值的最小值， 就得到该起点的子串所能达到的最大匹配长度(与刚才的方法其实差不多)
 *
 */

#include <cstdio>
#include <cstring>
using namespace std;

const int maxN = 100 + 5;
const int maxL = 100 + 5;
//minLen, minNum : 最短串
int caseNum, n, minLen, minNum;
int next[maxL];
//串 反串
char str[maxN][maxL], restr[maxN][maxL];


void reverseString(int i, int len){
    for(int j=0; j<len; j++)
        restr[i][j] = str[i][len - j -1];
    restr[i][len] = 0;
}

//以start为起点的模式
void preKMP(int start){
    int j, k;
    next[start] = start - 1;
    j = start, k = start - 1;
    while(j < minLen - 1){
        while(k > start - 1 && str[minNum][j] != str[minNum][k]){
            k = next[k];
        }
        j++, k++;
        if(str[minNum][j] == str[minNum][k])
            next[j] = next[k];
        else
            next[j] = k;
    }
}


int KMP(int t, int start){
    int i, j, ansMax, tmpMax = start;
    int tLen = strlen(str[t]);

    i = 0; j = start;
    while(i < tLen && j < minLen){

        while(j >= start && str[t][i] != str[minNum][j]){
            j = next[j];
        }
        i++, j++;
        //记录最大的j
        if(tmpMax < j)
            tmpMax = j;
    }
    ansMax = tmpMax - start;
    tmpMax = start;
    i = 0; j = start;
    while(i < tLen && j <minLen){

        while(j >= start && restr[t][i] != str[minNum][j]){
            j = next[j];
        }
        i++, j++;
        if(tmpMax < j)
            tmpMax = j;
    }
    //串与反串的最大j
    if(ansMax < tmpMax - start)
        ansMax = tmpMax - start;

    return ansMax;
}


int main(){
    scanf("%d", &caseNum);
    while(caseNum--){
        scanf("%d", &n);

        //注意n == 1 ， 为此WA了两次！！
        if(n == 1){
            scanf("%s", str[0]);
            printf("%d\n", strlen(str[0]));
            continue;
        }

        minLen = maxL;
        for(int i=0; i<n; i++){
            scanf("%s", str[i]);
            int len = strlen(str[i]);
            reverseString(i, len);

            if(len < minLen){
                minLen = len;
                minNum = i;
            }
        }

        int tmpMin = maxL, tmp, tmpMax = -1;
        //枚举起点
        for(int i=0; i<minLen; i++){
            preKMP(i);
            tmpMin = maxL;
            for(int j=0; j<n; j++){
                if(j == minNum) continue;

                tmp = KMP(j, i);
                if(tmp < tmpMin)
                    tmpMin = tmp;
            }
            
            if(tmpMax < tmpMin)
                tmpMax = tmpMin;
        }
        printf("%d\n", tmpMax);

    }

    return 0;
}