后缀数组---New Distinct Substrings

Description

Given a string, we need to find the total number of its distinct substrings.

Input

T- number of test cases. T<=20; Each test case consists of one string, whose length is <= 50000

Output

For each test case output one number saying the number of distinct substrings.

Example

Input:
2
CCCCC
ABABA

Output:
5
9

题意:给一个字符串长小于50000,求这个字符串的不同子串的个数;

思路:使用后缀数组算法先求出sa[],sa[i]表示排第i的后缀的开始位置下标,然后求出height[]数组,height[i]表示排名第i的后缀和排名第i-1的后缀的最大公共前缀的长度,容易知道排名i和i-1的串的子串重复个数为height[i]个,而原始串的子串个数为sum=n*(n+1)/2,故用sum减去所有的henght[]即为结果;
#include <iostream>
#include <algorithm>
#include <cstdio>
#include <cstring>
#define rep(i,n) for(int i = 0;i < n; i++)
using namespace std;
const int size=50005,INF=1<<30;
int rk[size],sa[size],height[size],w[size],wa[size],res[size];
void getSa (int len,int up) {
    int *k = rk,*id = height,*r = res, *cnt = wa;
    rep(i,up) cnt[i] = 0;
    rep(i,len) cnt[k[i] = w[i]]++;
    rep(i,up) cnt[i+1] += cnt[i];
    for(int i = len - 1; i >= 0; i--) {
        sa[--cnt[k[i]]] = i;
    }
    int d = 1,p = 0;
    while(p < len){
        for(int i = len - d; i < len; i++) id[p++] = i;
        rep(i,len)  if(sa[i] >= d) id[p++] = sa[i] - d;
        rep(i,len) r[i] = k[id[i]];
        rep(i,up) cnt[i] = 0;
        rep(i,len) cnt[r[i]]++;
        rep(i,up) cnt[i+1] += cnt[i];
        for(int i = len - 1; i >= 0; i--) {
            sa[--cnt[r[i]]] = id[i];
        }
        swap(k,r);
        p = 0;
        k[sa[0]] = p++;
        rep(i,len-1) {
            if(sa[i]+d < len && sa[i+1]+d <len &&r[sa[i]] == r[sa[i+1]]&& r[sa[i]+d] == r[sa[i+1]+d])
                k[sa[i+1]] = p - 1;
            else k[sa[i+1]] = p++;
        }
        if(p >= len) return ;
        d *= 2,up = p, p = 0;
    }
}

void getHeight(int len) {
    rep(i,len) rk[sa[i]] = i;
    height[0] =  0;
    for(int i = 0,p = 0; i < len - 1; i++) {
        int j = sa[rk[i]-1];
        while(i+p < len&& j+p < len&& w[i+p] == w[j+p]) {
            p++;
        }
        height[rk[i]] = p;
        p = max(0,p - 1);
    }
}

int getSuffix(char s[]) {
    int len = strlen(s),up = 0;
    for(int i = 0; i < len; i++) {
        w[i] = s[i];
        up = max(up,w[i]);
    }
    w[len++] = 0;
    getSa(len,up+1);
    getHeight(len);
    return len;
}

int main()
{
    int T;
    long long sum;
    char s[size];
    scanf("%d",&T);
    while(T--)
    {
        scanf("%s",s);
        sum=0;
        getSuffix(s);
        long long len=strlen(s);
        for(int i=0;i<=len;i++)
          sum+=height[i];
          sum=len*(len+1)/2-sum;
        printf("%lld
",sum);
    }
}
原文地址:https://www.cnblogs.com/chen9510/p/5471342.html