后缀数组

两篇论文:许智磊后缀数组.pdf   后缀数组——处理字符串的有力工具.pdf

贴两模版:

DA:

/*
 * Suffix array, prefix-doubling implementation, O(n log n).
 *   sa[i]     : start position of the i-th smallest suffix, i.e. suffix sa[i]
 *   rank[i]   : index of suffix i inside sa, i.e. suffix i is the rank[i]-th smallest
 *   height[i] : LCP(suffix(sa[i-1]), suffix(sa[i]))
 */
int sa[N];
int rank[N];
int height[N];
// Scratch storage for DA: ws holds the counting-sort buckets, wa/wb are the
// two key arrays that DA swaps after every round.
int ws[N];
int wa[N];
int wb[N];

// Returns true when the key pairs (r[a], r[a+l]) and (r[b], r[b+l]) are equal.
// The second component is only read if the first components already match.
bool cmp(int *r, int a, int b, int l) {
    if (r[a] != r[b]) {
        return false;
    }
    return r[a + l] == r[b + l];
}
/*
 * Builds the suffix array of r[0..n-1] into the global `sa` by prefix doubling.
 *
 *   r : the input string.
 *   n : strlen(s) + 1, i.e. the length including the terminating '\0'.
 *   m : max(r[i]) + 1, the number of counting-sort buckets; 128 is enough for
 *       plain ASCII text. Pass 256 for arbitrary bytes.
 *
 * Uses the globals ws, wa and wb as scratch space.
 */
void DA(char *r, int n, int m = 128) {
    int i, j, p, *x = wa, *y = wb;
    // Round 0: counting sort of all suffixes by their first character.
    for (i=0; i<m; ++i) ws[i] = 0;
    // Go through unsigned char: plain char may be signed, and a byte >= 0x80
    // would otherwise become a negative bucket index.
    for (i=0; i<n; ++i) ws[x[i] = static_cast<unsigned char>(r[i])]++;
    for (i=1; i<m; ++i) ws[i] += ws[i-1];
    for (i=n-1; i>=0; --i) sa[--ws[x[i]]] = i;
    // Each round sorts by the first 2*j characters using the order by the
    // first j characters; it stops once all n keys are distinct (p == n).
    for (j=1, p=1; p<n; j<<=1, m=p) {
        // y = suffixes ordered by their second half. Suffixes starting in the
        // last j positions have no second half and therefore come first.
        for (p=0, i=n-j; i<n; ++i) y[p++] = i;
        for (i=0; i<n; ++i) if (sa[i] >= j) y[p++] = sa[i] - j;
        // Counting sort by the first half (x), walking y backwards so that
        // the second-half order breaks ties.
        for (i=0; i<m; ++i) ws[i] = 0;
        for (i=0; i<n; ++i) ws[x[y[i]]]++;
        for (i=1; i<m; ++i) ws[i] += ws[i-1];
        for (i=n-1; i>=0; --i) sa[--ws[x[y[i]]]] = y[i];
        // y now holds the old keys; x receives the new ones.
        std::swap (x, y);
        for (p = 1, x[sa[0]] = 0, i=1; i<n; ++i) {
            x[sa[i]] = cmp (y, sa[i-1], sa[i], j) ? p - 1 : p++;
        }
    }
}
/*
 * Fills the global `rank` and `height` arrays from a finished suffix array.
 *
 *   r  : the string the suffix array was built from.
 *   sa : suffix array; entries sa[1..n] are read for the ranks and sa[0..n-1]
 *        as predecessors. Per the original note, sa[0] is expected to be n,
 *        the suffix consisting only of the terminator.
 *   n  : number of suffixes to process (positions 0..n-1).
 *
 * The character scan has no explicit bound and stops at the first mismatch.
 */
void calc_height(char *r, int *sa, int n) {
    // Invert the suffix array: pos is the sorted position of suffix sa[pos].
    for (int pos = 1; pos <= n; ++pos) {
        rank[sa[pos]] = pos;
    }
    int lcp = 0;
    // Visit suffixes in text order; the previous match length minus one is
    // reused as the starting point for the next suffix.
    for (int start = 0; start < n; ++start) {
        if (lcp > 0) {
            --lcp;
        }
        const int prev = sa[rank[start] - 1];
        while (r[start + lcp] == r[prev + lcp]) {
            ++lcp;
        }
        height[rank[start]] = lcp;
    }
}
/*
	*LCP (suffix (i), suffix (j)) = min (height[l] to height[r]); //RMQ
	*l = rank[i], r = rank[j]; if (l > r) swap (l, r); l++;
*/

DC3:

/*
 * Suffix array, DC3 implementation, O(n).
 */
// Scratch buffers shared by sort(), c12() and DC3().
int wa[N];
int wb[N];
int wv[N];
int ws[N];
// Outputs of calc_height().
int rank[N];
int height[N];
// NOTE(review): DC3 recurses into r + n and sa + n, so these two need room
// beyond the input length (3x is the commonly cited bound) - confirm that N
// accounts for this.
int sa[N];
int r[N];

// Returns 1 when the triples y[a..a+2] and y[b..b+2] are equal, 0 otherwise.
// Comparison stops at the first differing element.
int c0(int *y,int a,int b) {
    for (int off = 0; off < 3; ++off) {
        if (y[a + off] != y[b + off]) {
            return 0;
        }
    }
    return 1;
}
// Ordering test used when merging in DC3: returns 1 when the suffix at a
// sorts before the suffix at b, 0 otherwise.
//   k == 2 : compare y[a] with y[b]; on a tie, compare positions a+1 / b+1
//            with the k != 2 rule.
//   else   : compare y[a] with y[b]; on a tie, compare the values stored in
//            the global wv for positions a+1 / b+1.
int c12(int k,int *y,int a,int b) {
    if (y[a] != y[b]) {
        return y[a] < y[b];
    }
    if (k == 2) {
        return c12(1, y, a + 1, b + 1);
    }
    return wv[a + 1] < wv[b + 1];
}
// Stable counting sort: writes the n indices of a into b, ordered by the key
// r[a[i]]. Keys must lie in [0, m). Uses the globals wv (gathered keys) and
// ws (bucket counters) as scratch space.
void sort(int *r,int *a,int *b,int n,int m) {
    // Gather the key of every element.
    for (int idx = 0; idx < n; ++idx) {
        wv[idx] = r[a[idx]];
    }
    // Histogram of keys, then prefix sums so ws[key] is one past the last
    // output slot for that key.
    for (int key = 0; key < m; ++key) {
        ws[key] = 0;
    }
    for (int idx = 0; idx < n; ++idx) {
        ++ws[wv[idx]];
    }
    for (int key = 1; key < m; ++key) {
        ws[key] += ws[key - 1];
    }
    // Place from the back so equal keys keep their input order.
    for (int idx = n; idx-- > 0; ) {
        b[--ws[wv[idx]]] = a[idx];
    }
}
// Index mapping between positions i (i % 3 != 0) of the current string and
// positions in the reduced string: mod-1 positions occupy [0, tb), mod-2
// positions follow. Both macros read the local variable `tb` of DC3.
#ifndef F
#define F(x) ((x)/3+((x)%3==1?0:tb))
#endif
#ifndef G
#define G(x) ((x)<tb?(x)*3+1:((x)-tb)*3+2)
#endif
/*
 * Builds the suffix array of r[0..n-1] into sa[0..n-1] with the DC3
 * (difference cover modulo 3) algorithm.
 *
 *   r  : input sequence; values must lie in [0, m). r[n] and r[n+1] are
 *        overwritten with 0, and the recursion stores the reduced string at
 *        r + n, so the buffer must extend past n.
 *   sa : output; the recursion writes its own result at sa + n, so this
 *        buffer must extend past n as well.
 *   n  : length of the sequence.
 *   m  : number of counting-sort buckets (exclusive upper bound on values).
 *
 * NOTE(review): the usual requirement is room for 3 * n ints in r and sa -
 * confirm against the array declarations.
 * Uses the globals wa, wb, wv and ws as scratch space.
 */
void DC3(int *r,int *sa,int n,int m) {
    int i,j,*rn=r+n,*san=sa+n,ta=0,tb=(n+1)/3,tbc=0,p;
    r[n]=r[n+1]=0;
    // Collect the sample positions (i % 3 != 0) and sort them by the triple
    // (r[i], r[i+1], r[i+2]) with three stable passes, last key first.
    for(i=0;i<n;i++) if(i%3!=0) wa[tbc++]=i;
    sort(r+2,wa,wb,tbc,m);
    sort(r+1,wb,wa,tbc,m);
    sort(r,wa,wb,tbc,m);
    // Name the triples: equal triples share a name; p counts distinct names.
    for(p=1,rn[F(wb[0])]=0,i=1;i<tbc;i++)
        rn[F(wb[i])]=c0(r,wb[i-1],wb[i])?p-1:p++;
    // Names not unique: sort the reduced string recursively. The original
    // called the undeclared lowercase `dc3` here.
    if(p<tbc) DC3(rn,san,tbc,p);
    else for(i=0;i<tbc;i++) san[rn[i]]=i;
    // Mod-0 positions, listed in the order of their mod-1 successors, then
    // stably sorted by their first character.
    for(i=0;i<tbc;i++) if(san[i]<tb) wb[ta++]=san[i]*3;
    // Position n-1 has no mod-1 successor inside the string; add it separately.
    if(n%3==1) wb[ta++]=n-1;
    sort(r,wb,wa,ta,m);
    // Map the sorted sample back to original positions (wb) and record each
    // sample position's rank in wv for c12.
    for(i=0;i<tbc;i++) wv[wb[i]=G(san[i])]=i;
    // Merge the sorted mod-0 suffixes (wa) with the sorted sample (wb).
    for(i=0,j=0,p=0;i<ta && j<tbc;p++)
        sa[p]=c12(wb[j]%3,r,wa[i],wb[j])?wa[i++]:wb[j++];
    for(;i<ta;p++) sa[p]=wa[i++];
    for(;j<tbc;p++) sa[p]=wb[j++];
}
/*
 * Fills the global `rank` and `height` arrays from a finished suffix array.
 *
 *   r  : the integer sequence the suffix array was built from.
 *   sa : suffix array; entries sa[1..n] are read for the ranks and sa[0..n-1]
 *        as predecessors.
 *   n  : number of suffixes to process (positions 0..n-1).
 *
 * The element scan has no explicit bound and stops at the first mismatch.
 */
void calc_height(int *r,int *sa,int n) {
    // rank is the inverse permutation of sa over sorted positions 1..n.
    for (int idx = 1; idx <= n; ++idx) {
        rank[sa[idx]] = idx;
    }
    int k = 0;
    int i = 0;
    while (i < n) {
        // Reuse the previous match length minus one as the starting point.
        if (k != 0) {
            --k;
        }
        int j = sa[rank[i] - 1];
        while (r[i + k] == r[j + k]) {
            ++k;
        }
        height[rank[i]] = k;
        ++i;
    }
}

  

原文地址:https://www.cnblogs.com/Running-Time/p/5450483.html