HDU 6704 K-th occurrence(后缀数组,主席树,st表,二分)

题目链接

题目大意

  给你一个字符串,问substr(l,r)第k次出现的第一个字符的下标。

解题思路

  对于所有满足条件的子串,以其首字母开头的所有后缀的lcp一定都是大于等于这个子串长度的,根据lcp的性质,(lcp(i, j) = min(lcp(k_1, k_2), i leq k_1,k_2 leq j)。我们可以得到如果将所有子串按字典序排序,则所有满足条件的后缀必定是一个连续区间,那么第k次出现的下标就是这个区间第k小的数。
  具体写法是,我们可以先求出后缀数组,然后按排名将sa[i]插入主席树中,并且用st表预处理出height数组的区间最小值。对于每个询问,我们可以知道其所在的后缀的排名t,然后我们再用st表二分出其所在的lcp大于等于r-l+1的区间的左右端点L,R,最后用主席树求出区间第k小即可。

代码

const int maxn = 1e5+10;
char s[maxn];
int n, m;
int sa[maxn], x[maxn], y[maxn], c[maxn];
void get_sa() {
    for (int i = 1; i<=n; ++i) ++c[x[i]=s[i]];
    for (int i = 2; i<=m; ++i) c[i] += c[i-1];
    for (int i = n; i; --i) sa[c[x[i]]--] = i;
    for (int k = 1; k<=n; k<<=1) {
        int num = 0;
        for (int i = n-k+1; i<=n; ++i) y[++num] = i;
        for (int i = 1; i<=n; ++i)
            if (sa[i]>k) y[++num] = sa[i]-k;
        for (int i = 1; i<=m; ++i) c[i] = 0;
        for (int i = 1; i<=n; ++i) ++c[x[i]];
        for (int i = 2; i<=m; ++i) c[i] += c[i-1];
        for (int i = n; i; --i) sa[c[x[y[i]]]--] = y[i], y[i] = 0;
        swap(x, y);
        x[sa[1]] = 1, num = 1;
        for (int i = 2; i<=n; ++i) 
            x[sa[i]] = (y[sa[i]]==y[sa[i-1]] && y[sa[i]+k]==y[sa[i-1]+k]) ? num:++num;
        if (num==n) break;
        m = num;
    }
}
int rk[maxn], h[maxn];
void get_h() {
    for (int i = 1; i<=n; ++i) rk[sa[i]] = i;
    for (int i = 1, k = 0; i<=n; ++i) {
        if (rk[i]==1) continue;
        if (k) --k;
        int j = sa[rk[i]-1];
        while(i+k<=n && j+k<=n && s[i+k]==s[j+k]) ++k;
        h[rk[i]] = k;
    }
}
int f[maxn][20], lg[maxn];
void build() {
    for (int i = n; i; --i) {
        f[i][0] = h[i];
        for (int j = 1; j<21; ++j)
            if (i+(1<<j-1)<=n) f[i][j] = min(f[i][j-1], f[i+(1<<j-1)][j-1]);
    }
    for (int i = 2; i<=n; ++i) lg[i] = lg[i/2]+1;
}
int query(int l, int r) {
    if (l>r) return INF;
    int len = r-l+1;
    int k = lg[len];
    return min(f[l][k], f[r-(1<<k)+1][k]);
}
struct Node {
    int l, r, sum;
} hjt[maxn*40];
int tot, rt[maxn];
void init() {
    tot = 0; 
    clr(c, 0); clr(h, 0); 
}
void insert(int pre, int &now, int l, int r, int pos) {
    now = ++tot;
    hjt[now] = hjt[pre];
    ++hjt[now].sum;
    int mid = (l+r)>>1;
    if (l==r) return;
    if (pos<=mid) insert(hjt[pre].l, hjt[now].l, l, mid, pos);
    else insert(hjt[pre].r, hjt[now].r, mid+1, r, pos);
}
int ask(int pre, int now, int l, int r, int k) {
    if (l==r) return l;
    int mid = (l+r)>>1;
    int t = hjt[hjt[now].l].sum-hjt[hjt[pre].l].sum;
    if (k<=t) return ask(hjt[pre].l, hjt[now].l, l, mid, k);
    else return ask(hjt[pre].r, hjt[now].r, mid+1, r, k-t);
}
int main() {
    IOS;
    int __; cin >> __;
    while(__--) {
        init();
        int q; cin >> n >> q;
        cin >> s+1;
        m = 122; 
        get_sa(); get_h(); build();
        //for (int i = 1; i<=n; ++i) cout << h[i] << ' ' << rk[i] << endl;
        for (int i = 1; i<=n; ++i) insert(rt[i-1], rt[i], 1, n, sa[i]);
        while(q--) {
            int ql, qr, k; cin >> ql >> qr >> k;
            //cout << ql << endl;
            int len = qr-ql+1;
            int t = rk[ql];
            int L = 1, R = t-1;
            while(L<R) {
                int mid = (L+R)>>1;
                if (query(mid+1, t)>=len) R = mid;
                else L = mid+1;
            }
            int l = L;
            if (query(L+1, t)<len) ++l;
            L = t+1, R = n;
            while(L<R) {
                int mid = (L+R+1)>>1;
                if (query(t+1, mid)>=len) L = mid;
                else R = mid-1;
            }
            int r = R;
            if (query(t+1, r)<len) --r;
            if (l>r && k==1) {
                cout << ql << endl;
                continue;
            }
            if (r-l+1<k) cout << -1 << endl;
            else cout << ask(rt[l-1], rt[r], 1, n, k) << endl;
        }
    }
    return 0; 
}
原文地址:https://www.cnblogs.com/shuitiangong/p/15237591.html