poj3415 Common Substrings

Common Substrings
Time Limit: 5000MS   Memory Limit: 65536K
Total Submissions: 12378   Accepted: 4172

Description

A substring of a string T is defined as:

T(i, k) = T_i T_{i+1} … T_{i+k-1}, 1 ≤ i ≤ i+k-1 ≤ |T|.

Given two strings A, B and one integer K, we define S, a set of triples (i, j, k):

S = {(i, j, k) | k ≥ K, A(i, k) = B(j, k)}.

You are to give the value of |S| for specific A, B and K.

Input

The input file contains several blocks of data. For each block, the first line contains one integer K, followed by two lines containing strings A and B, respectively. The input file is ended by K=0.

1 ≤ |A|, |B| ≤ 10^5
1 ≤ K ≤ min{|A|, |B|}
Characters of A and B are all Latin letters.

Output

For each case, output an integer |S|.

Sample Input

2
aababaa
abaabaa
1
xx
xx
0

Sample Output

22
5

Source

题目大意:求在两个字符串中都出现的并且长度≥K的子串个数(位置不同也算不同).
分析:做法太神了.
   这道题显然可以用后缀数组来做. 对于多个字符串的问题,先把它们拼在一起,求出ht,sa,rk数组. 然后考虑怎么统计.
   这道题统计答案并不只是统计相邻的后缀,既然能够不相邻,那么就要分组. 分别求出每一组的贡献就好了.
   假设后缀A,B在同一组里.那么A,B的贡献就是LCP - k + 1. 要求出在同一组中,对于所有的A,它前面的B与它的贡献和  ,对于B也要求一次. 如果两个后缀是相邻的就好处理了,不相邻的话求LCP就要用RMQ. 每次枚举两个端点复杂度太高,怎么办呢?
   一个非常神奇的做法是利用单调栈,A,B分开处理.单调栈维护ht数组.变量tot动态维护答案,如果当前枚举到的后缀在字符串B中,那么就把tot统计进答案里. cnt维护当前元素本身和它弹掉的元素的个数(包括弹掉的元素本身弹掉的元素,实际上就是把许多元素绑定在了一起,因为rmq要取最小的限制). 每次弹出元素的时候把弹掉的元素和当前元素绑定在一起,并维护tot就好了.这样就保证了其中的ht是最小的(满足rmq),还能顺便求和.
   真是一道好题!这种单调栈的利用方法要记住了.
#include <cstdio>
#include <cstring>
#include <iostream>
#include <algorithm>

using namespace std;

// Capacity of every working array. main() stores both input strings plus one
// separator character into s, so this must exceed the combined input length.
const int maxn = 200010;

// Text buffers; main() fills all three starting at index 1.
char s1[maxn];      // first input string
char s2[maxn];      // second input string
char s[maxn];       // s1, then the separator '&', then s2

// Lengths of the buffers above.
int len1;           // strlen of s1
int len2;           // strlen of s2
int len;            // number of characters stored in s

// Scratch space for the suffix array construction in solve().
int sett[maxn];     // sorted, de-duplicated character codes of s
int a[maxn];        // a[i] = 1-based index of s[i] within sett
int cnt;            // distinct-character count in solve(); per-step merge count in solve1()/solve2()
int fir[maxn];      // first sort key of position i in the current doubling round
int sec[maxn];      // second sort key of position i (0 when it falls past the end)
int tong[maxn];     // counting-sort buckets
int pos[maxn];      // positions ordered by second key

// Results of solve().
int rk[maxn];       // rk[i] = rank of the suffix starting at i
int sa[maxn];       // sa[r] = start index of the suffix with rank r
int ht[maxn];       // ht[r] = common prefix length of suffixes sa[r - 1] and sa[r]

int poss;           // index of the separator inside s
int K;              // minimum substring length read from the input
int sta[maxn][2];   // stack used by solve1()/solve2(): [0] = height, [1] = merged count

// Accumulators for the counting passes.
long long ans;      // final answer for the current test case
long long tot;      // running contribution of the stack contents
long long top;      // index of the top stack entry; 0 means empty

void solve()
{
    memset(rk,0,sizeof(rk));
    memset(sa,0,sizeof(sa));
    memset(ht,0,sizeof(ht));
    memset(fir,0,sizeof(fir));
    memset(sec,0,sizeof(sec));
    memset(pos,0,sizeof(pos));
    memset(tong,0,sizeof(tong));
    copy(s + 1,s + len + 1,sett + 1);
    sort(sett + 1,sett + 1 + len);
    cnt = unique(sett + 1,sett + 1 + len) - sett - 1;
    for (int i = 1; i <= len; i++)
        a[i] = lower_bound(sett + 1,sett + 1 + cnt,s[i]) - sett;
    for (int i = 1; i <= len; i++)
        tong[a[i]]++;
    for (int i = 1; i <= len; i++)
        tong[i] += tong[i - 1];
    for (int i = 1; i <= len; i++)
        rk[i] = tong[a[i] - 1] + 1;
    for (int t = 1; t <= len; t *= 2)
    {
        for (int i = 1; i <= len; i++)
            fir[i] = rk[i];
        for (int i = 1; i <= len; i++)
        {
            if (i + t > len)
                sec[i] = 0;
            else
                sec[i] = rk[i + t];
        }
        fill(tong,tong + 1 + len,0);
        for (int i = 1; i <= len; i++)
            tong[sec[i]]++;
        for (int i = 1; i <= len; i++)
            tong[i] += tong[i - 1];
        for (int i = 1; i <= len; i++)
            pos[len - --tong[sec[i]]] = i;
        fill(tong,tong + 1 + len,0);
        for (int i = 1; i <= len; i++)
            tong[fir[i]]++;
        for (int i = 1; i <= len; i++)
            tong[i] += tong[i - 1];
        for (int i = 1; i <= len; i++)
        {
            int temp = pos[i];
            sa[tong[fir[temp]]--] = temp;
        }
        bool flag = true;
        int last = 0;
        for (int i = 1; i <= len; i++)
        {
            int temp = sa[i];
            if (!last)
                rk[temp] = 1;
            else if (fir[temp] == fir[last] && sec[temp] == sec[last])
            {
                rk[temp] = rk[last];
                flag = false;
            }
            else
                rk[temp] = rk[last] + 1;
            last = temp;
        }
        if (flag)
            break;
    }
    int k = 0;
    for (int i = 1; i <= len; i++)
    {
        if (rk[i] == 1)
            k = 0;
        else
        {
            if (k)
                k--;
            int j = sa[rk[i] - 1];
            while (i + k <= len && j + k <= len && a[i + k] == a[j + k])
                k++;
        }
        ht[rk[i]] = k;
    }
}

void solve1()
{
    top = ans = tot = 0;
    for (int i = 2; i <= len; i++)
    {
        if (ht[i] < K)
        {
            top = tot = 0;
            continue;
        }
        cnt = 0;
        if (sa[i - 1] < poss)
        {
            cnt++;
            tot += ht[i] - K + 1;
        }
        while (top && ht[i] <= sta[top][0])
        {
            tot += (ht[i] - sta[top][0]) * sta[top][1];
            cnt += sta[top][1];
            top--;
        }
        sta[++top][0] = ht[i];
        sta[top][1] = cnt;
        if (sa[i] > poss)
            ans += tot;
    }
}

void solve2()
{
    top = tot = 0;
    for (int i = 2; i <= len; i++)
    {
        if (ht[i] < K)
        {
            top = tot = 0;
            continue;
        }
        cnt = 0;
        if (sa[i - 1] > poss)
        {
            cnt++;
            tot += ht[i] - K + 1;
        }
        while (top && ht[i] <= sta[top][0])
        {
            tot += sta[top][1] * (ht[i] - sta[top][0]);
            cnt += sta[top][1];
            top--;
        }
        sta[++top][0] = ht[i];
        sta[top][1] = cnt;
        if (sa[i] < poss)
            ans += tot;
    }
}

/**
 * Reads test cases until K is 0 or the input ends. Each case is an integer K
 * followed by two strings; the answer computed by solve(), solve1() and
 * solve2() is printed on its own line.
 *
 * The strings are read without a width limit, so the input is assumed to
 * respect the buffer capacity maxn.
 */
int main()
{
    // Require a successful conversion: comparing against EOF alone would
    // treat a non-numeric token (scanf returning 0) as a valid K.
    while (scanf("%d", &K) == 1 && K)
    {
        // Stop on a truncated case instead of processing stale buffers.
        if (scanf("%s", s1 + 1) != 1 || scanf("%s", s2 + 1) != 1)
            break;

        len = ans = 0;
        len1 = static_cast<int>(strlen(s1 + 1));
        len2 = static_cast<int>(strlen(s2 + 1));

        // Build s = s1 + '&' + s2 (1-based) and remember where the
        // separator sits so the two halves can be told apart later.
        for (int i = 1; i <= len1; i++)
            s[++len] = s1[i];
        s[++len] = '&';
        poss = len;
        for (int i = 1; i <= len2; i++)
            s[++len] = s2[i];

        solve();
        solve1();
        solve2();

        // The original literal was broken by a raw line break where the
        // "\n" escape belongs, which does not compile.
        printf("%lld\n", ans);
    }

    return 0;
}
   
原文地址:https://www.cnblogs.com/zbtrs/p/8547108.html