[TJOI 2017] DNA

\(\frak{Description}\)

\(\text{Link.}\)

\(\frak{Solution}\)

\(\text{Sol 1.}\) 后缀数组

\(S_0\) 的长度为 \(l_1\)\(S\)\(l_2\).

考虑到只允许失配 \(3\) 个碱基,我们考虑直接枚举 + 匹配。假设枚举的子串的开头为 \(i\),匹配了 \(j\) 位,我们现在要匹配 \(S_0\)\(i+j\) 位与 \(S\)\(j+1\) 位。

我们利用后缀数组跳跃。若两者的 \(\rm lcp\)\(k\),那么第 \(i+j+k\)\(j+1+k\) 一定是不匹配的。越过不匹配的点,将可以更改的次数减去一。复杂度是 \(\mathcal O(n\log n)\) 的。

\(\text{Sol 2.}\) \(\mathtt{FFT}\)

只有四种碱基,可以考虑枚举每一种碱基,假设枚举碱基 \(\rm C\),就令:

\[f(i)=[S_0(i)=\text{C}]\\ g(i)=[S(i)=\text{C}] \]

那么对于从 \(p\) 开始匹配的字符串,碱基 \(\rm C\) 匹配的个数就是:

\[\sum_{i=0}^{m-1} f(p+i)\cdot g(i) \]

不妨令 \(h(i)=g(m-i-1)\),就有:

\[\text{Ans}_p=\sum_{i+j=p+m-1} f(i)\cdot h(j) \]

\(\frak{Code}\)

\(\text{Sol 1.}\) 后缀数组

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
using namespace std;

const int N = 2e5 + 3;

int lg[N], T, rmq[N][20], n, m, h[N], tax[N], SA[N], tp[N], rk[N], a[N];

int read() {
	int x = 0, f = 1; char S;
	while((S = getchar()) > '9' || S < '0') {
		if(S == '-') f = -1;
		if(S == EOF) exit(0);
	}
	while(S <= '9' && S >= '0') {
		x = (x << 1) + (x << 3) + (S ^ 48);
		S = getchar();
	}
	return x * f;
}

bool cmp(const int x, const int y, const int d) {return tp[x] == tp[y] && tp[x + d] == tp[y + d];}

void Sort() {
    for(int i = 0; i <= m; ++ i) tax[i] = 0;
    for(int i = 1; i <= n; ++ i) ++ tax[rk[tp[i]]];
    for(int i = 1; i <= m; ++ i) tax[i] += tax[i - 1];
    for(int i = n; i >= 1; -- i) SA[tax[rk[tp[i]]] --] = tp[i];
}

void Swap(int &x, int &y) {
    x ^= y ^= x ^= y;
}

int Min(const int x, const int y) {
    if(x < y) return x;
    return y;
}

void Suffix() {
    for(int i = 1; i <= n; ++ i) rk[i] = a[i], tp[i] = i;
    m = 100; Sort();
    for(int w = 1, p = 1, i; p < n; m = p, w <<= 1) {
        for(p = 0, i = n - w + 1; i <= n; ++ i) tp[++ p] = i;
        for(i = 1; i <= n; ++ i) if(SA[i] > w) tp[++ p] = SA[i] - w;
        Sort(); swap(rk, tp); rk[SA[1]] = p = 1;
        for(i = 2; i <= n; ++ i) rk[SA[i]] = cmp(SA[i], SA[i - 1], w) ? p : ++ p;
    }
    int j, k = 0;
    for(int i = 1; i <= n; h[rk[i ++]] = k)
        for(k = k ? k - 1 : k, j = SA[rk[i] - 1]; a[i + k] == a[j + k]; ++ k);
    for(int i = 1; i <= n; ++ i) rmq[i][0] = h[i];
    for(int j = 1; (1 << j) <= n; ++ j)
        for(int i = 1; i + (1 << j) - 1 <= n; ++ i)
            rmq[i][j] = Min(rmq[i][j - 1], rmq[i + (1 << j - 1)][j - 1]);
}

int lcp(const int x, const int y) {
    int l = rk[x], r = rk[y];
    if(l > r) Swap(l, r);
    int t = lg[r - l];
    return Min(rmq[l + 1][t], rmq[r - (1 << t) + 1][t]);
}

int main() {
    char ch[N];
    int l1, l2, ans;
    for(int i = 2; i <= N - 1; ++ i) lg[i] = lg[i >> 1] + 1;
    T = read();
    while(T --) {
        ans = 0;
        scanf("%s", ch); l1 = strlen(ch);
        for(int i = 1; i <= l1; ++ i) a[i] = ch[i - 1];
        scanf("%s", ch); l2 = strlen(ch);
        a[l1 + 1] = '$';
        for(int i = l1 + 2; i <= l1 + l2 + 1; ++ i) a[i] = ch[i - l1 - 2];
        n = l1 + l2 + 1;
        Suffix();
        for(int i = 1; i <= l1 - l2 + 1; ++ i) {
            int j = 0;
            for(int k = 0; j <= l2 && k <= 3; ++ j, ++ k) j += lcp(i + j, l1 + j + 2);
            if(j > l2) ++ ans;
        }
        printf("%d\n", ans);
    }
    return 0;
}

\(\text{Sol 2.}\) \(\mathtt{FFT}\)

#include <cstdio>
#define print(x,y) write(x),putchar(y)

template <class T>
inline T read(const T sample) {
    T x=0; char s; bool f=0;
    while((s=getchar())>'9' || s<'0')
        f |= (s=='-');
    while(s>='0' && s<='9')
        x = (x<<1)+(x<<3)+(s^48),
        s = getchar();
    return f?-x:x;
}

template <class T>
inline void write(T x) {
    static int writ[50],w_tp=0;
    if(x<0) putchar('-'),x=-x;
    do writ[++w_tp]=x-x/10*10,x/=10; while(x);
    while(putchar(writ[w_tp--]^48),w_tp);
}

#include <cmath>
#include <cstring>
#include <iostream>
using namespace std;

const int maxn = 4e5+5;
const double PI = acos(-1.0);

int n,m,lim,bit,mp[200];
int ans[maxn],rev[maxn];
char s[maxn],t[maxn];
struct cp {
    double x,y;
    cp() {}
    cp(const double X,const double Y):x(X),y(Y) {}

    cp operator + (const cp& t) const {
        return cp(x+t.x,y+t.y);
    }
    cp operator - (const cp& t) const {
        return cp(x-t.x,y-t.y);
    }
    cp operator * (const cp& t) const {
        return cp(x*t.x-y*t.y,x*t.y+y*t.x);
    }

} f[maxn],g[maxn],w,wn,tmp;

void preWork() {
    lim=1, bit=0; 
    while(lim<n+m-1) lim<<=1, ++bit;
    for(int i=0;i<lim;++i)
        rev[i] = (rev[i>>1]>>1)|((i&1)<<bit-1);
}

void FFT(cp *f,int opt=1) {
    for(int i=0;i<lim;++i)
        if(i<rev[i]) swap(f[i],f[rev[i]]);
    for(int mid=1;mid<lim;mid<<=1) {
        wn = cp(cos(PI/mid),sin(PI/mid)*opt);
        for(int i=0;i<lim;i+=(mid<<1)) {
            w = cp(1,0);
            for(int j=0;j<mid;++j,w=w*wn) {
                tmp = f[i|j|mid]*w;
                f[i|j|mid] = f[i|j]-tmp;
                f[i|j] = f[i|j]+tmp;
            }
        }
    }
}

int main() {
    mp['A']=0, mp['G']=1;
    mp['C']=2, mp['T']=3;
    for(int T=read(9); T; --T) {
        scanf("%s %s",s,t);
        n=strlen(s), m=strlen(t);
        preWork();
        memset(ans,0,sizeof(int)*n);
        for(int c=0;c<4;++c) {
            for(int i=0;i<n;++i)
                f[i].x = (mp[s[i]]==c),
                f[i].y = 0;
            for(int i=n;i<lim;++i) 
                f[i].x=f[i].y=0;
            for(int i=0;i<m;++i)
                g[i].x = (mp[t[m-i-1]]==c),
                g[i].y = 0;
            for(int i=m;i<lim;++i) 
                g[i].x=g[i].y=0;
            FFT(f), FFT(g);
            for(int i=0;i<lim;++i)
                f[i] = f[i]*g[i];
            FFT(f,-1);
            for(int i=m-1;i<n;++i)
                ans[i] += int(f[i].x/lim+0.5);
        }
        int ret=0;
        for(int i=m-1;i<n;++i)
            ret += (ans[i]>=m-3);
        print(ret,'\n');
    }
    return 0;
}
原文地址:https://www.cnblogs.com/AWhiteWall/p/12390859.html