POJ2278 DNA Sequence —— AC自动机 + 矩阵优化

题目链接:https://vjudge.net/problem/POJ-2778

DNA Sequence
Time Limit: 1000MS   Memory Limit: 65536K
Total Submissions: 18479   Accepted: 7112

Description

It's well known that DNA Sequence is a sequence only contains A, C, T and G, and it's very useful to analyze a segment of DNA Sequence,For example, if a animal's DNA sequence contains segment ATC then it may mean that the animal may have a genetic disease. Until now scientists have found several those segments, the problem is how many kinds of DNA sequences of a species don't contain those segments. 

Suppose that DNA sequences of a species is a sequence that consist of A, C, T and G,and the length of sequences is a given integer n. 

Input

First line contains two integer m (0 <= m <= 10), n (1 <= n <=2000000000). Here, m is the number of genetic disease segment, and n is the length of sequences. 

Next m lines each line contain a DNA genetic disease segment, and length of these segments is not larger than 10. 

Output

An integer, the number of DNA sequences, mod 100000.

Sample Input

4 3
AT
AC
AG
AA

Sample Output

36

Source

题意:

给出m个DNA序列,问长度为n且不含这m个序列的DNA有多少个?

题解:

1.把这m个序列插入到AC自动机中。

2.根据自动机中各个状态之间的关系,构成一张邻接矩阵A,但需要去除与“结束点”有关的边,这样就能保证不含有给出的序列。

3.长度为n,那么答案就是 A^n 中,初始状态那一行之和。

代码如下:

  1 #include <iostream>
  2 #include <cstdio>
  3 #include <cstring>
  4 #include <algorithm>
  5 #include <vector>
  6 #include <cmath>
  7 #include <queue>
  8 #include <stack>
  9 #include <map>
 10 #include <string>
 11 #include <set>
 12 using namespace std;
 13 typedef long long LL;
 14 const double EPS = 1e-6;
 15 const int INF = 2e9;
 16 const LL LNF = 9e18;
 17 const int MOD = 1e5;
 18 const int MAXN = 110+10;
 19 
 20 int Size;
 21 int Map[128];
 22 struct MA
 23 {
 24     int mat[110][110];
 25     void init()
 26     {
 27         for(int i = 0; i<Size; i++)
 28         for(int j = 0; j<Size; j++)
 29             mat[i][j] = (i==j);
 30     }
 31 };
 32 
 33 MA operator*(const MA &x, const MA &y)
 34 {
 35     MA ret;
 36     memset(ret.mat, 0, sizeof(ret.mat));
 37     for(int i = 0; i<Size; i++)
 38     for(int j = 0; j<Size; j++)
 39     for(int k = 0; k<Size; k++)
 40         ret.mat[i][j] += (1LL*x.mat[i][k]*y.mat[k][j])%MOD, ret.mat[i][j] %= MOD;
 41     return ret;
 42 }
 43 
 44 MA qpow(MA x, int y)
 45 {
 46     MA s;
 47     s.init();
 48     while(y)
 49     {
 50         if(y&1) s = s*x;
 51         x = x*x;
 52         y >>= 1;
 53     }
 54     return s;
 55 }
 56 
 57 struct Trie
 58 {
 59     const static int sz = 4, base = 'A';
 60     int next[MAXN][sz], fail[MAXN], end[MAXN];
 61     int root, L;
 62     int newnode()
 63     {
 64         for(int i = 0; i<sz; i++)
 65             next[L][i] = -1;
 66         end[L++] = false;
 67         return L-1;
 68     }
 69     void init()
 70     {
 71         L = 0;
 72         root = newnode();
 73     }
 74     void insert(char buf[])
 75     {
 76         int len = strlen(buf);
 77         int now = root;
 78         for(int i = 0; i<len; i++)
 79         {
 80             if(next[now][Map[buf[i]]] == -1) next[now][Map[buf[i]]] = newnode();
 81             now = next[now][Map[buf[i]]];
 82         }
 83         end[now] = true;
 84     }
 85     void build()
 86     {
 87         queue<int>Q;
 88         fail[root] = root;
 89         for(int i = 0; i<sz; i++)
 90         {
 91             if(next[root][i] == -1) next[root][i] = root;
 92             else fail[next[root][i]] = root, Q.push(next[root][i]);
 93         }
 94         while(!Q.empty())
 95         {
 96             int now = Q.front();
 97             Q.pop();
 98             end[now] |= end[fail[now]]; //当前串的后缀是否也包含单词
 99             for(int i = 0; i<sz; i++)
100             {
101                 if(next[now][i] == -1) next[now][i] = next[fail[now]][i];
102                 else fail[next[now][i]] = next[fail[now]][i], Q.push(next[now][i]);
103             }
104         }
105     }
106 
107     int query(int n)
108     {
109         MA s;
110         memset(s.mat, 0, sizeof(s.mat));
111         for(int i = 0; i<L; i++)
112         {
113             if(end[i]) continue;    //存在单词的状态没有后继
114             for(int j = 0; j<sz; j++)
115             {
116                 if(end[next[i][j]]) continue;   //存在单词的状态没有前驱
117                 s.mat[i][next[i][j]]++; // i到next[i][j]的路径数+1。注意,当next[i][j]==root时,路径数很可能大于1。
118             }
119         }
120 
121         int ret = 0;
122         Size = L;
123         s = qpow(s, n);
124         for(int i = 0; i<L; i++)    //答案为:初始状态到各个状态(包括初始状态)的路径数之和。
125             ret = (ret+s.mat[0][i])%MOD;
126         return ret;
127     }
128 };
129 
130 Trie ac;
131 char buf[20];
132 int main()
133 {
134     Map['A'] = 0; Map['C'] = 1; Map['G'] = 2; Map['T'] = 3; //离散化
135     int n, m;
136     while(scanf("%d%d", &m,&n)!=EOF)
137     {
138         ac.init();
139         for(int i = 1; i<=m; i++)
140         {
141             scanf("%s", buf);
142             ac.insert(buf);
143         }
144         ac.build();
145         int ans = ac.query(n);
146         printf("%d
", ans);
147     }
148     return 0;
149 }
View Code
原文地址:https://www.cnblogs.com/DOLFAMINGO/p/8455234.html