hdu 2222 Keywords Search

http://acm.hdu.edu.cn/showproblem.php?pid=2222

  毫无遮掩的AC自动机一枚!刚开始的时候不知道这题是用AC自动机做的,于是我就用了后缀数组,大概算了一下,貌似是勉强可以通过少数的大数据的。不过实际情况是超时了。。。。。

这是后缀数组的代码:

Suffix Array
  1 /*
  2  * Author: Lyon
  3  * Problem: hdu 2222
  4  * Method: Suffix Array
  5  */
  6 #include <cstdio>
  7 #include <cstring>
  8 #include <algorithm>
  9 
 10 #define FOR(i, a, n) for (int i = a; i < n; i++)
 11 #define REP(i, n) for (int i = 0; i < n; i++)
 12 
 13 using namespace std;
 14 
 15 const int maxn = 1000001;
 16 int wa[maxn], wb[maxn], wv[maxn], wc[maxn];
 17 
 18 int cmp(int *r, int a, int b, int l){
 19     return r[a] == r[b] && r[a + l] == r[b + l];
 20 }
 21 
 22 void work(int *r, int *sa, int n, int m){
 23     int *x = wa, *y = wb;
 24 
 25     REP(i, m) wc[i] = 0;
 26     REP(i, n) wc[x[i] = r[i]]++;
 27     FOR(i, 1, m) wc[i] += wc[i - 1];
 28     for (int i = n - 1; i >= 0; i--){
 29         sa[--wc[x[i]]] = i;
 30     }
 31     for (int j = 1, p = 1; p < n; j *= 2, m = p){
 32         p = 0;
 33         for (int i = n - j; i < n; i++){
 34             y[p++] = i;
 35         }
 36         REP(i, n){
 37             if (sa[i] >= j) y[p++] = sa[i] - j;
 38         }
 39         REP(i, n){
 40             wv[i] = x[y[i]];
 41         }
 42         REP(i, m){
 43             wc[i] = 0;
 44         }
 45         REP(i, n){
 46             wc[wv[i]]++;
 47         }
 48         FOR(i, 1, m){
 49             wc[i] += wc[i - 1];
 50         }
 51         for (int i = n - 1; i >= 0; i--){
 52             sa[--wc[wv[i]]] = y[i];
 53         }
 54         swap(x, y);
 55         p = 1;
 56         x[sa[0]] = 0;
 57         for (int i = 1; i < n; i++){
 58             x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
 59         }
 60     }
 61 
 62     return ;
 63 }
 64 
 65 int rk[maxn], ht[maxn];
 66 
 67 void saCal(int *r, int *sa, int n){
 68     int k = 0;
 69 
 70     for (int i = 1; i <= n; i++){
 71         rk[sa[i]] = i;
 72     }
 73     for (int i = 0, j; i < n; ht[rk[i++]] = k){
 74         for (k ? k-- : 0, j = sa[rk[i] - 1]; r[i + k] == r[j + k]; k++);
 75     }
 76 
 77     return ;
 78 }
 79 
 80 char s[maxn];
 81 int r[maxn], sa[maxn];
 82 char sub[10001][55];
 83 
 84 int judge(char *str, char *sb){
 85     int i = 0;
 86 
 87     while (*(str + i) && *(sb + i)){
 88         if (*(str + i) != *(sb + i)){
 89             break;
 90         }
 91         i++;
 92     }
 93     if (!(*(sb + i))) return 0; // sub is fully paired
 94     return *(str + i) - *(sb + i);
 95 }
 96 
 97 bool bs(int n, char *sb){
 98     int h = 0, t = n - 1;
 99     int m;
100 
101     while (h <= t){
102         m = (h + t) >> 1;
103 
104         int dif = judge(&s[sa[m]], sb);
105 
106         if (!dif) return true;
107         if (dif < 0) h = m + 1;
108         else t = m - 1;
109     }
110 
111     return false;
112 }
113 
114 int deal(int n){
115     int ret = 0;
116     int len = strlen(s);
117 
118     work(r, sa, len, 128);
119 /*
120     for (int i = 0; i < len; i++){
121         printf("String %d:\n", i);
122         printf("%s\n", &s[sa[i]]);
123         printf("sa %d\n", sa[i]);
124         puts("");
125     }
126 */
127     for (int i = 0; i < n; i++){
128         if (bs(len, sub[i])){
129             ret++;
130             //printf("%s\n", sub[i]);
131         }
132     }
133 
134     return ret;
135 }
136 
137 int main(){
138     int n, T;
139 
140     scanf("%d", &T);
141     while (T-- && ~scanf("%d", &n)){
142         for (int i = 0; i < n; i++){
143             scanf("%s", sub[i]);
144         }
145         scanf("%s", s);
146 
147         int len = strlen(s);
148 
149         r[len] = 0;
150         while (len--){
151             r[len] = s[len];
152         }
153         printf("%d\n", deal(n));
154     }
155 
156     return 0;
157 }

  然后我就突然想起了一直被我忽略的AC自动机算法,我在网上找了一下相关的资料,原来AC自动机是可以通过状态转移,在O(n)的复杂度下同时匹配多个较小规模的子串的。我粗略的看了一下它的原理,联系上上学期学过的数字逻辑的课,电路状态的转移和AC自动机的转移可谓是同出一辙!两者都是通过构建自动机,从而判断串行数据的是否满足给定的条件。有了数字逻辑课的基础,看这个就觉得相当简单了。

  简单讲一下步骤,在子串输入的同时,构建tri树。输入完毕以后,用build函数,将结点fail的状态转移关系构建出来。在构建的过程中,如果学过KMP算法,很快就可以看出这个过程跟KMP是十分相似的,因为两者的原理都是自动机,也就是状态的转移。然后就是query的时候了,不断的输入被查找的字符串的字母,然后通过之前构建好的自动机,不断的判断并转移到下一个状态。不过根据题意,这里要注意将输出过的状态设置为不可再次访问。

下面的是AC自动机的代码(动态空间):

View Code
  1 /*
  2  * Author: SCAU_Lyon
  3  * Problem: hdu 2222
  4  * Method: AC-auto
  5  */
  6 
  7 #include <cstdio>
  8 #include <cstring>
  9 #include <algorithm>
 10 #include <cstdlib>
 11 
 12 using namespace std;
 13 
 14 const int kind = 26;
 15 const int maxn = 51 * 10001;
 16 
 17 struct node{
 18     node *fail;
 19     node *next[kind];
 20     int count;
 21     node(){
 22         fail = NULL;
 23         count = 0;
 24         memset(next, 0, sizeof(next));
 25     }
 26 }*q[maxn];
 27 
 28 char keyword[51];
 29 char str[1000001];
 30 int head, tail;
 31 
 32 void insert(char *s, node *root){
 33     node *p = root;
 34     int i = 0, index;
 35 
 36     while (s[i]){
 37         index = s[i] - 'a';
 38         if (p->next[index] == NULL) p->next[index] = new node();
 39         p = p->next[index];
 40         i++;
 41     }
 42     p->count++;
 43 }
 44 
 45 void build(node *root){
 46     int i;
 47 
 48     root->fail = NULL;
 49     head = tail = 0;
 50     q[head++] = root;
 51     while (head != tail){
 52         node *temp = q[tail++];
 53         node *p = NULL;
 54 
 55         for (i = 0; i < kind; i++){
 56             if (temp->next[i] != NULL){
 57                 if (temp == root) temp->next[i]->fail = root;
 58                 else{
 59                     p = temp->fail;
 60                     while (p != NULL){
 61                         if (p->next[i] != NULL){
 62                             temp->next[i]->fail = p->next[i];
 63                             break;
 64                         }
 65                         p = p->fail;
 66                     }
 67                     if (p == NULL) temp->next[i]->fail = root;
 68                 }
 69                 q[head++] = temp->next[i];
 70             }
 71         }
 72     }
 73 }
 74 
 75 int query(node *root){
 76     int i = 0, cnt = 0, index;
 77     node *p = root;
 78 
 79     while (str[i]){
 80         index = str[i] - 'a';
 81         while (p->next[index] == NULL && p != root) p = p->fail;
 82         p = p->next[index];
 83         p = (p == NULL) ? root : p;
 84 
 85         node *temp = p;
 86 
 87         while (temp != root && temp->count != -1){
 88             cnt += temp->count;
 89             temp->count = -1;
 90             temp = temp->fail;
 91         }
 92         i++;
 93     }
 94 
 95     return cnt;
 96 }
 97 
 98 void traverse(node *root){
 99     if (root == NULL) return ;
100     printf("root %d\n", root);
101     printf("fail %d\n", root->fail);
102     printf("count %d\n", root->count);
103     for (int i = 0; i < kind; i++){
104         if (root->next[i] == NULL) continue;
105         printf("%c %d\n", i + 'a', root->next[i]);
106     }
107     puts("");
108     for (int i = 0; i < kind; i++){
109         traverse(root->next[i]);
110     }
111 }
112 
113 int main(){
114     int n, T;
115 
116     scanf("%d", &T);
117     while (T--){
118         scanf("%d", &n);
119 
120         node *root = new node();
121 
122         for(int i = 0; i < n; i++){
123             scanf("%s", keyword);
124             insert(keyword, root);
125         }
126         build(root);
127 //traverse(root);
128 //puts("Built!");
129         scanf("%s", str);
130         printf("%d\n", query(root));
131     }
132 
133     return 0;
134 }

静态链表:

View Code
  1 #include <cstdio>
  2 #include <cstring>
  3 #include <algorithm>
  4 #include <cstdlib>
  5 
  6 using namespace std;
  7 
  8 const int kind = 26;
  9 const int maxn = 51 * 10001;
 10 
 11 struct Node{
 12     int fail;
 13     int next[kind];
 14     int count;
 15     void init(){
 16         fail = -1;
 17         count = 0;
 18         memset(next, -1, sizeof(next));
 19     }
 20 }node[maxn];
 21 
 22 char keyword[51];
 23 char str[1000001];
 24 int head, tail, nodeUsed;
 25 int q[maxn];
 26 
 27 void insert(char *s, int root){
 28     int p = root;
 29     int i = 0, index;
 30 
 31     while (s[i]){
 32         index = s[i] - 'a';
 33         if (node[p].next[index] == -1){
 34             node[p].next[index] = nodeUsed;
 35             node[nodeUsed].init();
 36             nodeUsed++;
 37         }
 38         p = node[p].next[index];
 39         i++;
 40     }
 41     node[p].count++;
 42 }
 43 
 44 void build(int root){
 45     int i;
 46 
 47     node[root].fail = -1;
 48     head = tail = 0;
 49     q[head++] = root;
 50     while (head != tail){
 51         int temp = q[tail++];
 52         int p = -1;
 53 
 54         for (i = 0; i < kind; i++){
 55             if (node[temp].next[i] != -1){
 56                 if (temp == root) node[node[temp].next[i]].fail = root;
 57                 else{
 58                     p = node[temp].fail;
 59                     while (p != -1){
 60                         if (node[p].next[i] != -1){
 61                             node[node[temp].next[i]].fail = node[p].next[i];
 62                             break;
 63                         }
 64                         p = node[p].fail;
 65                     }
 66                     if (p == -1) node[node[temp].next[i]].fail = root;
 67                 }
 68                 q[head++] = node[temp].next[i];
 69             }
 70         }
 71     }
 72 }
 73 
 74 int query(int root){
 75     int i = 0, cnt = 0, index;
 76     int p = root;
 77 
 78     while (str[i]){
 79         index = str[i] - 'a';
 80         while (node[p].next[index] == -1 && p != root) p = node[p].fail;
 81         p = node[p].next[index];
 82         p = (p == -1) ? root : p;
 83 
 84         int temp = p;
 85 
 86         while (temp != root && node[temp].count != -1){
 87             cnt += node[temp].count;
 88             node[temp].count = -1;
 89             temp = node[temp].fail;
 90         }
 91         i++;
 92     }
 93 
 94     return cnt;
 95 }
 96 
 97 void traverse(int root){
 98     if (root == -1) return ;
 99     printf("root %d\n", root);
100     printf("fail %d\n", node[root].fail);
101     printf("count %d\n", node[root].count);
102     for (int i = 0; i < kind; i++){
103         if (node[root].next[i] == -1) continue;
104         printf("%c %d\n", i + 'a', node[root].next[i]);
105     }
106     puts("");
107     for (int i = 0; i < kind; i++){
108         traverse(node[root].next[i]);
109     }
110 }
111 
112 int main(){
113     int n, T;
114 
115     scanf("%d", &T);
116     while (T--){
117         scanf("%d", &n);
118 
119         int root = 0;
120 
121         node[0].init();
122         nodeUsed = 1;
123 
124         for(int i = 0; i < n; i++){
125             scanf("%s", keyword);
126             insert(keyword, root);
127         }
128         build(root);
129 //traverse(root);
130 //puts("Built!");
131         scanf("%s", str);
132         printf("%d\n", query(root));
133     }
134 
135     return 0;
136 }

UPD(2013.8.24):

  1 #include <iostream>
  2 #include <algorithm>
  3 #include <cstring>
  4 #include <cstdio>
  5 
  6 using namespace std;
  7 
  8 const int N = 11111;
  9 const int M = 55;
 10 const int K = 26;
 11 const int L = 1111111;
 12 
 13 struct Node {
 14     Node *nx[K], *fail;
 15     int cnt;
 16     bool pre;
 17     void init() {
 18         for (int i = 0; i < K; i++) nx[i] = NULL;
 19         fail = NULL;
 20         cnt = 0;
 21         pre = false;
 22     }
 23 } node[N * M];
 24 int ttnode;
 25 
 26 struct Trie {
 27     Node *RT;
 28     void init() {
 29         RT = node + ttnode, node[ttnode++].init();
 30     }
 31     void insert(char *s) {
 32         Node *p = RT;
 33         int idx;
 34         while (*s) {
 35             idx = *s - 'a';
 36             if (p->nx[idx] == NULL) {
 37                 p->nx[idx] = node + ttnode, node[ttnode++].init();
 38             }
 39             p = p->nx[idx], s++;
 40         }
 41         p->cnt++, p->pre = true;
 42     }
 43     Node *q[N * M];
 44     void build() {
 45         RT->fail = RT;
 46         int qh, qt;
 47         qh = qt = 0;
 48         for (int i = 0; i < K; i++) {
 49             if (RT->nx[i]) {
 50                 RT->nx[i]->fail = RT;
 51                 q[qt++] = RT->nx[i];
 52             }
 53         }
 54         //cout << "pass" << endl;
 55         while (qh < qt) {
 56             Node *p = q[qh++];
 57             for (int i = 0; i < K; i++) {
 58                 if (p->nx[i]) {
 59                     Node *t = p->fail;
 60                     while (t->nx[i] == NULL && t != RT) t = t->fail;
 61                     t = t->nx[i];
 62                     if (t) p->nx[i]->fail = t, p->nx[i]->pre |= t->pre;
 63                     else p->nx[i]->fail = RT;
 64                     q[qt++] = p->nx[i];
 65                 }
 66             }
 67         }
 68     }
 69     int query(char *s) {
 70         int cnt = 0, idx;
 71         Node *p = RT;
 72         while (*s) {
 73             idx = *s - 'a';
 74             while (p->nx[idx] == NULL && p != RT) p = p->fail;
 75             p = p->nx[idx];
 76             if (p) {
 77                 Node *t = p;
 78                 while (t != RT && t->pre) cnt += t->cnt, t->pre = false, t = t->fail;
 79             } else p = RT;
 80             //cout << *s << ' ' << cnt << endl;
 81             s++;
 82         }
 83         return cnt;
 84     }
 85 } trie;
 86 
 87 char str[L], buf[M];
 88 
 89 int main() {
 90     //freopen("in", "r", stdin);
 91     int T, n;
 92     scanf("%d", &T);
 93     while (T--) {
 94         scanf("%d", &n);
 95         ttnode = 0;
 96         trie.init();
 97         for (int i = 0; i < n; i++) {
 98             scanf("%s", buf);
 99             trie.insert(buf);
100             //cout << buf << endl;
101         }
102         trie.build();
103         //cout << "built" << endl;
104         scanf("%s", str);
105         printf("%d\n", trie.query(str));
106     }
107     return 0;
108 }
View Code

详细解释: http://www.cppblog.com/mythit/archive/2009/04/21/80633.html

——written by Lyon

原文地址:https://www.cnblogs.com/LyonLys/p/hdu_2222_Lyon.html