hdu 1277 全文检索 (直接映射查找 || 自动机)

Problem - 1277

  无聊做水题的时候发现的一道题目。这道题第一反应可以用自动机来解决。当然,条件是各种限制,从而导致可以用直接映射标记的方法来搜索。具体的做法就像RK算法一样,将字符串hash成一个数,这里每一个关键字前四位都是不同的,这样就有利于hash搜索了。当前四位匹配的时候,就可以搜索整个串是否完全匹配。这整个的复杂度大概是O(n*m),n是全文测长度,m是关键字的长度。

自动机代码:

  1 #include <cstdio>
  2 #include <iostream>
  3 #include <algorithm>
  4 #include <cstring>
  5 #include <string>
  6 #include <queue>
  7 #include <set>
  8 
  9 using namespace std;
 10 
 11 const int kind = 10;
 12 struct Node {
 13     Node *next[kind];
 14     Node *fail;
 15     string label;
 16     Node() {
 17         memset(next, 0, sizeof(next));
 18         fail = NULL;
 19         label = "";
 20     }
 21     Node(char *tmp) {
 22         memset(next, 0, sizeof(next));
 23         fail = NULL;
 24         label = tmp;
 25     }
 26 } ;
 27 
 28 struct Trie {
 29     Node *Root;
 30     Trie() { Root = new Node();}
 31 
 32     void insert(char *s, char *label) {
 33         Node *p = Root;
 34         while (*s) {
 35             int idx = *s - '0';
 36             if (!p->next[idx]) {
 37                 p->next[idx] = new Node();
 38                 p->next[idx]->fail = p;
 39             }
 40             p = p->next[idx];
 41             s++;
 42         }
 43         p->label = label;
 44     }
 45     void build() {
 46         queue<Node *> Q;
 47         while (!Q.empty()) Q.pop();
 48         Q.push(Root);
 49         Root->fail = NULL;
 50         Node *cur, *p;
 51         while (!Q.empty()) {
 52             cur = Q.front();
 53             Q.pop();
 54             for (int i = 0; i < kind; i++) {
 55 //                cout << cur->next[i] << ' ';
 56                 if (cur->next[i]) {
 57 //                    cout << i << ' ';
 58                     Q.push(cur->next[i]);
 59                     p = cur->fail;
 60                     while (p) {
 61                         if (p->next[i]) {
 62                             cur->next[i]->fail = p->next[i];
 63                             break;
 64                         }
 65                         p = p->fail;
 66                     }
 67                     if (!p) cur->next[i]->fail = Root;
 68                 }
 69             }
 70         }
 71     }
 72     void query(char *s) {
 73         set<string> vis;
 74         bool pnt = false;
 75         Node *cur = Root;
 76         while (*s) {
 77             int idx = *(s++) - '0';
 78             while (cur && !cur->next[idx]) cur = cur->fail;
 79             cur = cur ? cur->next[idx] : Root;
 80             if (vis.find(cur->label) != vis.end()) continue;
 81             if (cur->label.length()) {
 82                 if (!pnt) {
 83                     printf("Found key:");
 84                     pnt = true;
 85                 }
 86                 cout << ' ' << cur->label;
 87                 vis.insert(cur->label);
 88             }
 89         }
 90         if (!pnt) puts("No key can be found !");
 91         cout << endl;
 92     }
 93 } trie;
 94 
 95 char str[66666], buf[111];
 96 
 97 int main() {
 98 //    freopen("in", "r", stdin);
 99     int n, m;
100     while (cin >> n >> m) {
101         trie = Trie();
102         gets(buf);
103         str[0] = 0;
104         while (n-- && gets(buf)) strcat(str, buf);
105         gets(buf);
106         while (m-- && gets(buf)) {
107             char *p = buf;
108             while (*(p++) != ']') ;
109             *(p++) = 0;
110             while (*p == ' ') p++;
111             trie.insert(p, buf);
112 //            cout << p << ' ' << buf << " inserted!"<< endl;
113         }
114         trie.build();
115 //        cout << "built!!" << endl;
116         trie.query(str);
117     }
118     return 0;
119 }
View Code

UPD:

 1 #include <cstdio>
 2 #include <iostream>
 3 #include <algorithm>
 4 #include <cstring>
 5 #include <string>
 6 
 7 using namespace std;
 8 
 9 const int HASHLEN = 5;
10 const int M = 111111;
11 const int N = 11111;
12 string label[N], key[N];
13 bool vis[N];
14 int id[M];
15 char str[6 * N], buf[N];
16 
17 bool check(int t, char *p) {
18     int len = key[t].length();
19     for (int i = 0; i < len; i++) {
20         if (key[t][i] != p[i]) return false;
21     }
22     return true;
23 }
24 
25 void work(char *p) {
26     bool pnt = false;
27     char *q = p;
28     int cur = 0, ep = 10;
29     for (int i = 0; i < HASHLEN - 1; i++) cur = cur * 10 + *(q++) - '0', ep *= 10;
30     while (*q && *p) {
31 //        cout << cur << endl;
32         cur = cur * 10 + *q - '0';
33         cur %= ep;
34         int t = id[cur];
35         if (t != -1 && !vis[t]) {
36             if (check(t, p)) {
37                 if (!pnt) printf("Found key:");
38                 pnt = vis[t] = true;
39                 cout << ' ' << label[t];
40             }
41         }
42         p++, q++;
43     }
44     if (!pnt) printf("No key can be found !");
45     cout << endl;
46 }
47 
48 int main() {
49 //    freopen("in", "r", stdin);
50     int n, m;
51     while (cin >> n >> m) {
52         memset(id, -1, sizeof(id));
53         memset(vis, 0, sizeof(vis));
54         gets(buf);
55         str[0] = 0;
56         while (n-- && gets(buf)) strcat(str, buf);
57         gets(buf);
58         for (int i = 0; i < m; i++) {
59             gets(buf);
60             char *p = buf;
61             while (*p && *p != ']') p++;
62             *(++p) = 0;
63             p++;
64             while (*p && *p == ' ') p++;
65             label[i] = buf;
66             key[i] = p;
67 //            cout << label[i] << ' ' << key[i] << endl;
68             int cid = 0;
69             for (int i = 0; i < HASHLEN; i++) cid = cid * 10 + p[i] - '0';
70 //            cout << cid << endl;
71             if (id[cid] != -1) { puts("shit!"); while (1) ;}
72             id[cid] = i;
73         }
74         work(str);
75     }
76     return 0;
77 }
View Code

  不知道为什么这个代码能过G++但是C++过不了,应该是代码里面有bug。另外,数据似乎跟描述的有点出入,说好的不同key值的前四位不会相同,可是写了句来试它的数据时就发现被骗了,所以后来用了5位,过了!

——written by Lyon

原文地址:https://www.cnblogs.com/LyonLys/p/hdu_1277_Lyon.html