HDU 2222 Keywords Search (AC自动机)

题目链接

Problem Description
In the modern time, Search engine came into the life of everybody like Google, Baidu, etc.
Wiskey also wants to bring this feature to his image retrieval system.
Every image have a long description, when users type some keywords to find the image, the system will match the keywords with description of image and show the image which the most keywords be matched.
To simplify the problem, giving you a description of image, and some keywords, you should tell me how many keywords will be match.

Input
First line will contain one integer means how many cases will follow by.
Each case will contain two integers N means the number of keywords and N keywords follow. (N <= 10000)
Each keyword will only contains characters 'a'-'z', and the length will be not longer than 50.
The last line is the description, and the length will be not longer than 1000000.

Output
Print how many keywords are contained in the description.

Sample Input
1
5
she
he
say
shr
her
yasherhs

Sample Output
3

分析:
给出n个单词和一个文本串,问在文本串中有多少个单词出现过。

简单的字典树入门的模板题。

代码:

#include<stdio.h>
#include<iostream>
using namespace std;
const int N = 10010;
char str[1000005];
struct node
{
    node *next[26];
    node *fail;
    int count;
    node()
    {
        for(int i = 0; i < 26; i++)
            next[i] = NULL;
        count = 0;
        fail = NULL;
    }
}*q[50*N];
node *root;
int head, tail;

void Insert(char *str)//建立字典树
{
    node *p = root;
    int i = 0, index;
    while(str[i])//没有寻找到最后的时候
    {
        index = str[i] - 'a';//字母转数字
        if(p->next[index] == NULL)//没有这个节点的话构建新的节点
            p->next[index] = new node();
        p = p->next[index];//不管节点是新建立的,还是原来就存在的,指针都要往下移
        i++;
    }
    p->count++;//标记这个当前以这个结束的单词有几个
}
void build_ac_automation(node *root)//构建fail指针
{
    root->fail = NULL;//根节点fail指针为空,也就是自己
    q[tail++] = root;
    while(head < tail)//寻找fail指针的过程相当于是一个广搜的过程
    {
        node *temp = q[head++];//temp临时表示当前取出的这个节点
        node *p = NULL;
        for(int i = 0; i < 26; i++)
        {
            if(temp->next[i] != NULL)//temp有子节点的话
            {
                if(temp == root) temp->next[i]->fail = root;//如果temp为根节点,那么它所有的子节点的fail指针都是指向它本身
                else
                {
                    p = temp->fail;//如果不是根节点的话,那么就寻找他的fail指针
                    while(p != NULL)//当fail指着不是根节点的情况下
                    {
                        if(p->next[i] != NULL)//并且对应的相同的子节点也存在
                        {
                            temp->next[i]->fail = p->next[i];//那么这个节点就是子节点的fail指针
                            break;
                        }
                        p = p->fail;//不满足的就接着往上找,直到找到满足条件的
                    }
                    if(p == NULL) temp->next[i]->fail = root;//或则找不到满足条件的,那么fail指针就是根节点
                }
                q[tail++] = temp->next[i];
            }
        }
    }
}
int Query(node *root)//查找
{
    int i = 0, cnt = 0, index;
    node *p = root;
    while(str[i])
    {
        index = str[i] - 'a';//字母转数字
        while(p->next[index] == NULL && p != root) p = p->fail;//一直找到fail指针
        p = p->next[index];//指向当前的节点
        if(p == NULL) p = root;
        node *temp = p;
        while(temp != root && temp->count != -1)//单词没有访问过
        {
            cnt += temp->count;//找到最后的话就加上单词次数
            temp->count = -1;//找过就标记
            temp = temp->fail;
        }
        i++;
    }
    return cnt;
}

int main()
{
    int T, n;
    scanf("%d",&T);
    while(T--)
    {
        head = tail = 0;//数组模拟队列,表示头和尾
        root = new node();
        scanf("%d",&n);
        while(n--)
        {
            scanf("%s", str);
            Insert(str);
        }
        build_ac_automation(root);
        scanf("%s",str);
        printf("%d
", Query(root));
    }
    return 0;
}
原文地址:https://www.cnblogs.com/cmmdc/p/7898374.html