AC自动机学习笔记

建议先学习 KMP, 阅读本篇前请至少先阅读 KMP学习笔记 的前言


\[Upd \ in \ 2019.10.10 \]

增加一些新的理解,顺便把博客搬过来。

AC 自动机的 nxt 与 KMP 的并不一样,前者是指向一个与自己等价的地方,而 KMP 则是指向 与前一个等价的地方的后一个 ,所以才是失配后去找的地方。

还有注意只有 $ ch $ 需要开满,其他的数组只用开到 数量 * 长度 即可。


#include <iostream>
#include <cstdio>
#include <cstring>
#include <queue>
#define reg register
#define ll long long
using namespace std;
template <typename _Typer>
inline _Typer read() {
	reg char ch = getchar();
	reg _Typer init = 0, base = 1;
	while ((ch < '0' || ch > '9') && ch != '-') ch = getchar();
	if (ch == '-') base = -1;
	else init = ch - '0';
	ch = getchar();
	while (ch >= '0' && ch <= '9')
		init = (init<<3) + (init<<1) + ch - '0', ch = getchar();
	return init * base;
}
const ll N = 1000005, INF = 1e9;
const int Sigma_Size = 26;
const char Cmod = 'a';

int ch[N][Sigma_Size];
int val[N], sz;
int ans;
struct Trie {
	Trie () { sz = 1; /*memset(ch, 0, sizeof(ch)); memset(val, 0, sizeof(val));*/}
	int idx(char c) { return c - Cmod;}
	void insert(char *s, int v) {
		int u = 0, len = strlen(s);
		for (reg int i = 0; i < len; ++i) {
			int c = idx(s[i]);
			if (!ch[u][c]) {
//				memset(ch[sz], 0, sizeof(ch[sz]));
				val[sz] = 0;
				ch[u][c] = sz;
				++sz;
			}
			u = ch[u][c];
		}
		val[u] += v;
	}
	int query(char *s) {
		int u = 0, len = strlen(s);
		for (reg int i = 0; i < len; ++i) {
			int c = idx(s[i]);
			if (!ch[u][c]) return -1;
			u = ch[u][c];
		}
		return val[u];
	}
}trie;

int nex[N*Sigma_Size], lst[N*Sigma_Size];
// nex[x]=y, only when char[x]=char[y] and 0...y=somewhere...x
inline void getfail() {
	queue <int> q;
	nex[0] = 0;
	for (reg int i = 0; i < Sigma_Size; ++i) {
		int u = ch[0][i];
		if (u) { nex[u] = 0; q.push(u); lst[u] = 0;}
	}
	while (!q.empty()) {
		int r = q.front(); q.pop();
		for (reg int c = 0; c < Sigma_Size; ++c) {
			int u = ch[r][c];
			// when r,i fail -> nex[r],i -> 0-nex[r]=x-r -> r=nex[r]
			// because haven't ch[r][c](u) -> r.c=nex[r].c (@1)
			if (!u) { ch[r][c] = ch[nex[r]][c]; continue;}
			q.push(u);
			int v = nex[r];
			// 1.ch[v][c]>0 -> now.c=v.c -> nex[u(ch[r][c])]=sz(v.c)=ch[v][c]
			// 2.v=0 -> no one same of now(r.c) -> now.c=0 -> nex[u]=0(ch[v][c])
			nex[u] = ch[v][c];
			// 1.nex[u].val>0 -> nex[u] is a word
			// 2.nex[u].val=0 -> nex[u] isn't a word ->
			//   1).lst[nex[u]].val>0 -> lst[nex[u]] is a word
			//   2).lst[nex[u]].val=0 -> it's not a word ->
			//		.
			//		.
			//		.
			// so, lst[x] mean the lastest word
			// (if lst[x] is 0, the reason is that no word is finish with x(lst[x])) (@2)
			lst[u] = val[nex[u]] ? nex[u] : lst[nex[u]];
		}
	}
}
inline void update(int j) {
	for ( ; j && val[j]; j = lst[j]) ans += val[j], val[j] = 0;
}
inline void find(char *T) {
	getfail();
	int j = 0, len = strlen(T);
	for (reg int i = 0; i < len; ++i) {
		int c = trie.idx(T[i]);
		// try to compare T[i](c),ch[j][c]
		// if j have son is c -> some word's P[j].next is c
		// else -> no word's P[j].next is c -> fail -> compare T[i],P[nex[j]]
		// because j haven't son is c, ch[j][c](first) is 0 -> @1 -> ch[j][c]=ch[nex[j][c]
		j = ch[j][c];
		// if val[j]>0 a word have compared successful -> update
		if (val[j]) update(j);
		// else may be when we compare in now, some word have compared successful
		// but in this node, the val is 0
		// and now, if lst[j]>0, it means some word is finish with j(ch[pre[j]][c(T[i])])
		// (because @2)
		else if (lst[j]) update(lst[j]);
	}
}

char Ts[N];
inline void scan();
inline int _main() {
	scan();
	find(Ts);
	printf("%d\n", ans);
	return 0;
}

int main() {
	#define offline1
	#ifdef offline
	freopen("Aho-Corasick.in", "r", stdin);
	freopen("Aho-Corasick.out", "w", stdout);
	_main();
	fclose(stdin); fclose(stdout);
	#else
	_main();
	#endif
	return 0;
}

inline void scan() {
	int n = read<int>();
	char Ps[N];
	for (reg int i = 1; i <= n; ++i) {
		scanf("%s", Ps);
		trie.insert(Ps, 1);
	}
	scanf("%s", Ts);
}

原文地址:https://www.cnblogs.com/Ameiyo/p/11648332.html