【算法竞赛入门经典—训练指南】学习笔记(含例题代码与思路)第三章:实用数据结构

值得注意的是,本章虽然依然有很多不错的思想和题目,但并不建议初学知识点时从这里入门。并不是因为题目难,而是讲解并没有看网上其他博客来的清楚。

本章缺少的重要科技:(Link-Cut-Tree),主席树,后缀自动机。


  • 基础数据结构:数组,链表,队列,栈,并查集,优先队列。

例题(1) 猜猜数据结构((UVa11995)

  • 直接用(STL)的轮子模拟。
  • 坑点:小心可能会对空的轮子执行(pop)导致(RE)
#include <bits/stdc++.h>
using namespace std;

stack <int> s;
queue <int> q;
priority_queue <int> pq;

int n, x;

int read () {
	int s = 0, w = 1, ch = getchar ();
	while ('9' < ch || ch < '0') {
		if (ch == '-') w = -1;
		ch = getchar ();
	}
	while ('0' <= ch && ch <= '9') {
		s = s * 10 + ch - '0';
		ch = getchar ();
	}
	return s * w;
}

int main () {
//	freopen ("data.in", "r", stdin);
	while (scanf ("%d", &n) == 1) {
		int can1 = 1, can2 = 1, can3 = 1;
		for (int i = 1; i <= n; ++i) {
			if (read () == 1) {
				x = read ();
				s.push (x);
				q.push (x);
				pq.push (x);
			} else {
				x = read ();
				if (s.empty ()) {
					while (i != n) {
						read (), read (), ++i;
					}
					can1 = can2 = can3 = 0;
					break;
				}
				can1 &= (s.top () == x); s.pop ();
				can2 &= (q.front () == x); q.pop ();
				can3 &= (pq.top () == x); pq.pop ();
			}
		}
		if (can1 + can2 + can3 > 1) {
			puts ("not sure");
		} else if (can1 + can2 + can3 == 0) {
			puts ("impossible");
		} else {
			if (can1) puts ("stack");
			if (can2) puts ("queue");
			if (can3) puts ("priority queue");
		}
		while (!s.empty ()) s.pop ();
		while (!q.empty ()) q.pop ();
		while (!pq.empty ()) pq.pop ();
	}
}

例题(2) 一道简单题((UVa11991)

  • 同样还是一个(STL)的轮子。直接用(vector)保存每一个数的出现情况即可。
#include <bits/stdc++.h>
using namespace std;

const int N = 1000010;

int n, m, arr[N];

map <int, int> mp[N];

int main () {
//	freopen ("data.in", "r", stdin);
	ios :: sync_with_stdio (false);
	while (cin >> n >> m) {
		for (int i = 1; i <= n; ++i) {
			cin >> arr[i]; int w = arr[i];
			mp[w][++mp[w][0]] = i; 
		}
		for (int i = 1; i <= m; ++i) {
			static int k, v;
			cin >> k >> v;
			cout << mp[v][k] << endl;
		}
		for (int i = 1; i <= n; ++i) {
			mp[arr[i]].clear ();
		}
	}
}

例题(3) 阿格斯((LA3135)

  • 每次都取走最先发生的事件,然后再把下一次发生的情况塞回去,处理(k)次。
#include <bits/stdc++.h>
using namespace std;

struct Node {
	int Q_num, dotime, period;
	
	bool operator < (Node rhs) const {
		return dotime == rhs.dotime ? Q_num > rhs.Q_num : dotime > rhs.dotime;
	}
	
	Node (int _Q_num = 0, int _dotime = 0, int _period = 0) {
		Q_num = _Q_num, dotime = _dotime, period = _period;
	}
};

priority_queue <Node> q;

char opt[10]; int k, Q_num, dotime, period;

int main () {
//	freopen ("data.in", "r", stdin);
	while (cin >> opt && opt[0] == 'R') {
		cin >> Q_num >> period;
		q.push (Node (Q_num, period, period));
	}
	cin >> k;
	while (k--) {
		Node u = q.top (); q.pop ();
		cout << u.Q_num << endl;
		u.dotime += u.period; 
		q.push (u);
	}
}

例题(4)(K)个最小和((UVA11997)

  • 这个题就比较有意思了。
  • 首先每一行的序列它们的顺序是无关紧要的,所以我们可以从小到大排个序。
  • 如果只有两行的话,问题就变成了合并两个单调不下降的序列,并取其前(k)小。
  • 推广到(N)行就是每次把前两行合并,取其合并后的前(K)大,新的序列继续向下合并。
    • 因为对于任意两个序列,我们可以一直选取第二个序列的最小值,则第一个序列中比第(K)大要大值的一定不产生贡献。
  • 现在关键就在于合并了。如果暴力合并总复杂度是(O(N^3))的,难以接受。更优秀的办法是先取两个序列分别的最小值合并取走(这两个数合并产生的值一定最小且在答案里!),然后再把这个最小值后继可能的次小值插入优先队列维护,就这样每次取最小的然后向后延伸。做(K)次复杂度就是(O(NlogN)),总复杂度就是(O(N^2logN))。这个东西叫做多路归并。
#include <bits/stdc++.h>
using namespace std;

const int N = 750 + 5;

int k, arr1[N], arr2[N], data[N];

struct Node {
	int p1, p2;
	
	bool operator < (Node rhs) const {
		return arr1[p1] + arr2[p2] > arr1[rhs.p1] + arr2[rhs.p2];
	} 
	
	int get_val () {return arr1[p1] + arr2[p2];}
	
	Node (int _p1 = 0, int _p2 = 0) {
		p1 = _p1, p2 = _p2;
	} 
};

priority_queue <Node> q;

int main () {
//	freopen ("data.in", "r", stdin);
//	freopen ("data.out", "w", stdout);
	while (scanf ("%d", &k) == 1) {
		memset (arr1, 0, sizeof (arr1));
		for (int i = 1; i <= k; ++i) {
			scanf ("%d", &arr1[i]);
		}
		for (int i = 2; i <= k; ++i) {
			for (int j = 1; j <= k; ++j) {
				scanf ("%d", &arr2[j]);
			}
			sort (arr2 + 1, arr2 + 1 + k); //从小到大 
//			printf ("arr1 : "); for (int i = 1; i <= k; ++i) printf ("%d ", arr1[i]); printf ("
");
//			printf ("arr2 : "); for (int i = 1; i <= k; ++i) printf ("%d ", arr2[i]); printf ("
");
			while (!q.empty ()) q.pop ();
			for (int p1 = 1; p1 <= k; ++p1) {
				q.push (Node (p1, 1));
			}
			for (int j = 1; j <= k; ++j) {
				Node u = q.top (); q.pop ();
//				printf ("u = {pos1 = %d, pos2 = %d}
", u.p1, u.p2);
				data[j] = u.get_val ();
				u.p2++; q.push (u);
			}
			swap (arr1, data);
		}
		for (int i = 1; i <= k; ++i) {
			printf ("%d", arr1[i]); if (i != k) putchar (' ');
		}
		printf ("
");
	}
}

这两天不在状态,今天上午写不动题目了,先来整理博客吧。——(2019.04.26)

例题(5) 易爆物((LA3644)

  • (k)个简单化合物(k)种元素稍加思索可以转化为等效条件:无向图中存在一个环。
  • 用并查集维护合并的物品(化合物)的连通性,如果加入某个会成环就不加入。
  • (emm)输入有点鬼不要介意、、
#include <bits/stdc++.h>
using namespace std;

const int N = 100000 + 5;

int u, v, fa[N];

int find (int x) {
	return fa[x] == x ? x : fa[x] = find (fa[x]);
}

stack <int> s;

int main () {
	for (int i = 0; i < N; ++i) fa[i] = i;
	while (scanf ("%d", &u) == 1) {
		if (u == -1) {puts ("0"); continue;}
		int ans = 0; scanf ("%d", &v); 
		if (find (u) == find (v)) {
			ans++;
		} else {
			fa[find (u)] = find (v);
		}
		s.push (u); s.push (v);
		while (scanf ("%d", &u) == 1) {
			if (u == -1) break;
			scanf ("%d", &v);
			if (find (u) == find (v)) {
				ans++;
			} else {
				fa[find (u)] = find (v);
			}
			s.push (u); s.push (v);
		}
		while (!s.empty ()) fa[s.top()] = s.top (), s.pop ();
		printf ("%d
", ans);
	}
}

例题(6) 合作网络((LA3027)

  • 树的形态是唯一的,所以其他各种方法可能会不是特别好办。
  • 因为保证(u)没有父节点,所以已有的父子节点关系实际上没有意义,都完全可以直接被化为点到当前联通块的根节点距离。
  • 动态维护一棵森林,(LCT)太大材小用,这里我们用带权并查集就好,可以直接按照定义维护第二步里面的操作。
#include <bits/stdc++.h>
using namespace std;

const int N = 20000 + 5;

int fa[N], dis[N];

int T, n, u, v; char opt[5];

int findset (int x) {
	if (fa[x] != x) {
		int rt = findset (fa[x]);
		dis[x] += dis[fa[x]];
		return fa[x] = rt;
        //dis[x]递推传下来
	} else return x;
}

int main () {
//	freopen ("data.in", "r", stdin);
	cin >> T;
	while (T--) {
		cin >> n;
		for (int i = 1; i <= n; ++i) fa[i] = i, dis[i] = 0;
		while (scanf ("%s", opt) && opt[0] != 'O') {
			if (opt[0] == 'E') {
				cin >> u;
				findset (u);
				cout << dis[u] << endl;
			} else {
				cin >> u >> v;
				fa[u] = v;
				dis[u] = abs (u - v) % 1000;
			}
		}
	}
}

  • 树状数组:长得像树的数组。
    • 用处:处理前缀和比较方便,其他操作用线段树就好 =_= 因为线段树比较万能
    • 优点:代码好写常数小。
    • 缺点:操作局限于前缀和。(高级操作不太直观所以不方便)

例题(7) 乒乓比赛((LA4329)

  • 要枚举三个东西:选手(1),裁判,选手(2)
  • 显然枚举中间那个比较方便,可以确定出来两边的情况。
  • 对于每一个裁判的枚举点需要支持查询他前面和后面有多少人技能值比他大(/)小。这个东西用树状数组顺序插入维护一遍再逆序插入维护一遍即可(一个关系可以由另一个直接推出来所以查一个就好)。
#include <bits/stdc++.h>
using namespace std;

#define int long long
const int N = 100000 + 5;

int T, n, arr[N], tot1[N], tot2[N];

struct BIT {
	int a[N];
	
	int lowbit (int x) {return x & -x;}
	
	void clear () {
		memset (a, 0, sizeof (a));
	}
	
	void insert (int pos, int val) {
		while (pos < N) {
			a[pos] += val;
			pos += lowbit (pos);
		}
	}
	
	int query (int pos) {
		int ret = 0;
		while (pos != 0) {
			ret += a[pos];
			pos -= lowbit (pos);
		}
		return ret;
	}
}tree;

signed main () {
//	freopen ("data.in", "r", stdin);
	cin >> T;
	while (T--) {
		cin >> n;
		for (int i = 1; i <= n; ++i) cin >> arr[i];
		
		tree.clear ();
		for (int i = 1; i <= n; ++i) {
			tot1[i] = tree.query (arr[i]);
			tree.insert (arr[i], 1);
		}
		
		tree.clear ();
		for (int i = n; i >= 1; --i) {
			tot2[i] = tree.query (N - arr[i]);
			tree.insert (N - arr[i], 1);
		}
		
		int ans = 0;
		for (int i = 1; i <= n; ++i) {
//			cout << "tot1[i] = " << tot1[i] << " tot2[i] = " << tot2[i] << endl;
			ans += tot1[i] * tot2[i] + (i - 1 - tot1[i]) * (n - i - tot2[i]);
		}
		cout << ans << endl;
	}
}

  • (RMQ)问题:询问区间最大最小。
  • 可用数据结构:
    • 树状数组:骚操作,容易出锅不建议写。
      • 时间(O(logN)),空间(O(N))
    • 线段树:最标准。
      • 时间(O(logN)),空间(O(N))
    • (ST)表 :代码短,特定条件下好用。
      • 预处理(O(NlogN)),查询(O(1)),空间(O(NlogN))

(Tips):前缀最大最小不用蠢蠢地写(RMQ)(=\_=)

上面几种数据结构不再介绍。


例题(8) 频繁出现的数值((UVa11235)

  • 注意关键词:非降序出现。
  • 可以把每个相等的数值划分一个区间,每个区间有用的只有其整体长度还有其左右端点。被完整覆盖在查询区间里的等值区间用(ST)表维护出现次数最大值,其他零散的块特判一下。
#include <bits/stdc++.h>
using namespace std;

const int N = 200000 + 5;

int n, q, arr[N], lw[N], rw[N], st[N][20];

int query (int l, int r) {
	int mx = log2 (r - l + 1);
	return max (st[l][mx], st[r - (1 << mx) + 1][mx]);
}

int main () {
	while (cin >> n && n) { cin >> q;
		for (int i = 0; i < N; ++i) st[i][0] = 0;
		for (int i = 1; i <= n; ++i) {
			cin >> arr[i]; arr[i] += 100001;
			if (arr[i] != arr[i - 1]) {
				lw[arr[i]] = i, rw[arr[i - 1]] = i - 1;
			}
			st[arr[i]][0]++;
		}
		for (int i = 1; (1 << i) < N; ++i) {
			for (int j = 1; j + (1 << i) - 1 < N; ++j) {
				st[j][i] = max (st[j][i - 1], st[j + (1 << (i - 1))][i - 1]);
			}
		}
		for (int i = 1; i <= q; ++i) {
			int l, r, ans = 0;
			cin >> l >> r;
			// cout << "i = " << i << endl;
			// cout << "arr[l] = " << arr[l] << " arr[r] = " << arr[r] << endl;
			if (arr[r] - 1 >= arr[l] + 1) {
				ans = query (arr[l] + 1, arr[r] - 1);
				ans = max (ans, r - lw[arr[r]] + 1);
				ans = max (ans, rw[arr[l]] - l + 1);
			} else if (arr[r] == arr[l] + 1){
		    	ans = max (ans, r - lw[arr[r]] + 1);
				ans = max (ans, rw[arr[l]] - l + 1);
			} else if (arr[r] == arr[l]) {
				ans = r - l + 1;
			}
			cout << ans << endl;
		}
	}
}


线段树:维护动态范围最小值。

  • 关键在于利用子区间划分的递推关系构造(push\_up)(push\_down)函数。

例题(9) 动态最大连续和((LA3938)

  • 题意:指定一个区间,求最大连续子段和。
  • 常见套路:连续询问或者带修改的最大子段和,化为递推分治的写法。
  • 然后就没了,在线段树里面维护一下递推关系就好,详见代码。
#include <bits/stdc++.h>
using namespace std;

#define ls (p << 1)
#define rs (p << 1 | 1)
#define mid ((l + r) >> 1)
#define int long long
const int N = 500000 + 5;
const int INF = 0x3f3f3f3f3f3f3f3fll;

struct Node {
	int val, maxl, maxr, vall, valr, mlp, mrp;
	
	void clear () {
		val = maxl = maxr = -INF; vall = valr = mlp = mrp = 0;
	}
	
	Node (int _val = -INF, int _maxl = -INF, int _maxr = -INF, int _vall = 0, int _valr = 0, int _mlp = 0, int _mrp = 0) {
		val = _val, maxl = _maxl, maxr = _maxr, vall = _vall, valr = _valr, mlp = _mlp, mrp = _mrp;
	}
}t[N << 2];

int n, m, arr[N], sum[N];

void push_up (int l, int r, Node &now, Node Ls, Node Rs) {
	if (Ls.maxr + Rs.maxl >= now.val) {
		if (Ls.maxr + Rs.maxl > now.val) {
			now.val = Ls.maxr + Rs.maxl;
			now.vall = Ls.mrp;
			now.valr = Rs.mlp;
		} else {
			if (Ls.mrp <= now.vall) {
				if (Ls.mrp < now.vall) {
					now.val = Ls.maxr + Rs.maxl;
					now.vall = Ls.mrp;
					now.valr = Rs.mlp;
				} else if (Rs.mlp < now.valr) {
					now.val = Ls.maxr + Rs.maxl;
					now.vall = Ls.mrp;
					now.valr = Rs.mlp;
				}
			}
		}
	}
	
	if (Ls.val >= now.val) {
		if (Ls.val > now.val) {
			now.val = Ls.val;
			now.vall = Ls.vall;
			now.valr = Ls.valr;
		} else {
			if (Ls.vall <= now.vall) {
				if (Ls.vall < now.vall) {
					now.val = Ls.val;
					now.vall = Ls.vall;
					now.valr = Ls.valr;
				} else if (Ls.valr < now.valr) {
					now.val = Ls.val;
					now.vall = Ls.vall;
					now.valr = Ls.valr;
				}
			}
		}
	}
	
	if (Rs.val >= now.val) {
		if (Rs.val > now.val) {
			now.val = Rs.val;
			now.vall = Rs.vall;
			now.valr = Rs.valr;
		} else {
			if (Rs.vall <= now.vall) {
				if (Rs.vall < now.vall) {
					now.val = Rs.val;
					now.vall = Rs.vall;
					now.valr = Rs.valr;
				} else if (Rs.valr < now.valr) {
					now.val = Rs.val;
					now.vall = Rs.vall;
					now.valr = Rs.valr;
				}
			}
		}
	}
	
	if (Ls.maxl >= sum[mid] - sum[l - 1] + Rs.maxl) {
		now.maxl = Ls.maxl;
		now.mlp = Ls.mlp;
	} else {
		now.maxl = sum[mid] - sum[l - 1] + Rs.maxl;
		now.mlp = Rs.mlp;
	}
	
	if (sum[r] - sum[mid] + Ls.maxr >= Rs.maxr) {
	 	now.maxr = sum[r] - sum[mid] + Ls.maxr;
		now.mrp = Ls.mrp;
	} else {
		now.maxr = Rs.maxr;
		now.mrp = Rs.mrp;
	}
//	printf ("l = %I64d, r = %I64d, maxw = %I64d
", now.vall, now.valr, now.val);
}

void build (int l, int r, int p) {
	if (l == r) {
		t[p] = Node (arr[l], arr[l], arr[l], l, r, l, r);
		return;
	}
	build (l, mid, ls);
	build (mid + 1, r, rs);
	push_up (l, r, t[p], t[ls], t[rs]);
}

Node query (int l, int r, int nl, int nr, int p) {
	if (nl <= l && r <= nr) {
		return t[p];
	}
	Node ret, Ls, Rs;
	if (nl <= mid) Ls = query (l, mid, nl, nr, ls);
	if (mid < nr) Rs = query (mid + 1, r, nl, nr, rs);
	push_up (l, r, ret, Ls, Rs);
//	cout << "Test : " << ret.vall << " " << ret.valr << endl;
	return ret;
} 

int kase;

signed main () {
//	freopen ("data.in", "r", stdin);
	ios :: sync_with_stdio (false);
	while (cin >> n >> m) {
		for (int i = 1; i <= n << 2; ++i) {
			t[i].clear ();
		}
		for (int i = 1; i <= n; ++i) {
			cin >> arr[i];
			sum[i] = sum[i - 1] + arr[i];
		}
		build (1, n, 1);
		cout << "Case " << ++kase << ":" << endl;
		for (int i = 1; i <= m; ++i) {
			static int l, r;
			cin >> l >> r;
			Node ans = query (1, n, l, r, 1);
			cout << ans.vall << " " << ans.valr << endl;
		}
	}
}


例题(10) 快速矩阵操作((UVa11992)

  • 题意:查一个子矩阵的最大最小和(sum)
  • 然而却是一个智障题。因为行数实在太少,所以二十个线段树一个一个维护就可以了。答案应该蛮好合并的。
  • 修改和赋值的顺序值得注意。
#include <bits/stdc++.h>
using namespace std;

#define ls (p << 1)
#define rs (p << 1 | 1)
#define mid ((l + r) >> 1)
#define LL long long
const int N = 1000000 + 5;

const int INF = 0x7fffffff;

struct Node {	
	LL sum; int minw, maxw, tag1, tag2;
	
	void clear () { 
		minw = INF; 
		sum = tag1 = 0;
		maxw = tag2 = -INF;
	}
	
	void Init () {
		sum = tag1 = minw = maxw = 0; tag2 = -INF;
	}
	
	void merge (Node rhs) {
		sum += rhs.sum;
		minw = min (minw, rhs.minw);
		maxw = max (maxw, rhs.maxw);
	}
};

struct Segment_Tree {
	Node t[N];
	
	void push_up (int p) {
		t[p].sum = t[ls].sum + t[rs].sum;
		t[p].minw = min (t[ls].minw, t[rs].minw);
		t[p].maxw = max (t[ls].maxw, t[rs].maxw);
	}
	
	void push_down (int l, int r, int p) {
		if (t[p].tag2 != -INF) {
			t[ls].tag1 = 0;
			t[rs].tag1 = 0;
			t[ls].tag2 = t[p].tag2;
			t[rs].tag2 = t[p].tag2;
			t[ls].sum = t[p].tag2 * (mid - l + 1);
			t[rs].sum = t[p].tag2 * (r - mid);
			t[ls].maxw = t[ls].minw = t[p].tag2;
			t[rs].maxw = t[rs].minw = t[p].tag2;
			t[p].tag2 = -INF;
		}
		t[ls].tag1 += t[p].tag1;
		t[rs].tag1 += t[p].tag1;
		t[ls].sum += t[p].tag1 * (mid - l + 1);
		t[rs].sum += t[p].tag1 * (r - mid);
		t[ls].maxw += t[p].tag1; t[ls].minw += t[p].tag1;
		t[rs].maxw += t[p].tag1; t[rs].minw += t[p].tag1;
		t[p].tag1 = 0;
	}
	
	void modify1 (int l, int r, int nl, int nr, int w, int p) { //add w to [l, r]
		if (nl <= l && r <= nr) {
			t[p].tag1 += w;
			t[p].maxw += w;
			t[p].minw += w;
			t[p].sum += 1ll * (r - l + 1) * w;
			return;
		}
		push_down (l, r, p);
		if (nl <= mid) modify1 (l, mid, nl, nr, w, ls);
		if (mid < nr) modify1 (mid + 1, r, nl, nr, w, rs);
		push_up (p);
	}
	
	void modify2 (int l, int r, int nl, int nr, int w, int p) { // modify w to [l, r]
		if (nl <= l && r <= nr) {
			t[p].tag2 = w;
			t[p].tag1 = 0;
			t[p].maxw = w;
			t[p].minw = w;
			t[p].sum = 1ll * (r - l + 1) * w;
			return;
		}
		push_down (l, r, p);
		if (nl <= mid) modify2 (l, mid, nl, nr, w, ls);
		if (mid < nr) modify2 (mid + 1, r, nl, nr, w, rs);
		push_up (p);
	}
	
	Node query (int l, int r, int nl, int nr, int p) {
		if (nl <= l && r <= nr) {
			return t[p];
		}
		Node ret; ret.clear ();
		push_down (l, r, p);
		if (nl <= mid) ret.merge (query (l, mid, nl, nr, ls));
		if (mid < nr) ret.merge (query (mid + 1, r, nl, nr, rs));
		push_up (p);
		return ret; 
	}
	
	void clear (int l, int r, int p) {
		t[p].Init ();
		if (l == r) return;
		clear (l, mid, ls);
		clear (mid + 1, r, rs);
	}
}tree[21];

int r, c, m;

void _modify1 (int x1, int y1, int x2, int y2, int w) {
	for (int i = x1; i <= x2; ++i) {
		tree[i].modify1 (1, c, y1, y2, w, 1);
	}
}

void _modify2 (int x1, int y1, int x2, int y2, int w) {
	for (int i = x1; i <= x2; ++i) {
		tree[i].modify2 (1, c, y1, y2, w, 1);
	}
}

Node query (int x1, int y1, int x2, int y2) {
	Node ret; ret.clear ();
	for (int i = x1; i <= x2; ++i) {
		ret.merge (tree[i].query (1, c, y1, y2, 1));
	}
	return ret;
}

void _clear () {
	for (int i = 1; i <= r; ++i) {
		tree[i].clear (1, c, 1);
	}
}

int main () {
//	freopen ("data.in", "r", stdin);
	ios :: sync_with_stdio (false);
	while (cin >> r >> c >> m) {
		_clear ();
		int opt, x1, y1, x2, y2, w;
		for (int i = 1; i <= m; ++i) {
			cin >> opt >> x1 >> y1 >> x2 >> y2;
			if (opt == 1) {
				cin >> w;
				_modify1 (x1, y1, x2, y2, w);
			}
			if (opt == 2) {
				cin >> w;
				_modify2 (x1, y1, x2, y2, w);
			} 
			if (opt == 3) {
				Node u = query (x1, y1, x2, y2);
				cout << u.sum << " " << u.minw << " " << u.maxw << endl;
			}
		}
	}
}


  • (Trie):前缀树。

  • 用处:

    • 把多个串插入一个(Trie),可以作关于模式串的前缀的查询。
    • 把一个串的所有后缀插入一个(Trie),可以作关于模式串的子串的查询。
    • 把模式串拉出来在(Trie)上跑:可以作关于文本串的前缀(/)整体的查询。
  • 常见操作:查询存在性,查询出现次数,子串计数。


例题(11) 背单词((LA3942)

  • 查分解方式啊。。首先肯定先把字典插到(Trie)里面。
  • (dp_i)是从(i)开始的串的构成方式总数,可以先得到一个朴素的(DP)式子是(dp_i = sum_{i} dp_{i + len_x}),转移条件是(s_{[i,i + len_x)})这一段能在(Trie)上匹配出来一个完整的字典串(x),转化为前缀的表达方式(因为(Trie)处理前缀)就是(dic_x)是从(i+1)开始的(s)的一个前缀。
  • 查找一下(Trie)(s)的每一个后缀,看会出现哪些可转移点。为什么这么搞是对的呢?因为(Trie)树深度不超过(100),复杂度(O(3e5*1e2))
#include <bits/stdc++.h>
using namespace std;

const int MaxS = 100;
const int N = 300000 + 5;
const int M = 400000 + 5;
const int Mod = 20071027;

int n, l; char s[N], dic[N];

int tot, fin[M], ch[M][26];

struct Node {int u, v;};

stack <Node> sta;

void add_str (char *A) {
	int len = strlen (A), now = 0;
	for (int i = 0; i < len; ++i) {
//		printf ("now = %d
", now);
		if (!ch[now][A[i] - 'a']) {
			ch[now][A[i] - 'a'] = ++tot;
			sta.push ((Node) {now, A[i] - 'a'});
		}
		now = ch[now][A[i] - 'a'];
	}
//	printf ("fin[%d] = true
", now);
	fin[now] = true;
}

void Initialize () {
	tot = 0;
	memset (fin, 0, sizeof (fin));
	while (!sta.empty ()) {
		ch[sta.top ().u][sta.top ().v] = 0; sta.pop ();
	}
}

int dp[N];

int get_ans () {
	memset (dp, 0, sizeof (dp));
	dp[l + 1] = 1;
	for (int i = l; i >= 1; --i) {
		int now = 0;
//		printf ("i = %d
", i);
		for (int j = 1; j <= 100 && i + j - 1 <= l; ++j) {
			now = ch[now][s[i + j - 1] - 'a'];
//			printf ("now = %d
", now);
			if (now == 0) break;
//			cout << "[" << i << ", " << n << "] -> have substr(lcp)" << i + j << endl;
			if (fin[now]) dp[i] = (dp[i] + dp[i + j]) % Mod;
		}
	}
	return dp[1];
}

int kase;

int main () {
//	freopen ("data.in", "r", stdin);
	while (cin >> (s + 1)) {
		cin >> n;
		Initialize ();
		l = strlen (s + 1);
		for (int i = 1; i <= n; ++i) {
			cin >> dic; 
			add_str (dic);
		}
		cout << "Case " << ++kase << ": " << get_ans () << endl;
	}	
}

例题(12)(strcmp)函数((UVa11732)

  • 把所有串插到一个(Trie)里玩。考虑任意两个串对答案的贡献是它们(lca)的深度乘二加一,那就可以枚举每一个串,拿它去和之前插入的其他串匹配求解统计了。
#include <bits/stdc++.h>
using namespace std;

#define LL long long
const int N = 4000 + 5;
const int M = 1000 + 5;

int n, tot, fin[N * M], used[N * M]; char s[M];

map <int, int> ch[N * M];

struct Node {int u, v;};

stack <Node> sta;

void insert (char *s) {
	int l = strlen (s), now = 0;
	for (int i = 0; i < l; ++i) {
		if (!ch[now].count (s[i])) {
			ch[now][s[i]] = ++tot;
			sta.push ((Node) {now, s[i]});
		}
		now = ch[now][s[i]];
		used[now]++;
	}
	fin[now]++;
	sta.push ((Node) {now, 0});
}

int get_ans (char *s) {
	LL ret = 0; 
	int now = 0, l = strlen (s);
	for (int i = 0; i < l; ++i) {
		if (!ch[now].count (s[i])) return ret;
		else {
			now = ch[now][s[i]];
			ret += used[now] * 2;
		}
	}
//	cout << "fin = " << fin[now] << endl;
	return ret + fin[now];
}

void Initialize () {
	for (int i = 0; i <= tot; ++i) {
		ch[i].clear ();
		fin[i] = used[i] = 0;
	}
	tot = 0;
}

int kase;

int main () {
	while (cin >> n && n) {
		Initialize ();
		LL ans = 0;
		for (int i = 1; i <= n; ++i) {
			scanf ("%s", s);
//			cout << "getans = " << get_ans (s) + i - 1 << endl;
			ans += get_ans (s) + (i - 1);
			insert (s);
		}
		cout << "Case " << ++kase << ": " << ans << endl;
	}
}


例题(13) 周期((LA3026)

  • 为啥要写(KMP)啊。。毛用没有的东西,可以直接被(AC)自动机代替啊。。。
  • 在这个题里还是(SA)最方便。预处理,往前判,看能循环几次。预处理(O(NlogN)),枚举复杂度是调和级数,也是(O(NlogN))
#include <bits/stdc++.h>
using namespace std;

const int N = 1000000 + 5;

int n, m; char s[N];

int sa[N], tp[N], rk[N], _rk[N], bin[N], height[N];

void radix_sort () {
	for (int i = 0; i <= m; ++i) bin[i] = 0;
	for (int i = 1; i <= n; ++i) bin[rk[tp[i]]]++;
	for (int i = 1; i <= m; ++i) bin[i] += bin[i - 1];
	for (int i = n; i >= 1; --i) sa[bin[rk[tp[i]]]--] = tp[i];
}

int st[N][20];

int lcp (int x, int y) {
	if (x == y) return n - x + 1;
	x = rk[x], y = rk[y];
	if (x > y) swap (x, y); x++;
	int mx = log2 (y - x + 1);
	return min (st[x][mx], st[y - (1 << mx) + 1][mx]);
}

void suffix_sort () {
	m = 255;
	memset (sa, 0, sizeof (sa));
	for (int i = 1; i <= n; ++i) {
		rk[i] = s[i], tp[i] = i;
	}
	radix_sort ();
	for (int w = 1; w <= n; w <<= 1) {
		int cnt = 0;
		for (int i = n - w + 1; i <= n; ++i) tp[++cnt] = i;
		for (int i = 1; i <= n; ++i) if (sa[i] > w) tp[++cnt] = sa[i] - w;
		radix_sort ();
		memcpy (_rk, rk, sizeof (rk));
		rk[sa[1]] = cnt = 1;
		for (int i = 2; i <= n; ++i) {
			rk[sa[i]] = _rk[sa[i]] == _rk[sa[i - 1]] && _rk[sa[i] + w] == _rk[sa[i - 1] + w] ? cnt : ++cnt;
		}   
		m = cnt;
		if (n == cnt) break;
	}
	int k = 0;
	for (int i = 1; i <= n; ++i) {
		if (k != 0) --k;
		int j = sa[rk[i] - 1];
		while (s[i + k] == s[j + k]) ++k;
		height[rk[i]] = k;
	}
	for (int i = 1; i <= n; ++i) st[i][0] = height[i];
	for (int i = 1; (1 << i) <= n; ++i) {
		for (int j = 1; j + (1 << i) - 1 <= n; ++j) {
			st[j][i] = min (st[j][i - 1], st[j + (1 << (i - 1))][i - 1]);
		}
	}
}	 

int kase, rep[N];

int main () {
//	freopen ("data.in", "r", stdin);
	ios :: sync_with_stdio (false);
	while (cin >> n && n) {
		cin >> (s + 1);
		suffix_sort ();
//		for (int i = 1; i <= n; ++i) cout << sa[i] << " "; cout << endl;
//		for (int i = 1; i <= n; ++i) cout << height[i] << " "; cout << endl;
		for (int i = 1; i <= n; ++i) rep[i] = 0;
		for (int i = 1; i <= n; ++i) {
			int tot = 1;
//			cout << "i = " << i << endl;
			for (int j = i + 1; j + i - 1 <= n; j += i) {
//				cout << "lcp (1, " << j << ") = " << lcp (1, j) << endl;
				if (lcp (1, j) < i) break;
				rep[j + i - 1] = max (rep[j + i - 1], ++tot);
			}
		}
		cout << "Test case #" << ++kase << endl;
		for (int i = 1; i <= n; ++i) {
			if (rep[i] > 1) {
				cout << i << " " << rep[i] << endl;
			}
		}
		cout << endl;
	}
}

  • (AC)自动机:处理多模式串匹配问题。
  • 可以同时判断很多串是否在一个串里出现,求出现次数,等等各种操作。
  • (Trie)边:指向添加一个字符后的前缀
  • (Fail)边:指向当前已匹配串在(Trie)图内存在的最长的后缀。事实上如果把(Trie)边全都连起来是一棵树((n-1)条边),而且一个点的子树含且仅含(Trie)内这个节点代表的字符串的所有后缀,套上数据结构可以做很多事情。

例题(14) 出现次数最多的子串((LA4670)

  • 找出哪些子串在文本中出现最多。
  • 做法:子串建(AC)自动机,文本串跑一边匹配,每次顺着(fail)边记一下走到的模式串。
#include <bits/stdc++.h>
using namespace std;

const int N = 10500 + 5;
const int M = 1000000 + 5;

int n; char dic[155][75], s[M];

struct AC_Auto {
	int tot, ch[N][26], vis[N], fail[N];
	
	vector <int> fin[N];
	
	void clear () {
		tot = 0;
		memset (ch, 0, sizeof (ch));
		memset (vis, 0, sizeof (vis));
		memset (fail, 0, sizeof (fail));
		for (int i = 0; i < N; ++i) {
			fin[i].clear ();
		}
	}
	
	void Insert (char *s, int id) {
		int l = strlen (s), now = 0;
		for (int i = 0; i < l; ++i) {
			if (!ch[now][s[i] - 'a']) {
				ch[now][s[i] - 'a'] = ++tot;
			}
			now = ch[now][s[i] - 'a'];
		}
		fin[now].push_back (id);
	}
	
	queue <int> q;
	
	void build () {
		for (int i = 0; i < 26; ++i) {
			if (ch[0][i]) q.push (ch[0][i]);
		}
		while (!q.empty ()) {
			int now = q.front (); q.pop ();
			for (int i = 0; i < 26; ++i) {
				if (ch[now][i]) {
					q.push (ch[now][i]);
					fail[ch[now][i]] = ch[fail[now]][i];
				} else {
					ch[now][i] = ch[fail[now]][i];
				}
			}		
		}
	}
	
	void query (char *s) {
		int l = strlen (s), now = 0, ans = 0;
		for (int i = 0; i < l; ++i) {
			now = ch[now][s[i] - 'a'];
			for (int t = now; t != 0; t = fail[t]) {
				for (int k = 0; k < fin[t].size (); ++k) {
					vis[fin[t][k]]++;
					ans = max (ans, vis[fin[t][k]]);
				}
			}
		}
		cout << ans << endl;
		for (int i = 1; i <= n; ++i) {
			if (vis[i] == ans) {
				cout << dic[i] << endl;
			}
		}
	}
}AC;

int main () {
	ios :: sync_with_stdio (false);
	while (cin >> n && n) {
		AC.clear ();
		for (int i = 1; i <= n; ++i) {
			cin >> dic[i];
			AC.Insert (dic[i], i);
		}
		AC.build ();
		cin >> s;
		AC.query (s);
	}
}

略困。今天先更新到这。


原文地址:https://www.cnblogs.com/maomao9173/p/10759965.html