后缀数组专题训练

后缀数组学习资料:http://blog.csdn.net/wxfwxf328/article/details/7599929


题目链接:http://acm.hdu.edu.cn/showproblem.php?pid=4691

思路:首先求出上下相邻的查询的字符串的LCP(最长公共前缀),这个可以通过求出的height数组得到。我们可以先求出上下相邻的字符串的rank,然后对height数组做RQM就可以(当然可以先预处理,然后查询的复杂度就为O(1)啦。

#include <iostream>
#include <cstdio>
#include <cstring>
#include <cmath>
#include <algorithm>
#include <vector>
#include <queue>
using namespace std;

const int MAX_N = (100000 + 10000);
const int inf = 0x3f3f3f3f;

struct SA {

	/*
	rank[] 数组中rank[0] ~ rank[n - 1]为有效值,rank[n]必为0(字符串最后一位补0).
	sa[] 数组中sa[1] ~ sa[n]为有效值,sa[0] 为 n必定为无效值
	height[] 数组中有效值为height[2] ~ height[n].
	*/

	int wa[MAX_N], wb[MAX_N], wv[MAX_N], ws[MAX_N];

	void da(int *r, int *sa, int n, int m) 
	{
		int i, j, p, *x = wa, *y = wb;
		//第一轮基数排序
		for (i = 0; i < m; ++i) ws[i] = 0;
		for (i = 0; i < n; ++i) ws[ x[i] = r[i] ]++;
		for (i = 1; i < m; ++i) ws[i] += ws[i - 1];
		for (i = n - 1; i >= 0; --i) sa[--ws[x[i]]] = i;

		for (j = 1, p = 1; p < n; j <<= 1, m = p) {
			
			for (p = 0, i = n - j; i < n; ++i) y[p++] = i;  //从n - j到j的字符串的第二关键字都是0,因此在y数组中排在前面
			//上一行中只有sa[i] >= j的第sa[i]个字符串的Rank才会作为下一行的第sa[i] - j个字符串的第二关键字
			//并且是按sa[i]的顺序Rank[sa[i]]递增的,因此完成了对剩余元素的第二关键字的排序
			for (i = 0; i < n; ++i) if (sa[i] >= j) y[p++] = sa[i] - j;

			//y数组是按照第二关键字排序的结果
			//按照第一关键字进行基数排序
			for (i = 0; i < n; ++i) wv[i] = x[y[i]];
			for (i = 0; i < m; ++i) ws[i] = 0;
			for (i = 0; i < n; ++i) ws[wv[i]]++;
			for (i = 1; i < m; ++i) ws[i] += ws[i - 1];
			for (i = n - 1; i >= 0; --i) sa[--ws[wv[i]]] = y[i];

			swap(x, y);
			//p指的不同Rank的字符串的个数
			for (p = 1, x[sa[0]] = 0, i = 1; i < n; ++i) {
				x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
			}
		}
	}

	int cmp(int *r, int a, int b, int l) {
		return (r[a] == r[b] && r[a + l] == r[b + l]);
	}

	
	//求Rank数组以及height数组
	//定义height[i]为suffix(sa[i - 1])和suffix(sa[i])的最长公共前缀(LCP);
	//对于任意起始位置的i,j(假设Rank[i] < Rank[j])的后缀的最长公共前缀,为height[Rank[i]+1],height[Rank[i] + 2],...,height[Rank[j]]的最小值
	int sa[MAX_N], Rank[MAX_N], height[MAX_N];

	void calheight(char *str, int *r, int *sa, int n)
	{
		int k = 0;
		for (int i = 0; i <= n; ++i) r[sa[i]] = i;
		for (int i = 0; i < n; ++i) {
			if (k) k--;

			int j = sa[r[i] - 1];

			//如果k不为0,则最长公共前缀至少为k - 1;
			//k == 0, 则直接比较第i个字符串和第j个字符串有多少相同的
			while (str[i + k] == str[j + k]) k++;


			height[r[i]] = k;
		}
	}
} m_sa;

int dp[MAX_N][22];
void InitRMQ(int n)
{
	for (int i = 0; i <= n; ++i) dp[i][0] = m_sa.height[i];

	for (int j = 1; (1 << j) <= n; ++j) {
		for (int i = 1; i + (1 << j) - 1 <= n; ++i) {
			dp[i][j] = min(dp[i][j - 1], dp[i + (1 << (j - 1))][j - 1]);
		}
	}
}

int RMQ_ST(int l, int r)
{
	int k = (int)(log(1.0 * r - l + 1) / log(2.0));
	return min(dp[l][k], dp[r - (1 << k) + 1][k]);
}


int getLCP(int a, int b)
{
	a = m_sa.Rank[a];
	b = m_sa.Rank[b];

	//printf("a = %d, b = %d
", a, b);
	
	if (a > b) swap(a, b);

	return RMQ_ST(a + 1, b);
}

int Cal(int n)
{
	if (n == 0) return 1;
	int cnt = 0;
	while (n) {
		++cnt;
		n /= 10;
	}

	return cnt;
}

char str[MAX_N];
int lpos[MAX_N], rpos[MAX_N];
int main()
{
	while (~scanf("%s", str)) {
		int len = strlen(str);

		for (int i = 0; i < len; ++i) m_sa.Rank[i] = str[i] - 'a' + 1;
		m_sa.Rank[len] = 0;

		m_sa.da(m_sa.Rank, m_sa.sa, len + 1, 30);
		m_sa.calheight(str, m_sa.Rank, m_sa.sa, len);

		/*
		puts("Rank..");
		for (int i = 0; i < len; ++i) {
			printf("%d ", m_sa.Rank[i]);
		}

		puts("
Height...");

		for (int i = 0; i <= len; ++i) {
			printf("%d ", m_sa.height[i]);
		}

		puts("");

		*/

		InitRMQ(len);


		int Cas;
		scanf("%d", &Cas);

		__int64 ans1 = 0, ans2 = 0;
		int tmp;
		for (int i = 0; i < Cas; ++i) {
			scanf("%d %d", &lpos[i], &rpos[i]);
			if (i == 0) {
				ans1 += rpos[i] - lpos[i] + 1;
				ans2 += rpos[i] - lpos[i] + 3;
				continue;
			}

			if (lpos[i] != lpos[i - 1]) tmp = getLCP(lpos[i - 1], lpos[i]);
			else tmp = inf;

			//printf("tmp = %d
", tmp);

			tmp = min(tmp, rpos[i] - lpos[i]);
			tmp = min(tmp, rpos[i - 1] - lpos[i - 1]);

			ans1 += rpos[i] - lpos[i] + 1;
			ans2 += rpos[i] - lpos[i] - tmp + 1;
			ans2 += 1;
			ans2 += Cal(tmp);
		}
		
		printf("%I64d %I64d
", ans1, ans2);
	}
	return 0;
}


题目链接:http://acm.hdu.edu.cn/showproblem.php?pid=4552

思路:题目的意思就是求字符串所有的前缀出现的次数之和,其实可以转化为所有后缀与该字符串的最长公共前缀之和。

#include <iostream>
#include <cstdio>
#include <cstring>
#include <algorithm>
using namespace std;

const int MAX_N = (100000 + 10000);

struct suffix_sa {
	
	int wa[MAX_N], wb[MAX_N], wv[MAX_N], ws[MAX_N];

	void da(int *r, int *sa, int n, int m)
	{
		int i, j, p, *x = wa, *y = wb;

		for (i = 0; i < m; ++i) ws[i] = 0;
		for (i = 0; i < n; ++i) ws[ x[i] = r[i] ]++;
		for (i = 1; i < m; ++i) ws[i] += ws[i - 1];
		for (i = n - 1; i >= 0; --i) sa[--ws[x[i]]] = i;

		for (j = 1, p = 1; p < n; j <<= 1, m = p) {
			
			for (p = 0, i = n - j; i < n; ++i) y[p++] = i;
			for (i = 0; i < n; ++i) if (sa[i] >= j) y[p++] = sa[i] - j;

			for (i = 0; i < n; ++i) wv[i] = x[y[i]];
			for (i = 0; i < m; ++i) ws[i] = 0;
			for (i = 0; i < n; ++i) ws[wv[i]]++;
			for (i = 1; i < m; ++i) ws[i] += ws[i - 1];
			for (i = n - 1; i >= 0; --i) sa[--ws[wv[i]]] = y[i];

			swap(x, y);
			for(p = 1, x[sa[0]] = 0, i = 1; i < n; ++i) {
				x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
			}
		}
	}

	int cmp(int *r, int a, int b, int l)
	{
		return (r[a] == r[b] && r[a + l] == r[b + l]);
	}

	int sa[MAX_N], Rank[MAX_N], height[MAX_N];

	void calheight(char *str, int *r, int *sa, int n)
	{
		for (int i = 0; i <= n; ++i) r[sa[i]] = i;

		int k = 0;
		for (int i = 0; i < n; ++i) {
			if (k) k--;

			int j = sa[r[i] - 1];

			while (str[i + k] == str[j + k]) k++;
			
			height[r[i]] = k;
		}
	}
} m_sa;


char str[MAX_N];

int main()
{
	while (~scanf("%s", str)) {
		int len = strlen(str);
		for (int i = 0; i < len; ++i) m_sa.Rank[i] = str[i] - 'a' + 1;
		m_sa.Rank[len] = 0;

		m_sa.da(m_sa.Rank, m_sa.sa, len + 1, 30);
		m_sa.calheight(str, m_sa.Rank, m_sa.sa, len);

		/*
		printf("Rank...
");
		for (int i = 0; i <= len; ++i) {
			printf("%d ", m_sa.Rank[i]);
		}

		puts("
Height...");
		for (int i = 0; i <= len; ++i) {
			printf("%d ", m_sa.height[i]);
		}

		puts("");
		*/

		int ans = len, tmp = len;
		for (int i = m_sa.Rank[0] + 1; i <= len; ++i) {
			tmp = min(tmp, m_sa.height[i]);
			ans += tmp;
			ans %= 256;
		}

		tmp = len;
		for (int i = m_sa.Rank[0]; i >= 0; --i) {
			tmp = min(tmp, m_sa.height[i]);
			ans += tmp;
			ans %= 256;
		}

		printf("%d
", ans);
	}
	return 0;
}




题目链接:http://acm.hdu.edu.cn/showproblem.php?pid=3518

思路:题目的意思是要求给定字符串中至少出现两次并且不重叠的子串的个数。可以这么理解,如果某个子串满足这个条件,那么该子串至少是某两个后缀的公共前缀,但是需要注意的是要不重叠,于是还需再判断一下这两个后缀的起始位置距离是否大于这个子串的长度即可。于是我们可以枚举子串的长度,然后利用height数组求解。

#include <iostream>
#include <cstdio>
#include <cstring>
#include <algorithm>
using namespace std;

const int MAX_N = (1000 + 100);
struct suffix_sa {
	
	int wa[MAX_N], wb[MAX_N], ws[MAX_N], wv[MAX_N];

	void da(int *r, int *sa, int n, int m)
	{
		int i, j, p, *x =wa, *y = wb;
		for (i = 0; i < m; ++i) ws[i] = 0;
		for (i = 0; i < n; ++i) ws[ x[i] = r[i] ]++;
		for (i = 1; i < m; ++i) ws[i] += ws[i - 1];
		for (i = n - 1; i >= 0; --i) sa[--ws[x[i]]] = i;
		
		for (j = 1, p = 1; p < n; j <<= 1, m = p) {
			
			for (p = 0, i = n - j; i < n; ++i) y[p++] = i;
			for (i = 0; i < n; ++i) if (sa[i] >= j) y[p++] = sa[i] - j;
			
			for (i = 0; i < n; ++i) wv[i] = x[y[i]];
			for (i = 0; i < m; ++i) ws[i] = 0;
			for (i = 0; i < n; ++i) ws[wv[i]]++;
			for (i = 1; i < m; ++i) ws[i] += ws[i - 1];
			for (i = n - 1; i >= 0; --i) sa[--ws[wv[i]]] = y[i];

			swap(x, y);
			for (p = 1, x[sa[0]] = 0, i = 1; i < n; ++i) {
				x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
			}
		}
	}

	int cmp(int *r, int a, int b, int l)
	{
		return (r[a] == r[b] && r[a + l] == r[b + l]);
	}

	int sa[MAX_N], Rank[MAX_N], height[MAX_N];

	void calheight(char *str, int *r, int n)
	{
		for (int i = 0; i <= n; ++i) r[sa[i]] = i;

		int k = 0;
		for (int i = 0; i < n; ++i) {

			if (k) k--;

			int j = sa[r[i] - 1];

			while (str[i + k] == str[j + k]) k++;

			height[r[i]] = k;
		}
	}
} m_sa;


char str[MAX_N];
int main()
{
	while (~scanf("%s", str) && strcmp(str, "#") != 0) {
		int n = strlen(str);
		for (int i = 0; i < n; ++i) m_sa.Rank[i] = str[i] - 'a' + 1;
		m_sa.Rank[n] = 0;

		m_sa.da(m_sa.Rank, m_sa.sa, n + 1, 30);
		m_sa.calheight(str, m_sa.Rank, n);

		/*
		for (int i = 0; i <= n; ++i) {
			printf("%d ", m_sa.Rank[i]);
		}

		puts("");
		for (int i = 1; i <= n; ++i) {
			printf("%d ", m_sa.height[i]);
		}
		puts("");
		*/

		__int64 ans = 0;
		for (int len = 1; len <= (n + 1) / 2; ++len) {
			int l  = MAX_N, r = -1;

			for (int i = 1; i <= n; ++i) {
				if (m_sa.height[i] >= len) {
					l = min(l, min(m_sa.sa[i - 1], m_sa.sa[i]));
					r = max(r, max(m_sa.sa[i - 1], m_sa.sa[i]));
				} else {
					if (l + len <= r) ++ans;
					l = MAX_N, r = -1;
				}
			}

			if (r != -1 && l + len <= r) ++ans;
		}

		printf("%d
", ans);
	}
	return 0;
}



题目链接:http://acm.hdu.edu.cn/showproblem.php?pid=1403

思路:求两个字符串的最长公共子串,可以将这两个字符串收尾拼接在一起,然后就是求新字符串所有后缀的最长公共前缀。

#include <iostream>
#include <cstdio>
#include <cstring>
#include <algorithm>
using namespace std;

const int MAX_N = (233333);
struct suffix_sa {
	
	int wa[MAX_N], wb[MAX_N], ws[MAX_N], wv[MAX_N];
	void da(int *r, int *sa, int n, int m)
	{
		int i, j, p, *x = wa, *y = wb;
		for (i = 0; i < m; ++i) ws[i] = 0;
		for (i = 0; i < n; ++i) ws[ x[i] = r[i] ]++;
		for (i = 1; i < m; ++i) ws[i] += ws[i - 1];
		for (i = n - 1; i >= 0; --i) sa[--ws[x[i]]] = i;

		for (j = 1, p = 1; p < n; j <<= 1, m = p) {
			
			for (p = 0, i = n - j; i < n; ++i) y[p++] = i;
			for (i = 0; i < n; ++i) if (sa[i] >= j) y[p++] = sa[i] - j;

			for (i = 0; i < n; ++i) wv[i] = x[y[i]];
			for (i = 0; i < m; ++i) ws[i] = 0;
			for (i = 0; i < n; ++i) ws[wv[i]]++;
			for (i = 1; i < m; ++i) ws[i] += ws[i - 1];
			for (i = n - 1; i >= 0; --i) sa[--ws[wv[i]]] = y[i];

			swap(x, y);
			for (p = 1, x[sa[0]] = 0, i = 1; i < n; ++i) {
				x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
			}
		}
	}

	int cmp(int *r, int a, int b, int l)
	{
		return (r[a] == r[b] && r[a + l] == r[b + l]);
	}

	int sa[MAX_N], Rank[MAX_N], height[MAX_N];
	void calheight(char *str, int *r, int n)
	{
		for (int i = 0; i <= n; ++i) r[sa[i]] = i;
		
		int k = 0;
		for (int i = 0; i < n; ++i) {
			if (k) k--;

			int j = sa[r[i] - 1];
			
			while (str[i + k] == str[j + k]) k++;
			
			height[r[i]] = k;
		}
	}
} m_sa;


char str1[MAX_N], str2[MAX_N];
int main()
{
	while (~scanf("%s %s", str1, str2)) {
		int n1 = strlen(str1), n2 = strlen(str2), n;
		for (int i = 0; i < n2; ++i) str1[n1 + i] = str2[i];
		n = n1 + n2;
		for (int i = 0; i < n; ++i) m_sa.Rank[i] = str1[i] - 'a' + 1;
		m_sa.Rank[n] = 0;

		m_sa.da(m_sa.Rank, m_sa.sa, n + 1, 30);
		m_sa.calheight(str1, m_sa.Rank, n);

		/*
		for (int i = 0; i <= n; ++i) {
			printf("%d ", m_sa.Rank[i]);
		}

		puts("");
		for (int  i = 0; i <= n; ++i) {
			printf("%d ", m_sa.height[i]);
		}
		puts("");
		*/

		int ans = 0;
		for (int i = 1; i <= n; ++i) {
			if ((m_sa.sa[i - 1] < n1 && m_sa.sa[i] >= n1) || (m_sa.sa[i - 1] >= n1 && m_sa.sa[i] < n1)) {
				ans = max(ans, m_sa.height[i]);
			}
		}

		printf("%d
", ans);
	}
	return 0;
}

更新中。。。



原文地址:https://www.cnblogs.com/wally/p/4477037.html