【LOJ #6041】「雅礼集训 2017 Day7」事情的相似度

Problem

LOJ #6041「雅礼集训 2017 Day7」事情的相似度

给定一个长度为 (n) 的 (01) 串 (s)，有 (m) 次询问，每次询问给定了一个区间 ([l,r](l < r))，你需要找到最大的 (t)，使得存在两个整数 (i,j(l leq i < j leq r))，(s[1..i]) 和 (s[1..j]) 的最长公共后缀的长度为 (t)。允许离线。(n,m leq 10^5)。

时间限制：(3 exttt s)
内存限制：(1024 exttt{MB})

Solution

算法一：SA(M) + 莫队（部分分）

把字符串取反，那么就相当于询问区间内的后缀，两两之间 LCP 的最大值。

考虑离线后莫队，莫队的时候维护一个 set 表示当前区间的所有点的 rank，再维护一个 multiset 表示在前一个 set 中所有相邻两点的 LCP。

视 (n,m) 同阶，时间复杂度 (mathcal O(n sqrt n log n))。非常可惜，通过不了本题。

用 SAM 实现也是可以的，LCP 对应过去就是 parent 树 LCA 的 maxl（maxl 表示后缀自动机中某个点表示子串的最大长度）。

算法二：SA(M) + bitset（骚解）

有没有更给力一点的？

假设我们用 SAM，相当于询问编号在某个区间内，对应的 parent 树上结点两两的 LCA 的最大的 maxl。

同样将询问离线，不过这个时候我们考虑每个点的贡献。我们按照 maxl 从大到小枚举点 (x)，那么我们发现，如果某个询问 ([l,r]) 包含了 (x) 不同儿子的子树的点，那么这个 (x) 就对 ([l,r]) 这个询问有贡献，并且因为是按照 maxl 从大到小枚举点，所以第一个给 ([l,r]) 产生贡献的就是它的答案。那么我们怎么去优化这个暴力过程呢？

用牛逼的 bitset 操作。

我们考虑对于字符串的每个位置，存一个 bitset 表示包含这个位置的所有询问，接着在 parent 树上按 maxl 从大到小合并这些 bitset。合并两个 bitset 的时候，对于两边都有的询问就直接算出它们的答案。为了保证时间复杂度还要维护一个 bitset 表示已经得到答案的询问，保证合并的时候枚举公共的询问的过程中，每个询问只会被枚举一次。

诶等等？我空间怎么开爆了？

把 m 个询问拆成两部分，分别做。

诶等等？怎么把区间里面每个点的 bitset 的一位置成 1 啊？

for (int i = 1; i <= m; ++i)
{
	int l, r; 
	read(l), read(r); 
	bit[l][i] = 1, bit[r + 1][i] = 1;  
}
for (int i = 2; i <= n; ++i)
	bit[i] ^= bit[i - 1];

时间复杂度 (mathcal O(frac{nm}{omega}))。

用 SA 实现也是可以的，对应过去就是按 height 从大到小合并。

#include <bits/stdc++.h>

template <class T>
inline void read(T &x)
{
	static char ch;
	while (!isdigit(ch = getchar())); 
	x = ch - '0'; 
	while (isdigit(ch = getchar()))
		x = x * 10 + ch - '0'; 
}

template <class T>
inline void putint(T x)
{
	static char buf[15], *tail = buf; 
	if (!x)
		putchar('0'); 
	else
	{
		for (; x; x /= 10) *++tail = x % 10 + '0'; 
		for (; tail != buf; --tail) putchar(*tail); 
	}
}

const int MaxN = 1e5 + 5; 
const int MaxNT = MaxN << 1; 

typedef std::bitset<(MaxN >> 1)> bst; 

struct node
{
	int trans[2]; 
	int maxl, par; 
}tr[MaxNT]; 

int nT = 1, lst = 1; 
int cur_len, pos[MaxN]; 

int n, m; 
int m1, m2; 
char s[MaxN]; 

int id[MaxNT], ans[MaxN]; 
std::vector<int> son[MaxNT]; 

int seq[MaxNT]; 
bst bit[MaxN], non_ans; 

inline void extend(int ch)
{
	int x = lst; 
	tr[lst = ++nT].maxl = ++cur_len; 
	for (; x && !tr[x].trans[ch]; x = tr[x].par)
		tr[x].trans[ch] = lst; 
	if (!x)
		tr[lst].par = 1; 
	else
	{
		int y = tr[x].trans[ch]; 
		if (tr[x].maxl + 1 == tr[y].maxl)
			tr[lst].par = y; 
		else
		{
			int np = ++nT; 
			tr[np] = tr[y]; 
			tr[np].maxl = tr[x].maxl + 1; 

			tr[y].par = tr[lst].par = np; 
			for (; x && tr[x].trans[ch] == y; x = tr[x].par)
				tr[x].trans[ch] = np; 
		}
	}
}

inline void build_par()
{
	static int buc[MaxNT]; 
	for (int i = 1; i <= nT; ++i)
		++buc[tr[i].maxl]; 
	for (int i = 1; i <= nT; ++i)
		buc[i] += buc[i - 1]; 
	for (int i = 1; i <= nT; ++i)
		seq[buc[tr[i].maxl]--] = i; 
	for (int i = 2; i <= nT; ++i)
		son[tr[i].par].push_back(i); 
}

inline void merge(int &x, int y, int len)
{
	if (!x || !y)
	{
		x += y; 
		return; 
	}

	bit[x] &= non_ans; 
	bst cur = bit[x] & bit[y]; 
	for (int i = cur._Find_first(), lim = MaxN >> 1; i < lim; i = cur._Find_next(i))
	{
		ans[i] = len; 
		non_ans[i] = 0; 
	}
	bit[x] = (bit[x] | bit[y]) & non_ans; 
}

inline void calc()
{
	for (int i = nT; i >= 1; --i)
	{
		int u = seq[i]; 
		for (int v : son[u])
			merge(id[u], id[v], tr[u].maxl); 
	}
}

inline void solve(int m)
{
	non_ans.set(); 
	for (int i = 1; i <= n; ++i)
		bit[i].reset(); 
	for (int i = 1; i <= m; ++i)
	{
		int l, r; 
		read(l), read(r); 
		bit[l][i] = 1, bit[r + 1][i] = 1;  
	}
	for (int i = 2; i <= n; ++i)
		bit[i] ^= bit[i - 1]; 

	for (int i = 1; i <= nT; ++i)
		id[i] = 0; 
	for (int i = 1; i <= n; ++i)
		id[pos[i]] = i; 

	calc(); 
	for (int i = 1; i <= m; ++i)
		putint(ans[i]), putchar('
'); 
}

int main()
{
#ifdef orzczk
	freopen("a.in", "r", stdin); 
	freopen("a.out", "w", stdout); 
#endif

	scanf("%d%d", &n, &m); 
	scanf("%s", s + 1); 

	for (int i = 1; i <= n; ++i)
	{
		extend(s[i] - '0'); 
		pos[i] = lst; 
	}
	build_par(); 

	m1 = m >> 1, m2 = m - m1; 
	solve(m1), solve(m2); 

	return 0; 
}

算法三：SAM + LCT + BIT（妙解）

有没有更给力一点的？

考虑将询问离线，然后从小到大枚举右端点 (r)。考虑在 parent 树上把 (r) 对应的点到根的路径上全部覆盖上 (r) 的标记（也就是说每个点只保留编号最大的标记），在打标记之前，如果在路径上遇到的某个点 (x) 在之前有一个标记 (l_0)（(x) 是从下往上第一个拥有 (l_0) 标记的结点），那么说明 (l_0) 和 (r) 对应的点的 LCA 是 (x)，可以用 (maxl_x) 更新左端点不超过 (l_0) 的区间答案。于是拿一个 BIT 维护所有左端点的答案即可。

考虑如何优化到根路径上的暴力过程。这个过程可以考虑用 LCT 的 access 来实现。我们发现，当前同一条实路径上的标记相同，这启发我们维护出实路径的最大的 maxl，这样就可以实现更新答案的过程。access 完一起给整个实路径打个修改标记即可。

在所有 access 的过程中，跨过的虚边的总数是 (mathcal O(n log n)) 的，每次跨过虚边都要在 BIT 上修改，时间复杂度 (mathcal O(n log^2 n))

如果遇到强制在线的毒瘤出题人，把树状数组改成可持久化线段树即可。

##include <bits/stdc++.h>

template <class T>
inline void read(T &x)
{
	static char ch;
	while (!isdigit(ch = getchar())); 
	x = ch - '0'; 
	while (isdigit(ch = getchar()))
		x = x * 10 + ch - '0'; 
}

template <class T>
inline void putint(T x)
{
	static char buf[15], *tail = buf; 
	if (!x)
		putchar('0'); 
	else
	{
		for (; x; x /= 10) *++tail = x % 10 + '0'; 
		for (; tail != buf; --tail) putchar(*tail); 
	}
}

template <class T>
inline void relax(T &x, const T &y)
{
	if (x < y)
		x = y; 
}

const int MaxN = 1e5 + 5; 
const int MaxNT = MaxN << 1; 

namespace BIT
{
	int n; 
	int bit[MaxN]; 
	inline void modify(int x, int v)
	{
	//	std::cerr << x << ' ' << v << '
'; 
		for (; x; x ^= x & -x)
			relax(bit[x], v); 
	}

	inline int query(int x)
	{
		int res = 0; 
		for (; x <= n; x += x & -x)
			relax(res, bit[x]); 
		return res; 
	}
}

namespace LCT
{
	int lc[MaxNT], rc[MaxNT], fa[MaxNT]; 

	int val[MaxNT], len[MaxNT]; 
	int tag_val[MaxNT]; 

	inline bool is_root(int x)
	{
		return !fa[x] || (lc[fa[x]] != x && rc[fa[x]] != x); 
	}

	inline bool which(int x)
	{
		return rc[fa[x]] == x; 
	}

	inline void node_val(int x, int v)
	{
		tag_val[x] = val[x] = v; 
	}

	inline void dnt(int x)
	{
		if (tag_val[x])
		{
			if (lc[x])
				node_val(lc[x], tag_val[x]); 
			if (rc[x])
				node_val(rc[x], tag_val[x]); 
			tag_val[x] = 0; 
		}
	}

	inline void Rotate(int x)
	{
		int y = fa[x], z = fa[y]; 
		int b = lc[y] == x ? rc[x] : lc[x]; 

		if (!is_root(y))
			(lc[z] == y ? lc[z] : rc[z]) = x; 
		fa[x] = z, fa[y] = x; 
		
		if (b)
			fa[b] = y; 

		if (lc[y] == x)
			rc[x] = y, lc[y] = b; 
		else
			lc[x] = y, rc[y] = b; 
	}

	inline void Splay(int x)
	{
		static int que[MaxN], qr, y; 
		for (y = x; !is_root(y); y = fa[y])
			que[++qr] = y; 
		que[++qr] = y; 

		for (; qr >= 1; --qr)
			dnt(que[qr]); 

		while (!is_root(x))
		{
			if (!is_root(fa[x]))
				Rotate(which(x) == which(fa[x]) ? fa[x] : x); 
			Rotate(x); 
		}
	}

	inline void Access(int x, int cur_val)
	{
		int y = 0; 
		for (; x; rc[x] = y, y = x, x = fa[x])
		{
			Splay(x); 
			BIT::modify(val[x], len[x]); 
		}

		node_val(y, cur_val); 
	}
}

namespace SAM
{
	struct node
	{
		int par, maxl; 
		int trans[2]; 
	}tr[MaxNT]; 

	int nT = 1, lst = 1; 
	int cur_len; 

	inline void extend(int ch)
	{
		int x = lst; 
		tr[lst = ++nT].maxl = ++cur_len; 
		for (; x && !tr[x].trans[ch]; x = tr[x].par)
			tr[x].trans[ch] = lst; 
		if (!x)
			tr[lst].par = 1; 
		else
		{
			int y = tr[x].trans[ch]; 
			if (tr[x].maxl + 1 == tr[y].maxl)
				tr[lst].par = y; 
			else
			{
				int np = ++nT; 
				tr[np] = tr[y]; 
				tr[np].maxl = tr[x].maxl + 1; 

				tr[y].par = tr[lst].par = np; 
				for (; x && tr[x].trans[ch] == y; x = tr[x].par)
					tr[x].trans[ch] = np; 
			}
		}
	}

	inline void build_lct()
	{
		for (int i = 1; i <= nT; ++i)
		{
			LCT::fa[i] = tr[i].par; 
			LCT::len[i] = tr[i].maxl; 
		}
	}
}

int SAM_pos[MaxN]; 

int req_lef[MaxN], ans[MaxN]; 
std::vector<int> req[MaxN]; 

int n, m; 
char s[MaxN]; 

int main()
{
#ifdef orzczk
	freopen("a.in", "r", stdin); 
	freopen("a.out", "w", stdout); 
#endif

	scanf("%d%d", &n, &m); 
	scanf("%s", s + 1); 

	for (int i = 1; i <= n; ++i)
	{
		SAM::extend(s[i] - '0'); 
		SAM_pos[i] = SAM::lst; 
	}

	SAM::build_lct(); 
	BIT::n = n; 

	for (int i = 1; i <= m; ++i)
	{
		int r; 
		read(req_lef[i]), read(r); 
		req[r].push_back(i); 
	}

	for (int r = 1; r <= n; ++r)
	{
	//	std::cerr << "solve " << r << '
'; 
		LCT::Access(SAM_pos[r], r); 
		for (int i : req[r])
			ans[i] = BIT::query(req_lef[i]);
	}

	for (int i = 1; i <= m; ++i)
	{
		putint(ans[i]); 
		putchar('
'); 
	}

	return 0; 
}