1. New Distinct Substrings
题目链接:spoj - SUBST1
题意:给你一个串,求他的子串个数
思路:每个子串都是某个后缀的前缀,所以计算每个后缀的贡献即可,按照后缀suffix(sa[1]),suffix(sa[2])...的顺序计算,当加入suffix(sa[k])时,本应产生n-sa[k]+1个前缀,但是有height[k]个前缀在前面已经计算过了,所以suffix(sa[k])对答案的贡献就是n-sa[k]+1-height[k]
#include <iostream> #include <algorithm> #include <cstdio> #include <cstring> using namespace std; const int N = 50010; int wa[N], wb[N], wv[N], wss[N]; int n, T, rak[N], height[N], cal[N], sa[N]; char s[N]; bool cmp(int *r, int a, int b, int l) { return r[a] == r[b] && r[a + l] == r[b + l]; } void da(int *r, int *sa, int n, int m) { int i, j, p, *x = wa, *y = wb; for (i = 0; i < m; i++) wss[i] = 0; for (i = 0; i < n; i++) wss[x[i] = r[i]]++; for (i = 1; i < m; i++) wss[i] += wss[i - 1]; for (i = n - 1; i >= 0; i--) sa[--wss[x[i]]] = i; for (j = 1, p = 1; p < n; j *= 2, m = p) { for (p = 0, i = n - j; i < n; i++) y[p++] = i; for (i = 0; i < n; i++) if (sa[i] >= j) y[p++] = sa[i] - j; for (i = 0; i < n; i++) wv[i] = x[y[i]]; for (i = 0; i < m; i++) wss[i] = 0; for (i = 0; i < n; i++) wss[wv[i]]++; for (i = 1; i < m; i++) wss[i] += wss[i - 1]; for (i = n - 1; i >= 0; i--) sa[--wss[wv[i]]] = y[i]; for (swap(x, y), p = 1, x[sa[0]] = 0, i = 1; i < n; i++) { if (cmp(y, sa[i - 1], sa[i], j)) x[sa[i]] = p - 1; else x[sa[i]] = p++; } } } void calc(int *r, int *sa, int n) { int i, j, k = 0; for (i = 1; i <= n; i++) rak[sa[i]] = i; for (i = 0; i < n; height[rak[i++]] = k) for (k ? k-- : 0, j = sa[rak[i] - 1]; r[i + k] == r[j + k]; k++); for (i = n; i; i--) { rak[i] = rak[i - 1]; sa[i]++; } } int main() { scanf("%d", &T); while (T--) { scanf("%s", s + 1); int n = strlen(s + 1); for (int i = 1; i <= n; i++) cal[i] = s[i]; cal[n + 1] = 0; da(cal + 1, sa, n + 1, 150); calc(cal + 1, sa, n); int res = 0; for (int i = 1; i <= n; i++) res = res + n - sa[i] + 1 - height[i]; printf("%d ", res); } return 0; }
2. Substring
题目链接:hdu - 5769
题意:给你一个串和一个字符x,求包含字符x的子串个数
思路:和上题思路一样,计算每个后缀的贡献,不同的是子串中需要包含字符x,所以我们可以先预处理出每个位置后面离它最近的一个字符x的位置pos[],当加入suffix(sa[k])时,我们只能从pos[sa[k]]开始取前缀,所以suffix(sa[k])对答案的贡献就是n-sa[k]+1-max(height[k],pos[sa[k]]-sa[k])
#include <iostream> #include <algorithm> #include <cstdio> #include <cstring> using namespace std; typedef long long ll; const int N = 700010; int wa[N], wb[N], wv[N], wss[N]; int rak[N], height[N], cal[N], sa[N]; int T, icas, pos[N]; char s[N], ch[N]; bool cmp(int *r, int a, int b, int l) { return r[a] == r[b] && r[a + l] == r[b + l]; } void da(int *r, int *sa, int n, int m) { int i, j, p, *x = wa, *y = wb; for (i = 0; i < m; i++) wss[i] = 0; for (i = 0; i < n; i++) wss[x[i] = r[i]]++; for (i = 1; i < m; i++) wss[i] += wss[i - 1]; for (i = n - 1; i >= 0; i--) sa[--wss[x[i]]] = i; for (j = 1, p = 1; p < n; j *= 2, m = p) { for (p = 0, i = n - j; i < n; i++) y[p++] = i; for (i = 0; i < n; i++) if (sa[i] >= j) y[p++] = sa[i] - j; for (i = 0; i < n; i++) wv[i] = x[y[i]]; for (i = 0; i < m; i++) wss[i] = 0; for (i = 0; i < n; i++) wss[wv[i]]++; for (i = 1; i < m; i++) wss[i] += wss[i - 1]; for (i = n - 1; i >= 0; i--) sa[--wss[wv[i]]] = y[i]; for (swap(x, y), p = 1, x[sa[0]] = 0, i = 1; i < n; i++) { if (cmp(y, sa[i - 1], sa[i], j)) x[sa[i]] = p - 1; else x[sa[i]] = p++; } } } void calc(int *r, int *sa, int n) { int i, j, k = 0; for (i = 1; i <= n; i++) rak[sa[i]] = i; for (i = 0; i < n; height[rak[i++]] = k) for (k ? k-- : 0, j = sa[rak[i] - 1]; r[i + k] == r[j + k]; k++); for (i = n; i; i--) { rak[i] = rak[i - 1]; sa[i]++; } } int main() { scanf("%d", &T); while (T--) { scanf("%s%s", ch + 1, s + 1); int n = strlen(s + 1); for (int i = 1; i <= n; i++) cal[i] = s[i]; cal[n + 1] = 0; da(cal + 1, sa, n + 1, 200); calc(cal + 1, sa, n); int p = n + 1; for (int i = n; i >= 1; i--) { if (s[i] == ch[1]) p = i; pos[i] = p; } ll res = 0; for (int i = 1; i <= n; i++) { res = res + n - sa[i] + 1; res -= max(height[i], pos[sa[i]] - sa[i]); } printf("Case #%d: %lld ", ++icas, res); } return 0; }
3. Musical Theme
题目链接:poj - 1743
题意:给你一个串,找出最长、不可重叠、重复的子串
思路:二分答案,将问题转换为判定型问题,判断是否存在两个子串长度为k且不重叠,将排序后的后缀分为若干组,每组内所有元素的height大于等于k,那么每组内任意两个后缀的最长公共前缀长度都大于等于k,此时如果有一组内sa的最大值与最小值之差大于等于k,那么就一定存在两个子串长度为k且不重叠
#include <iostream> #include <algorithm> #include <cstdio> #include <cstring> using namespace std; const int N = 20010; const int INF = 0x3f3f3f3f; int wa[N], wb[N], wv[N], wss[N], a[N]; int n, rak[N], height[N], cal[N], sa[N]; bool cmp(int *r, int a, int b, int l) { return r[a] == r[b] && r[a + l] == r[b + l]; } void da(int *r, int *sa, int n, int m) { int i, j, p, *x = wa, *y = wb; for (i = 0; i < m; i++) wss[i] = 0; for (i = 0; i < n; i++) wss[x[i] = r[i]]++; for (i = 1; i < m; i++) wss[i] += wss[i - 1]; for (i = n - 1; i >= 0; i--) sa[--wss[x[i]]] = i; for (j = 1, p = 1; p < n; j *= 2, m = p) { for (p = 0, i = n - j; i < n; i++) y[p++] = i; for (i = 0; i < n; i++) if (sa[i] >= j) y[p++] = sa[i] - j; for (i = 0; i < n; i++) wv[i] = x[y[i]]; for (i = 0; i < m; i++) wss[i] = 0; for (i = 0; i < n; i++) wss[wv[i]]++; for (i = 1; i < m; i++) wss[i] += wss[i - 1]; for (i = n - 1; i >= 0; i--) sa[--wss[wv[i]]] = y[i]; for (swap(x, y), p = 1, x[sa[0]] = 0, i = 1; i < n; i++) { if (cmp(y, sa[i - 1], sa[i], j)) x[sa[i]] = p - 1; else x[sa[i]] = p++; } } } void calc(int *r, int *sa, int n) { int i, j, k = 0; for (i = 1; i <= n; i++) rak[sa[i]] = i; for (i = 0; i < n; height[rak[i++]] = k) for (k ? k-- : 0, j = sa[rak[i] - 1]; r[i + k] == r[j + k]; k++); for (i = n; i; i--) { rak[i] = rak[i - 1]; sa[i]++; } } bool check(int mid) { int imin = sa[1], imax = sa[1]; for (int i = 2; i <= n; i++) { if (height[i] >= mid) { imin = min(imin, sa[i]); imax = max(imax, sa[i]); } else { if (imax - imin > mid) return true; imin = imax = sa[i]; } } return imax - imin > mid; } int main() { while (scanf("%d", &n) && 0 != n) { for (int i = 1; i <= n; i++) scanf("%d", &a[i]); for (int i = 1; i <= n; i++) { cal[i] = a[i] - a[i - 1]; cal[i] += 90; } cal[n + 1] = 0; da(cal + 1, sa, n + 1, 200); calc(cal + 1, sa, n); int l = 0, r = n; while (l < r) { int mid = (l + r) / 2; if (check(mid)) l = mid + 1; else r = mid; } printf("%d ", l >= 5 ? l : 0); } return 0; }
4. Milk Patterns
题目链接:poj - 3261
题意:给你一个数组和一个数k,求出最长、可以重叠、重复k次的子串
思路:二分答案,将问题转换为判定型问题,判断是否存在k个子串长度为L、可以重叠的子串,将排序后的后缀分为若干组,每组内所有元素的height大于等于L,此时如果存在一组组内后缀的数目大于等于k,那么就一定存在k个子串长度为L、可以重叠的子串
#include <iostream> #include <algorithm> #include <cstdio> #include <cstring> using namespace std; const int N = 1000010; int wa[N], wb[N], wv[N], wss[N], a[N]; int n, k, rak[N], height[N], cal[N], sa[N]; bool cmp(int *r, int a, int b, int l) { return r[a] == r[b] && r[a + l] == r[b + l]; } void da(int *r, int *sa, int n, int m) { int i, j, p, *x = wa, *y = wb; for (i = 0; i < m; i++) wss[i] = 0; for (i = 0; i < n; i++) wss[x[i] = r[i]]++; for (i = 1; i < m; i++) wss[i] += wss[i - 1]; for (i = n - 1; i >= 0; i--) sa[--wss[x[i]]] = i; for (j = 1, p = 1; p < n; j *= 2, m = p) { for (p = 0, i = n - j; i < n; i++) y[p++] = i; for (i = 0; i < n; i++) if (sa[i] >= j) y[p++] = sa[i] - j; for (i = 0; i < n; i++) wv[i] = x[y[i]]; for (i = 0; i < m; i++) wss[i] = 0; for (i = 0; i < n; i++) wss[wv[i]]++; for (i = 1; i < m; i++) wss[i] += wss[i - 1]; for (i = n - 1; i >= 0; i--) sa[--wss[wv[i]]] = y[i]; for (swap(x, y), p = 1, x[sa[0]] = 0, i = 1; i < n; i++) { if (cmp(y, sa[i - 1], sa[i], j)) x[sa[i]] = p - 1; else x[sa[i]] = p++; } } } void calc(int *r, int *sa, int n) { int i, j, k = 0; for (i = 1; i <= n; i++) rak[sa[i]] = i; for (i = 0; i < n; height[rak[i++]] = k) for (k ? k-- : 0, j = sa[rak[i] - 1]; r[i + k] == r[j + k]; k++); for (i = n; i; i--) { rak[i] = rak[i - 1]; sa[i]++; } } bool check(int mid) { int cnt = 1; for (int i = 2; i <= n; i++) { if (height[i] >= mid) cnt++; else cnt = 1; if (cnt >= k) return true; } return false; } int main() { scanf("%d%d", &n, &k); for (int i = 1; i <= n; i++) { scanf("%d", &cal[i]); cal[i]++; } cal[n + 1] = 0; da(cal + 1, sa, n + 1, N); calc(cal + 1, sa, n); int l = 1, r = n; while (l < r) { int mid = (l + r + 1) / 2; if (check(mid)) l = mid; else r = mid - 1; } printf("%d ", l); return 0; }
5. Life Forms
题目链接:poj - 3294
题意:给你n个字符串,求在大于n/2个字符串中含有的最长公共子串
思路:将n个字符串拼接起来,中间用一个从没出现过且不相同的字符隔开,求出后缀数组后二分答案,将排序后的后缀分为若干组,判断每一组内的后缀是否出现在大于n/2个不同的字符串中(标记某个字符串是否出现过,统计不同字符串的数量)
#include <iostream> #include <algorithm> #include <cstdio> #include <cstring> using namespace std; typedef long long ll; const int N = 101500; const int M = 110; int wa[N], wb[N], wv[N], wss[N]; int rak[N], height[N], cal[N], sa[N]; int n, len, belong[N]; bool f[M]; bool cmp(int *r, int a, int b, int l) { return r[a] == r[b] && r[a + l] == r[b + l]; } void da(int *r, int *sa, int n, int m) { int i, j, p, *x = wa, *y = wb; for (i = 0; i < m; i++) wss[i] = 0; for (i = 0; i < n; i++) wss[x[i] = r[i]]++; for (i = 1; i < m; i++) wss[i] += wss[i - 1]; for (i = n - 1; i >= 0; i--) sa[--wss[x[i]]] = i; for (j = 1, p = 1; p < n; j *= 2, m = p) { for (p = 0, i = n - j; i < n; i++) y[p++] = i; for (i = 0; i < n; i++) if (sa[i] >= j) y[p++] = sa[i] - j; for (i = 0; i < n; i++) wv[i] = x[y[i]]; for (i = 0; i < m; i++) wss[i] = 0; for (i = 0; i < n; i++) wss[wv[i]]++; for (i = 1; i < m; i++) wss[i] += wss[i - 1]; for (i = n - 1; i >= 0; i--) sa[--wss[wv[i]]] = y[i]; for (swap(x, y), p = 1, x[sa[0]] = 0, i = 1; i < n; i++) { if (cmp(y, sa[i - 1], sa[i], j)) x[sa[i]] = p - 1; else x[sa[i]] = p++; } } } void calc(int *r, int *sa, int n) { int i, j, k = 0; for (i = 1; i <= n; i++) rak[sa[i]] = i; for (i = 0; i < n; height[rak[i++]] = k) for (k ? k-- : 0, j = sa[rak[i] - 1]; r[i + k] == r[j + k]; k++); for (i = n; i; i--) { rak[i] = rak[i - 1]; sa[i]++; } } bool check(int k) { memset(f, false, sizeof(f)); int cnt = 0; for (int i = 1; i <= len; i++) { if (height[i] >= k) { if (0 != belong[sa[i]] && false == f[belong[sa[i]]]) { cnt++; f[belong[sa[i]]] = true; } } else { memset(f, false, sizeof(f)); if (0 != belong[sa[i]]) { cnt = 1; f[belong[sa[i]]] = true; } else cnt = 0; } if (cnt > n / 2) return true; } return false; } int main() { while (scanf("%d", &n) && 0 != n) { int b = 125; len = 0; for (int i = 1; i <= n; i++) { char s[10 * M]; scanf("%s", s + 1); for (int k = 1; k <= strlen(s + 1); k++) { cal[++len] = s[k]; belong[len] = i; } if (i != n) { cal[++len] = b++; belong[len] = 0; } } cal[len + 1] = 0; belong[len + 1] = 0; da(cal + 1, sa, len + 1, 250); calc(cal + 1, sa, len); int l = 0, r = len; while (l < r) { int mid = (l + r + 1) / 2; if (check(mid)) l = mid; else r = mid - 1; } if (0 == l) { printf("? "); continue; } memset(f, false, sizeof(f)); int cnt = 0; for (int i = 1; i <= len; i++) { if (height[i] >= l) { if (0 != belong[sa[i]] && false == f[belong[sa[i]]]) { cnt++; f[belong[sa[i]]] = true; } } else { if (cnt > n / 2) { for (int j = 0; j < l; j++) printf("%c", char(cal[sa[i - 1] + j])); printf(" "); } memset(f, false, sizeof(f)); if (0 != belong[sa[i]]) { cnt = 1; f[belong[sa[i]]] = true; } else cnt = 0; } } printf(" "); } return 0; }
6. Relevant Phrases of Annihilation
题目链接:spoj - PHRASES
题意:给你n个字符串,求在每个字符串中不重叠的至少出现2次的最长子串
思路:将n个字符串拼接起来,中间用一个从没出现过且不相同的字符隔开,求后缀数组后二分答案,将排序后的后缀分为若干组,判断是否有一组后缀在每个原来的字符串中至少出现两次,并且在每个原来的字符串中,后缀的起始位置的最大值与最小值之差是否不小于当前二分的长度
#include <iostream> #include <algorithm> #include <cstdio> #include <cstring> using namespace std; const int N = 100500; const int M = 10010; const int INF = 0x3f3f3f3f; int wa[N], wb[N], wv[N], wss[N]; int rak[N], height[N], cal[N], sa[N]; int n, T, len, belong[N]; int f[12], imin[12], imax[12]; bool cmp(int *r, int a, int b, int l) { return r[a] == r[b] && r[a + l] == r[b + l]; } void da(int *r, int *sa, int n, int m) { int i, j, p, *x = wa, *y = wb; for (i = 0; i < m; i++) wss[i] = 0; for (i = 0; i < n; i++) wss[x[i] = r[i]]++; for (i = 1; i < m; i++) wss[i] += wss[i - 1]; for (i = n - 1; i >= 0; i--) sa[--wss[x[i]]] = i; for (j = 1, p = 1; p < n; j *= 2, m = p) { for (p = 0, i = n - j; i < n; i++) y[p++] = i; for (i = 0; i < n; i++) if (sa[i] >= j) y[p++] = sa[i] - j; for (i = 0; i < n; i++) wv[i] = x[y[i]]; for (i = 0; i < m; i++) wss[i] = 0; for (i = 0; i < n; i++) wss[wv[i]]++; for (i = 1; i < m; i++) wss[i] += wss[i - 1]; for (i = n - 1; i >= 0; i--) sa[--wss[wv[i]]] = y[i]; for (swap(x, y), p = 1, x[sa[0]] = 0, i = 1; i < n; i++) { if (cmp(y, sa[i - 1], sa[i], j)) x[sa[i]] = p - 1; else x[sa[i]] = p++; } } } void calc(int *r, int *sa, int n) { int i, j, k = 0; for (i = 1; i <= n; i++) rak[sa[i]] = i; for (i = 0; i < n; height[rak[i++]] = k) for (k ? k-- : 0, j = sa[rak[i] - 1]; r[i + k] == r[j + k]; k++); for (i = n; i; i--) { rak[i] = rak[i - 1]; sa[i]++; } } bool check(int k) { memset(f, 0, sizeof(f)); memset(imin, INF, sizeof(imin)); memset(imax, 0, sizeof(imax)); int cnt = 0; for (int i = 1; i <= len; i++) { if (height[i] >= k) { if (0 != belong[sa[i]] && 0 == f[belong[sa[i]]]) cnt++; f[belong[sa[i]]]++; imin[belong[sa[i]]] = min(imin[belong[sa[i]]], sa[i]); imax[belong[sa[i]]] = max(imax[belong[sa[i]]], sa[i]); } else { memset(f, 0, sizeof(f)); memset(imin, INF, sizeof(imin)); memset(imax, 0, sizeof(imax)); if (0 != belong[sa[i]]) { cnt = 1; f[belong[sa[i]]]++; imin[belong[sa[i]]] = min(imin[belong[sa[i]]], sa[i]); imax[belong[sa[i]]] = max(imax[belong[sa[i]]], sa[i]); } else cnt = 0; } if (cnt == n) { bool flag = true; for (int i = 1; i <= n; i++) { if (f[belong[sa[i]]] < 2) flag = false; if (imax[i] - imin[i] < k) flag = false; } if (flag) return true; } } return false; } int main() { scanf("%d", &T); while (T--) { scanf("%d", &n); len = 0; int b = 125; for (int i = 1; i <= n; i++) { char s[M]; scanf("%s", s + 1); for (int k = 1; k <= strlen(s + 1); k++) { cal[++len] = s[k]; belong[len] = i; } if (i != n) cal[++len] = ++b; } cal[len + 1] = 0; da(cal + 1, sa, len + 1, 150); calc(cal + 1, sa, len); int l = 0, r = len; while (l < r) { int mid = (l + r + 1) / 2; if (check(mid)) l = mid; else r = mid - 1; } printf("%d ", l); } return 0; }
7. Long Long Message
题目链接:poj - 2774
题意:给你两个串,求两个串的最长公共子串
思路:将第二个串写在第一个串的后面,中间用一个没有出现过的字符隔开,求这个新字符串的后缀数组,当sa[i-1]和sa[i]不属于同一个字符串时,height[i]的最大值就是答案
#include <iostream> #include <algorithm> #include <cstdio> #include <cstring> using namespace std; const int N = 200010; int wa[N], wb[N], wv[N], wss[N]; int n, T, rak[N], height[N], cal[N], sa[N]; char a[N], b[N]; bool cmp(int *r, int a, int b, int l) { return r[a] == r[b] && r[a + l] == r[b + l]; } void da(int *r, int *sa, int n, int m) { int i, j, p, *x = wa, *y = wb; for (i = 0; i < m; i++) wss[i] = 0; for (i = 0; i < n; i++) wss[x[i] = r[i]]++; for (i = 1; i < m; i++) wss[i] += wss[i - 1]; for (i = n - 1; i >= 0; i--) sa[--wss[x[i]]] = i; for (j = 1, p = 1; p < n; j *= 2, m = p) { for (p = 0, i = n - j; i < n; i++) y[p++] = i; for (i = 0; i < n; i++) if (sa[i] >= j) y[p++] = sa[i] - j; for (i = 0; i < n; i++) wv[i] = x[y[i]]; for (i = 0; i < m; i++) wss[i] = 0; for (i = 0; i < n; i++) wss[wv[i]]++; for (i = 1; i < m; i++) wss[i] += wss[i - 1]; for (i = n - 1; i >= 0; i--) sa[--wss[wv[i]]] = y[i]; for (swap(x, y), p = 1, x[sa[0]] = 0, i = 1; i < n; i++) { if (cmp(y, sa[i - 1], sa[i], j)) x[sa[i]] = p - 1; else x[sa[i]] = p++; } } } void calc(int *r, int *sa, int n) { int i, j, k = 0; for (i = 1; i <= n; i++) rak[sa[i]] = i; for (i = 0; i < n; height[rak[i++]] = k) for (k ? k-- : 0, j = sa[rak[i] - 1]; r[i + k] == r[j + k]; k++); for (i = n; i; i--) { rak[i] = rak[i - 1]; sa[i]++; } } int main() { scanf("%s%s", a + 1, b + 1); int la = strlen(a + 1), lb = strlen(b + 1); for (int i = 1; i <= la; i++) cal[i] = a[i]; cal[la + 1] = 0; for (int i = 1; i <= lb; i++) cal[i + la + 1] = b[i]; int n = la + lb + 1; cal[n + 1] = 0; da(cal + 1, sa, n + 1, 150); calc(cal + 1, sa, n); int res = 0; for (int i = 2; i <= n; i++) { if (sa[i - 1] < la + 1 && sa[i] > la + 1) res = max(res, height[i]); else if (sa[i - 1] > la + 1 && sa[i] < la + 1) res = max(res, height[i]); } printf("%d ", res); return 0; }
8. Substrings
题目链接:poj - 1226
题意:给你n个串,求顺序出现或者逆序出现在每个字符串的最长子串
思路:将每个字符串反过来写一遍,再将这2*n个字符串拼接起来,中间用一个从没出现过且不相同的字符隔开,求出后缀数组后二分答案,判断是否有一组后缀在每个原来的字符串或反转后
的字符串中出现,注意特判n=1的情况
#include <iostream> #include <algorithm> #include <cstdio> #include <cstring> using namespace std; const int N = 20500; const int M = 110; const int INF = 0x3f3f3f3f; int wa[N], wb[N], wv[N], wss[N]; int rak[N], height[N], cal[N], sa[N]; int n, T, len, belong[N]; bool f[M]; bool cmp(int *r, int a, int b, int l) { return r[a] == r[b] && r[a + l] == r[b + l]; } void da(int *r, int *sa, int n, int m) { int i, j, p, *x = wa, *y = wb; for (i = 0; i < m; i++) wss[i] = 0; for (i = 0; i < n; i++) wss[x[i] = r[i]]++; for (i = 1; i < m; i++) wss[i] += wss[i - 1]; for (i = n - 1; i >= 0; i--) sa[--wss[x[i]]] = i; for (j = 1, p = 1; p < n; j *= 2, m = p) { for (p = 0, i = n - j; i < n; i++) y[p++] = i; for (i = 0; i < n; i++) if (sa[i] >= j) y[p++] = sa[i] - j; for (i = 0; i < n; i++) wv[i] = x[y[i]]; for (i = 0; i < m; i++) wss[i] = 0; for (i = 0; i < n; i++) wss[wv[i]]++; for (i = 1; i < m; i++) wss[i] += wss[i - 1]; for (i = n - 1; i >= 0; i--) sa[--wss[wv[i]]] = y[i]; for (swap(x, y), p = 1, x[sa[0]] = 0, i = 1; i < n; i++) { if (cmp(y, sa[i - 1], sa[i], j)) x[sa[i]] = p - 1; else x[sa[i]] = p++; } } } void calc(int *r, int *sa, int n) { int i, j, k = 0; for (i = 1; i <= n; i++) rak[sa[i]] = i; for (i = 0; i < n; height[rak[i++]] = k) for (k ? k-- : 0, j = sa[rak[i] - 1]; r[i + k] == r[j + k]; k++); for (i = n; i; i--) { rak[i] = rak[i - 1]; sa[i]++; } } bool check(int k) { memset(f, false, sizeof(f)); int cnt = 0; for (int i = 1; i <= len; i++) { if (height[i] >= k) { if (0 != belong[sa[i]] && false == f[belong[sa[i]]]) { cnt++; f[belong[sa[i]]] = true; } } else { memset(f, false, sizeof(f)); if (0 != belong[sa[i]]) { cnt = 1; f[belong[sa[i]]] = true; } else cnt = 0; } if (cnt == n) return true; } return false; } int main() { scanf("%d", &T); while (T--) { scanf("%d", &n); if (1 == n) { char s[M]; scanf("%s", s + 1); printf("%d ", strlen(s + 1)); continue; } len = 0; int b = 125; for (int i = 1; i <= n; i++) { char s[M]; scanf("%s", s + 1); for (int k = 1; k <= strlen(s + 1); k++) { cal[++len] = s[k] + 1; belong[len] = i; } cal[++len] = ++b; for (int k = strlen(s + 1); k >= 1; k--) { cal[++len] = s[k] + 1; belong[len] = i; } if (i != n) cal[++len] = ++b; } cal[len + 1] = 0; da(cal + 1, sa, len + 1, 350); calc(cal + 1, sa, len); int l = 0, r = len; while (l < r) { int mid = (l + r + 1) / 2; if (check(mid)) l = mid; else r = mid - 1; } printf("%d ", l); } return 0; }
9. Mr. Panda and Fantastic Beasts
题目链接:Gym - 101194F
题意:给你n个字符串,找到第一个字符串字典序最小的最短子串,使得这个子串不是其他字符串的子串
思路:将n个字符串拼接起来,中间用一个从没出现过且不相同的字符隔开,求出后缀数组后,对所有来自第一个字符串的后缀进行判断,找到它左边第一个不是来自第一个串的后缀,求lcp为x,找到它右边第一个不是来自第一个串的后缀,求lcp为y,那么显然该后缀至少取前max(x,y)+1个字符做为子串,对每个后缀都进行一次判断,取最小值即可,注意需要判断max(x,y)+1是否超过了第一个字符串后缀的长度
#include <iostream> #include <algorithm> #include <cstring> #include <cstdio> #include <cmath> using namespace std; const int N = 300010; const int INF = 0x3f3f3f3f; int wa[N], wb[N], wv[N], wss[N]; int rak[N], height[N], cal[N], sa[N]; int rmq[N], imin[N][30], len1, icas; int n, len, T, belong[N], lef[N], rig[N]; char s[N]; bool cmp(int *r, int a, int b, int l) { return r[a] == r[b] && r[a + l] == r[b + l]; } void da(int *r, int *sa, int n, int m) { int i, j, p, *x = wa, *y = wb; for (i = 0; i < m; i++) wss[i] = 0; for (i = 0; i < n; i++) wss[x[i] = r[i]]++; for (i = 1; i < m; i++) wss[i] += wss[i - 1]; for (i = n - 1; i >= 0; i--) sa[--wss[x[i]]] = i; for (j = 1, p = 1; p < n; j *= 2, m = p) { for (p = 0, i = n - j; i < n; i++) y[p++] = i; for (i = 0; i < n; i++) if (sa[i] >= j) y[p++] = sa[i] - j; for (i = 0; i < n; i++) wv[i] = x[y[i]]; for (i = 0; i < m; i++) wss[i] = 0; for (i = 0; i < n; i++) wss[wv[i]]++; for (i = 1; i < m; i++) wss[i] += wss[i - 1]; for (i = n - 1; i >= 0; i--) sa[--wss[wv[i]]] = y[i]; for (swap(x, y), p = 1, x[sa[0]] = 0, i = 1; i < n; i++) { if (cmp(y, sa[i - 1], sa[i], j)) x[sa[i]] = p - 1; else x[sa[i]] = p++; } } } void calc(int *r, int *sa, int n) { int i, j, k = 0; for (i = 1; i <= n; i++) rak[sa[i]] = i; for (i = 0; i < n; height[rak[i++]] = k) for (k ? k-- : 0, j = sa[rak[i] - 1]; r[i + k] == r[j + k]; k++); for (i = n; i; i--) { rak[i] = rak[i - 1]; sa[i]++; } } void init(int n) { memset(imin, 0, sizeof(imin)); for (int i = 1; i <= n; i++) imin[i][0] = height[i]; int l = int(log((double)n) / log(2.0)); for (int j = 1; j <= l; j++) { for (int i = 1; i + (1 << (j - 1)) <= n; i++) { imin[i][j] = min(imin[i][j - 1], imin[i + (1 << (j - 1))][j - 1]); } } } int ask(int l, int r) { int k = int(log(double(r - l + 1)) / log(2.0)); return min(imin[l][k], imin[r - (1 << k) + 1][k]); } int lcp(int a, int b) { int ra = rak[a], rb = rak[b]; if (ra > rb) swap(ra, rb); return ask(ra + 1, rb); } int main() { scanf("%d", &T); while (T--) { scanf("%d", &n); int b = 125; len = 0; for (int i = 1; i <= n; i++) { scanf("%s", s + 1); if (1 == i) len1 = strlen(s + 1); int tl = strlen(s + 1); for (int k = 1; k <= tl; k++) { cal[++len] = s[k]; belong[len] = i; } if (i != n) { cal[++len] = ++b; belong[len] = 0; } } cal[len + 1] = 0; da(cal + 1, sa, len + 1, 50200); calc(cal + 1, sa, len); int lst = -1; for (int i = 1; i <= len; i++) { if (0 == belong[sa[i]]) continue; if (1 == belong[sa[i]]) lef[i] = lst; else lst = i; } lst = -1; for (int i = len; i >= 1; i--) { if (0 == belong[sa[i]]) continue; if (1 == belong[sa[i]]) rig[i] = lst; else lst = i; } init(len); int rc = INF, rp = 0; for (int i = 1; i <= len; i++) { if (1 != belong[sa[i]]) continue; int L = lef[i], R = rig[i], tc = 0; if (-1 != L) tc = max(tc, lcp(sa[i], sa[L])); if (-1 != R) tc = max(tc, lcp(sa[i], sa[R])); if (len1 - sa[i] + 1 > tc) { if (tc + 1 < rc) { rc = tc + 1; rp = sa[i]; } } } printf("Case #%d: ", ++icas); if (INF == rc) { printf("Impossible "); continue; } for (int i = 0; i < rc; i++) printf("%c", char(cal[rp + i])); printf(" "); } return 0; }
10. Good Article Good sentence
题目链接:hdu - 4416
题意:给出一个串A和若干串B,问A中有多少子串未在任一B串中出现过
思路:将所有的串拼接起来,中间用一个从没出现过且不相同的字符隔开,求出后缀数组,设A串的长度为lenA,对于A串中每个后缀,lenA-sa[i]+1-height[i]就是没有限制条件下第i个后缀的贡献,设j为i后面第一个来自B串的后缀,那么加上限制条件后,每个后缀的贡献就是lenA-sa[i]+1-max(height[i],lcp(i,j)),lcp(i,j)可以通过从后向前扫一遍求出来
#include <iostream> #include <algorithm> #include <cstdio> #include <cstring> #include <cmath> using namespace std; const int N = 300500; const int INF = 0x3f3f3f3f; typedef long long ll; int wa[N], wb[N], wv[N], wss[N]; int rak[N], height[N], cal[N], sa[N]; int T, n, len, len1, icas, lcp[N]; char s[N]; bool cmp(int *r, int a, int b, int l) { return r[a] == r[b] && r[a + l] == r[b + l]; } void da(int *r, int *sa, int n, int m) { int i, j, p, *x = wa, *y = wb; for (i = 0; i < m; i++) wss[i] = 0; for (i = 0; i < n; i++) wss[x[i] = r[i]]++; for (i = 1; i < m; i++) wss[i] += wss[i - 1]; for (i = n - 1; i >= 0; i--) sa[--wss[x[i]]] = i; for (j = 1, p = 1; p < n; j *= 2, m = p) { for (p = 0, i = n - j; i < n; i++) y[p++] = i; for (i = 0; i < n; i++) if (sa[i] >= j) y[p++] = sa[i] - j; for (i = 0; i < n; i++) wv[i] = x[y[i]]; for (i = 0; i < m; i++) wss[i] = 0; for (i = 0; i < n; i++) wss[wv[i]]++; for (i = 1; i < m; i++) wss[i] += wss[i - 1]; for (i = n - 1; i >= 0; i--) sa[--wss[wv[i]]] = y[i]; for (swap(x, y), p = 1, x[sa[0]] = 0, i = 1; i < n; i++) { if (cmp(y, sa[i - 1], sa[i], j)) x[sa[i]] = p - 1; else x[sa[i]] = p++; } } } void calc(int *r, int *sa, int n) { int i, j, k = 0; for (i = 1; i <= n; i++) rak[sa[i]] = i; for (i = 0; i < n; height[rak[i++]] = k) for (k ? k-- : 0, j = sa[rak[i] - 1]; r[i + k] == r[j + k]; k++); for (i = n; i; i--) { rak[i] = rak[i - 1]; sa[i]++; } } int main() { scanf("%d", &T); while (T--) { scanf("%d", &n); len = 0; int b = 125; for (int i = 1; i <= n + 1; i++) { scanf("%s", s + 1); if (1 == i) len1 = strlen(s + 1); int len2 = strlen(s + 1); for (int k = 1; k <= len2; k++) cal[++len] = s[k]; if (i != n + 1) cal[++len] = ++b; } cal[len + 1] = 0; da(cal + 1, sa, len + 1, 100500); calc(cal + 1, sa, len); int rl = 0; for (int i = len; i >= 1; i--) { if (sa[i] > len1) rl = height[i]; else { lcp[i] = rl; rl = min(rl, height[i]); } } ll res = 0; for (int i = 1; i <= len; i++) { if (sa[i] <= len1) { ll t = ll(len1 - sa[i] + 1 - max(lcp[i], height[i])); res += t; } } printf("Case %d: %lld ", ++icas, res); } return 0; }
11. string string string
题目链接:hdu - 6194
题意:给出一个字符串,求恰好出现k次的子串个数
思路:求出后缀数组后,枚举长度为k的段,一段一段的枚举,用rmq求出每一段的最长公共前缀长度lcp,即这段有lcp个子串是满足出现k次的,但又有可能子串过短而不止出现k次,所以要减去出现次数大于k次的子串个数,所以每段对答案的贡献就是lcp-max(height[i],height[i+k]),特判k=1的情况
#include <iostream> #include <algorithm> #include <cstring> #include <cstdio> #include <cmath> using namespace std; typedef long long ll; const int N = 100010; const int M = 30; const int INF = 0x3f3f3f3f; int wa[N], wb[N], wv[N], wss[N]; int rak[N], height[N], cal[N], sa[N]; int T, len, rmq[N], imin[N][M]; ll k; char s[N]; bool cmp(int *r, int a, int b, int l) { return r[a] == r[b] && r[a + l] == r[b + l]; } void da(int *r, int *sa, int n, int m) { int i, j, p, *x = wa, *y = wb; for (i = 0; i < m; i++) wss[i] = 0; for (i = 0; i < n; i++) wss[x[i] = r[i]]++; for (i = 1; i < m; i++) wss[i] += wss[i - 1]; for (i = n - 1; i >= 0; i--) sa[--wss[x[i]]] = i; for (j = 1, p = 1; p < n; j *= 2, m = p) { for (p = 0, i = n - j; i < n; i++) y[p++] = i; for (i = 0; i < n; i++) if (sa[i] >= j) y[p++] = sa[i] - j; for (i = 0; i < n; i++) wv[i] = x[y[i]]; for (i = 0; i < m; i++) wss[i] = 0; for (i = 0; i < n; i++) wss[wv[i]]++; for (i = 1; i < m; i++) wss[i] += wss[i - 1]; for (i = n - 1; i >= 0; i--) sa[--wss[wv[i]]] = y[i]; for (swap(x, y), p = 1, x[sa[0]] = 0, i = 1; i < n; i++) { if (cmp(y, sa[i - 1], sa[i], j)) x[sa[i]] = p - 1; else x[sa[i]] = p++; } } } void calc(int *r, int *sa, int n) { int i, j, k = 0; for (i = 1; i <= n; i++) rak[sa[i]] = i; for (i = 0; i < n; height[rak[i++]] = k) for (k ? k-- : 0, j = sa[rak[i] - 1]; r[i + k] == r[j + k]; k++); for (i = n; i; i--) { rak[i] = rak[i - 1]; sa[i]++; } } void init(int n) { for (int i = 1; i <= n; i++) imin[i][0] = height[i]; int l = int(log((double)n) / log(2.0)); for (int j = 1; j <= l; j++) { for (int i = 1; i + (1 << (j - 1)) <= n; i++) { imin[i][j] = min(imin[i][j - 1], imin[i + (1 << (j - 1))][j - 1]); } } } int ask(int l, int r) { int k = int(log(double(r - l + 1)) / log(2.0)); return min(imin[l][k], imin[r - (1 << k) + 1][k]); } int lcp(int a, int b) { int ra = rak[a], rb = rak[b]; if (ra > rb) swap(ra, rb); return ask(ra + 1, rb); } int main() { scanf("%d", &T); while (T--) { scanf("%lld%s", &k, s + 1); int ls = strlen(s + 1); len = 0; for (int i = 1; i <= ls; i++) cal[++len] = s[i]; cal[len + 1] = 0; da(cal + 1, sa, len + 1, 150); calc(cal + 1, sa, len); init(len); ll res = 0; if (1 == k) { for (int i = 1; i <= len; i++) res += max(0, len - sa[i] + 1 - max(height[i], i + 1 > len ? 0 : height[i + 1])); printf("%lld ", res); continue; } for (int i = 1; i + k - 1 <= len; i++) { int l = i, r = i + k - 1; int t = lcp(sa[l], sa[r]); res += max(0, t - max(height[l], r + 1 > len ? 0 : height[r + 1])); } printf("%lld ", res); for (int i = 1; i <= ls; i++) s[i] = '