POJ 3294 Life Forms(后缀数组)

Description

You may have wondered why most extraterrestrial life forms resemble humans, differing by superficial traits such as height, colour, wrinkles, ears, eyebrows and the like. A few bear no human resemblance; these typically have geometric or amorphous shapes like cubes, oil slicks or clouds of dust.

The answer is given in the 146th episode of Star Trek - The Next Generation, titled The Chase. It turns out that in the vast majority of the quadrant's life forms ended up with a large fragment of common DNA.

Given the DNA sequences of several life forms represented as strings of letters, you are to find the longest substring that is shared by more than half of them.

Input

Standard input contains several test cases. Each test case begins with 1 ≤ n ≤ 100, the number of life forms. n lines follow; each contains a string of lower case letters representing the DNA sequence of a life form. Each DNA sequence contains at least one and not more than 1000 letters. A line containing 0 follows the last test case.

Output

For each test case, output the longest string or strings shared by more than half of the life forms. If there are many, output all of them in alphabetical order. If there is no solution with at least one letter, output "?". Leave an empty line between test cases.

题目大意:给一大堆字符串,求出现在多于n/2个字符串中的最长子串(注意不能取等号),字典序输出所有答案。

思路:首先,把这一大堆字符串各用一个不同的字符全部都连起来,求后缀数组。

然后求height[]数组。

然后二分答案的长度L,检查这个长度。

检查的时候呢,按height[]数组的顺序从前往后扫,看是否有n/2以上的字符串的子串连在一起并且height[]都大于等于L。

然后呢先把二分的答案保存下来。

输出就跟检查差不多,从前往后扫一遍把符合要求的都存起来,然后用rank[]数组为基准排个序,然后输出就可以啦。

注意输出来的字符串不要重复了噢。

PS:貌似没有n=1的情况……

做后缀数组的时候都换成整数,什么要99个不用的分隔符都好解决啦。

代码(782MS):

  1 #include <cstdio>
  2 #include <algorithm>
  3 #include <iostream>
  4 #include <cstring>
  5 using namespace std;
  6 
  7 const int MAXN = 110000;
  8 
  9 char str[MAXN];
 10 int s[MAXN], id[MAXN];
 11 int sa[MAXN], rank[MAXN], height[MAXN], c[MAXN], tmp[MAXN];
 12 int n, m, k;
 13 
 14 void makesa(int m) {
 15     memset(c, 0, m * sizeof(int));
 16     for(int i = 0; i < n; ++i) ++c[rank[i] = s[i]];
 17     for(int i = 1; i < m; ++i) c[i] += c[i - 1];
 18     for(int i = 0; i < n; ++i) sa[--c[rank[i]]] = i;
 19     for(int k = 1; k < n; k <<= 1) {
 20         for(int i = 0; i < n; ++i) {
 21             int j = sa[i] - k;
 22             if(j < 0) j += n;
 23             tmp[c[rank[j]]++] = j;
 24         }
 25         int j = c[0] = sa[tmp[0]] = 0;
 26         for(int i = 1; i < n; ++i) {
 27             if(rank[tmp[i]] != rank[tmp[i - 1]] || rank[tmp[i] + k] != rank[tmp[i - 1] + k])
 28                 c[++j] = i;
 29             sa[tmp[i]] = j;
 30         }
 31         memcpy(rank, sa, n * sizeof(int));
 32         memcpy(sa, tmp, n * sizeof(int));
 33     }
 34 }
 35 
 36 void calheight() {
 37     for(int i = 0, k = 0; i < n; height[rank[i++]] = k) {
 38         k -= (k > 0);
 39         int j = sa[rank[i] - 1];
 40         while(s[i + k] == s[j + k]) ++k;
 41     }
 42 }
 43 
 44 int exist[MAXN];
 45 int stk[MAXN];
 46 int ans[MAXN];
 47 
 48 bool cmp(int a, int b) {
 49     return rank[a] < rank[b];
 50 }
 51 
 52 void print(int L) {
 53     memset(exist, 0, m * sizeof(int));
 54     int sum = 1, top = 0, cnt = 0;
 55     stk[++top] = id[sa[0]];
 56     ++exist[id[sa[0]]];
 57     height[n] = 0;
 58     for(int i = 1; i < n; ++i) {
 59         if(height[i] >= L) {
 60             if(++exist[id[sa[i]]] == 1) ++sum;
 61             stk[++top] = id[sa[i]];
 62             if(sum > k && height[i + 1] < L) ans[cnt++] = sa[i];
 63         } else {
 64             while(top) --exist[stk[top--]];
 65             sum = ++exist[id[sa[i]]];
 66             stk[++top] = id[sa[i]];
 67         }
 68     }
 69     sort(ans, ans + cnt, cmp);
 70     for(int i = 0; i < cnt; ++i) {
 71         for(int j = ans[i]; j < ans[i] + L; ++j) putchar(str[j]);
 72         puts("");
 73     }
 74 }
 75 
 76 bool check(int L) {
 77     memset(exist, 0, m * sizeof(int));
 78     int sum = 1, top = 0;
 79     stk[++top] = id[sa[0]];
 80     ++exist[id[sa[0]]];
 81     for(int i = 1; i < n; ++i) {
 82         if(height[i] >= L) {
 83             if(++exist[id[sa[i]]] == 1) ++sum;
 84             stk[++top] = id[sa[i]];
 85             if(sum > k) return true;
 86         } else {
 87             while(top) --exist[stk[top--]];
 88             sum = ++exist[id[sa[i]]];
 89             stk[++top] = id[sa[i]];
 90         }
 91     }
 92     return false;
 93 }
 94 
 95 int solve() {
 96     int l = 1, r = 1001;
 97     while(l < r) {
 98         int mid = (l + r) >> 1;
 99         if(check(mid)) l = mid + 1;
100         else r = mid;
101     }
102     return l - 1;
103 }
104 //n = 1
105 int main() {
106     bool flag = false;
107     while(scanf("%d", &m) != EOF && m) {
108         k = m / 2;
109         n = 0;
110         for(int i = 0; i < m; ++i) {
111             scanf("%s", str + n);
112             while(str[n]) {
113                 id[n] = i;
114                 s[n] = str[n] - 'a' + 1;
115                 ++n;
116             }
117             s[n++] = i + 100;
118         }
119         s[n - 1] = 0;
120         if(flag) puts("");
121         else flag = true;
122         if(m == 1) {
123             puts(str);
124             continue;
125         }
126         makesa(222);
127         calheight();
128         int t = solve();
129         if(t) print(t);
130         else puts("?");
131     }
132 }
View Code
原文地址:https://www.cnblogs.com/oyking/p/3537688.html