【POJ3294】 Life Forms (后缀数组+二分)

Life Forms

Description

You may have wondered why most extraterrestrial life forms resemble humans, differing by superficial traits such as height, colour, wrinkles, ears, eyebrows and the like. A few bear no human resemblance; these typically have geometric or amorphous shapes like cubes, oil slicks or clouds of dust.

The answer is given in the 146th episode of Star Trek - The Next Generation, titled The Chase. It turns out that the vast majority of the quadrant's life forms ended up with a large fragment of common DNA.

Given the DNA sequences of several life forms represented as strings of letters, you are to find the longest substring that is shared by more than half of them.

Input

Standard input contains several test cases. Each test case begins with 1 ≤ n ≤ 100, the number of life forms. n lines follow; each contains a string of lower case letters representing the DNA sequence of a life form. Each DNA sequence contains at least one and not more than 1000 letters. A line containing 0 follows the last test case.

Output

For each test case, output the longest string or strings shared by more than half of the life forms. If there are many, output all of them in alphabetical order. If there is no solution with at least one letter, output "?". Leave an empty line between test cases.

Sample Input

3
abcdefg
bcdefgh
cdefghi
3
xxx
yyy
zzz
0

Sample Output

bcdefg
cdefgh

?


545


【题意】

  给定n个字符串,求出现在大于n/2个字符串中的最长子串。

【分析】

  把n个串拼起来,中间插入不同的特殊字符(注意要不同的)。然后求height数组。

  二分答案,根据二分的L分组,统计组内有多少个来自不同的串即可。

  最后输出的字符串要按字典序,不过不用另外排序:sa数组本身就是按后缀的字典序排好的,所以按sa的顺序依次输出就好了。

  傻逼的我RE了很久竟然是数组大小开的混乱~~呵呵~~

代码如下:

  1 #include<cstdio>
  2 #include<cstdlib>
  3 #include<cstring>
  4 #include<iostream>
  5 #include<algorithm>
  6 #include<queue>
  7 using namespace std;
  8 #define Maxn 410
  9 #define Maxl 4010
 10 #define Ml 400010
 11 
 12 int c[Ml];
 13 int n,cl;
 14 
 15 char s[Maxl];
 16 int p[Ml];
 17 void init()
 18 {
 19     cl=0;
 20     for(int i=1;i<=n;i++)
 21     {
 22         scanf("%s",s);
 23         int l=strlen(s);
 24         for(int j=0;j<l;j++) c[++cl]=s[j]-'a'+1,p[cl]=i;
 25         c[++cl]=26+i,p[cl]=0;
 26     }
 27 }
 28 
 29 int sa[Ml],rk[Ml],Rs[Ml],y[Ml],wr[Ml];
 30 void get_sa(int m)
 31 {
 32     memcpy(rk,c,sizeof(rk));
 33     for(int i=0;i<=m;i++) Rs[i]=0;
 34     for(int i=1;i<=cl;i++) Rs[rk[i]]++;
 35     for(int i=1;i<=m;i++) Rs[i]+=Rs[i-1];
 36     for(int i=cl;i>=1;i--) sa[Rs[rk[i]]--]=i;
 37     
 38     int ln=1,p=0;//p表示目前有多少个不一样的rk
 39     while(p<cl)
 40     {
 41         int k=0;
 42         for(int i=cl-ln+1;i<=cl;i++) y[++k]=i;
 43         for(int i=1;i<=cl;i++) if(sa[i]>ln) y[++k]=sa[i]-ln;
 44         for(int i=1;i<=cl;i++) wr[i]=rk[y[i]];
 45         
 46         for(int i=0;i<=m;i++) Rs[i]=0;
 47         for(int i=1;i<=cl;i++) Rs[wr[i]]++;
 48         for(int i=1;i<=m;i++) Rs[i]+=Rs[i-1];
 49         for(int i=cl;i>=1;i--) sa[Rs[wr[i]]--]=y[i];
 50         
 51         for(int i=1;i<=cl;i++) wr[i]=rk[i];
 52         for(int i=cl+1;i<=cl+ln;i++) wr[i]=0;
 53         p=1,rk[sa[1]]=1;
 54         for(int i=2;i<=cl;i++)
 55         {
 56             if(wr[sa[i]]!=wr[sa[i-1]]||wr[sa[i]+ln]!=wr[sa[i-1]+ln]) p++;
 57             rk[sa[i]]=p;
 58         }
 59         ln*=2;m=p;
 60     }
 61     sa[0]=rk[0]=0;
 62 }
 63 
 64 int height[Ml];
 65 void get_he()
 66 {
 67     int k=0;
 68     for(int i=1;i<=cl;i++) if(rk[i]!=1)
 69     {
 70         int j=sa[rk[i]-1];
 71         if(k) k--;
 72         while(c[i+k]==c[j+k]&&i+k<=cl&&j+k<=cl) k++;
 73         height[rk[i]]=k;
 74     }        
 75 }
 76 
 77 bool inq[Maxn];
 78 queue<int > q;
 79 int ans[2][Ml],now;
 80 bool check(int x)
 81 {
 82     if(x==0) return 1;
 83     ans[1-now][0]=0;
 84     int cnt=0;
 85     bool ok=0;
 86     for(int i=1;i<=cl-n;i++)
 87     {
 88         if(!inq[p[sa[i]]])
 89         {
 90             q.push(p[sa[i]]);
 91             inq[p[sa[i]]]=1;
 92             cnt++;
 93         }
 94         if(height[i+1]<x)//new group
 95         {
 96             if(cnt>n/2&&cnt!=1) ok=1,ans[1-now][++ans[1-now][0]]=sa[i];
 97             cnt=0;
 98             while(!q.empty()) {inq[q.front()]=0;q.pop();}
 99         }
100     }
101     if(ok) now=1-now;
102     return ok;
103 }
104 
105 void ffind()
106 {
107     now=1;
108     while(!q.empty()) q.pop();
109     memset(inq,0,sizeof(inq));
110     int l=0,r=cl;
111     while(l<r)
112     {
113         int mid=(l+r+1)>>1;
114         if(check(mid)) l=mid;
115         else r=mid-1;
116     }
117     if(l==0) printf("?
");
118     else
119     {
120         for(int i=1;i<=ans[now][0];i++)
121         {
122             for(int j=0;j<l;j++)
123              printf("%c",c[j+ans[now][i]]-1+'a');
124             printf("
");
125         }
126     }
127 }
128 
129 int main()
130 {
131     while(1)
132     {
133         scanf("%d",&n);
134         if(n==0) break;
135         init();
136         get_sa(30+n);
137         get_he();
138         ffind();
139         printf("
");
140     }
141     return 0;
142 }
143 
144 [POJ3294]
[POJ3294]

2016-07-18 11:04:28



原文地址:https://www.cnblogs.com/Konjakmoyu/p/5680465.html