SCU 4493 DNA


Time Limit: 1000 MS    Memory Limit: 131072 K


   




Description



Deoxyribonucleic acid (DNA) is a molecule that carries most of the genetic instructions used in the development,
functioning and reproduction of all known living organisms and many viruses.
Most DNA molecules consist of two biopolymer strands coiled around each other to form a double helix.
The two DNA strands are known as polynucleotides since they are composed of simpler units called nucleotides.
Each nucleotide is composed of a nitrogen-containing nucleobase—either cytosine (C), guanine (G), adenine (A), or thymine (T)—
as well as a monosaccharide sugar called deoxyribose and a phosphate group. According to base pairing rules (A with T, and C with G),
hydrogen bonds bind the nitrogenous bases of the two separate polynucleotide strands to make double-stranded DNA.
We define the length of a strand as the number of its nucleobases. Given a bunch of different DNA strands, for each strand,
find the length of the longest common piece (contiguous substring) shared by the strand and its complementary strand.



Input



The first line is the number of test cases, T, where 0 < T <= 100.
Each of the following T lines contains one DNA strand, whose length is no more than 5000.



Output



For each strand, print a number, indicating the answer illustrated above.

Sample Input



3
A
AT
ATAT


Sample Output




0
1
3


Author



mrxy_56

分析:题目给出一个字符串(一条 DNA 链),根据碱基配对规则(A 与 T、C 与 G 互换)再构造出它的互补链

即求这两个字符串的最长公共子串的长度

需要用到后缀数组:把两个字符串用一个未出现过的分隔符拼接起来,求出 height 数组后,在排名相邻且分别属于两个串的后缀中取 height 的最大值

代码如下:

#include <cstdio>
#include <iostream>
#include <algorithm> 
#include <cstring>
typedef long long ll;
using namespace std;
// Capacity of every suffix-array buffer below. Both the number of characters
// n and the alphabet bound m passed to da() must not exceed it.
const int MAXN=200010;
// Scratch space for da(): wa and wb alternate as the rank array and the
// second-key ordering, wv holds the first-key rank of each suffix in that
// ordering, and Ws holds the counting-sort bucket counts.
int wa[MAXN],wb[MAXN],wv[MAXN],Ws[MAXN];

// Returns non-zero when positions a and b carry the same rank in r and the
// positions l further on (a+l and b+l) carry the same rank as well, i.e. the
// two suffixes agree on both halves of the current doubled prefix.
int cmp(int *r,int a,int b,int l)
{
      return r[a]==r[b]&&r[a+l]==r[b+l];
}

// Builds the suffix array of r[0..n-1] by prefix doubling with counting sort.
//
//   r  - input characters; each is read as an unsigned byte and must be < m.
//   sa - output, n entries: sa[k] is the start index of the k-th smallest
//        suffix.
//   n  - number of characters to sort. The last one should be strictly
//        smaller than all the others (typically the string's 0 terminator):
//        that keeps cmp() from looking at ranks beyond index n-1.
//   m  - exclusive upper bound on the character values.
void da(const char r[],int sa[],int n,int m)
{
      int i,j,p,*x=wa,*y=wb,*t;

      // Initial pass: counting sort of the suffixes by their first character.
      for(i=0; i<m; i++) Ws[i]=0;
      // Read through unsigned char: where plain char is signed, a byte
      // >= 0x80 would otherwise turn into a negative bucket index.
      for(i=0; i<n; i++) Ws[x[i]=static_cast<unsigned char>(r[i])]++;
      for(i=1; i<m; i++) Ws[i]+=Ws[i-1];
      for(i=n-1; i>=0; i--) sa[--Ws[x[i]]]=i;

      // Each round sorts by the first 2*j characters using the ranks of the
      // first j characters (x). p counts distinct ranks; once it reaches n
      // every suffix is uniquely ranked and the order is final.
      for(j=1,p=1; p<n; j*=2,m=p)
      {
            // y lists the suffixes ordered by their second half: the last j
            // suffixes have no second half and therefore come first, the
            // rest follow in the order given by the previous round.
            for(p=0,i=n-j; i<n; i++) y[p++]=i;
            for(i=0; i<n; i++) if(sa[i]>=j) y[p++]=sa[i]-j;

            // Stable counting sort of that list by the first-half rank.
            for(i=0; i<n; i++) wv[i]=x[y[i]];
            for(i=0; i<m; i++) Ws[i]=0;
            for(i=0; i<n; i++) Ws[wv[i]]++;
            for(i=1; i<m; i++) Ws[i]+=Ws[i-1];
            for(i=n-1; i>=0; i--) sa[--Ws[wv[i]]]=y[i];

            // Swap the buffers (y now holds the old ranks) and assign the new
            // ranks; suffixes that agree on both halves share a rank.
            t=x; x=y; y=t;
            p=1;
            x[sa[0]]=0;
            for(i=1; i<n; i++)
                  x[sa[i]]=cmp(y,sa[i-1],sa[i],j)?p-1:p++;
      }
      return;
}
// Suffix-array results shared by calheight() and main(): sa holds the sorted
// suffix start positions, Rank maps a start position to its index in sa, and
// height[k] is the length of the common prefix of the suffixes at sa[k-1] and
// sa[k]. calheight() finally shifts sa and Rank to 1-based form.
int sa[MAXN],Rank[MAXN],height[MAXN];

// Fills the global Rank and height arrays from a suffix array.
//
//   r  - the text the suffix array was built from.
//   sa - suffix array with entries 0..n; entries 1..n are the ones ranked
//        here, entry 0 is only read as the predecessor of rank 1.
//   n  - number of suffixes to rank.
//
// On return height[k] is the common-prefix length of the suffixes ranked
// k-1 and k. As a last step every sa[1..n] is incremented (start positions
// become 1-based) and Rank is shifted up by one index to match.
void calheight(const char *r,int *sa,int n)
{
      // Invert the suffix array: Rank[start] = index of that suffix in sa.
      for(int pos=1; pos<=n; ++pos)
            Rank[sa[pos]]=pos;

      // Walk the suffixes in text order. The match length carried over from
      // the previous start can shrink by at most one, so it is reused.
      int matched=0;
      for(int start=0; start<n; ++start)
      {
            if(matched) --matched;
            const int prev=sa[Rank[start]-1];
            while(r[start+matched]==r[prev+matched])
                  ++matched;
            height[Rank[start]]=matched;
      }

      // Convert to 1-based form, going downwards so that Rank[pos-1] is
      // still the unshifted value when it is copied.
      for(int pos=n; pos>=1; --pos)
      {
            ++sa[pos];
            Rank[pos]=Rank[pos-1];
      }
}
int main()
{
    char r1[MAXN];
    char r2[MAXN];
    int t;
    cin>>t;
     while(t--)
     {
     
      scanf("%s",r1);
    
        int L1=strlen(r1);
        for(int i=0;i<L1;i++)
        {
            if(r1[i]=='A')
            r2[i]='T';
            else if(r1[i]=='T')
            r2[i]='A';
            else if(r1[i]=='G')
            r2[i]='C';
            else if(r1[i]=='C')
            r2[i]='G';
        }
        int L2=L1;
        
        r1[L1]=126;
        int h=L1+1;
        for(int i=0;i<L2;i++)
        {
           r1[h++]=r2[i];
        }
        r1[h]='';
        int m=127;
        int len=h;
        da(r1,sa,h+1,m);
      
        calheight(r1,sa,h);
     
        int ans=0;
       
        for(int i=2;i<=len;i++)
        {
            if(height[i]>ans)
            {
                if(0<=sa[i-1]&&sa[i-1]<=L1&&L1<sa[i])
                ans=height[i];
                if(0<=sa[i]&&sa[i]<=L1&&L1<sa[i-1])
                ans=height[i];
            }         
            
        }
        cout<<ans<<endl;
}
}
原文地址:https://www.cnblogs.com/a249189046/p/6738472.html