数据结构 DNA序列 (KMP+暴力,或者STL+暴力)

Description

现有一个项目旨在从成千上万捐献的DNA分析地球上人类是如何繁衍的。该项目邀请你写一个程序找出给定的DNA片段之间的相同之处,使得对个体的调查相关联。一个DNA碱基序列是指把分子中发现的氮基的序列给罗列出来。有四种氮基:腺嘌呤(A)、胸腺嘧啶(T)、鸟嘌呤(G)和胞嘧啶(C),例如,一个6碱基DNA序列可以表示为TAGACC。给出一个DNA碱基序列的集合,确定在所有序列中都出现的最长的碱基序列;若最长的有多个,取其中字典序最小的一个。

Input

第一行是一个整数,代表数据组数。每组数据第一行为一个整数n(2<=n<=60),代表有n个DNA串,接下来n行每行一个长度不超过500的字符串(每组数据中的字符串长度相同),代表DNA序列。

Output

每组数据输出一行,如果在所有序列都出现的最长碱基序列长度不小于3,输出这个最长碱基序列,否则输出“no significant commonalities”。

Sample Input

3
2
GATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
3
GATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATA
GATACTAGATACTAGATACTAGATACTAAAGGAAAGGGAAAAGGGGAAAAAGGGGGAAAA
GATACCAGATACCAGATACCAGATACCAAAGGAAAGGGAAAAGGGGAAAAAGGGGGAAAA
3
CATCATCATCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
ACATCATCATAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AACATCATCATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT

Sample Output

no significant commonalities
AGATAC
CATCATCAT

HINT

考察知识点:KMP算法。


都说是KMP了,不要乱暴了。会被rejudge的。


Append Code

析:这个题可以按长度从大到小枚举第一个串的所有子串,然后把每个子串拿去和其他的 n-1 个串进行匹配(用 KMP),也可以直接用 STL 中 string 的 find 来找;同一长度下取字典序最小的那个子串作为答案。

代码如下:

#pragma comment(linker, "/STACK:1024000000,1024000000")
#include <cstdio>
#include <string>
#include <cstdlib>
#include <cmath>
#include <iostream>
#include <cstring>
#include <set>
#include <queue>
#include <algorithm>
#include <vector>
#include <map>
#include <cctype>
#include <cmath>
#include <stack>
// Convenience macros: writing `freopenr;` / `freopenw;` redirects stdin / stdout
// to the local files "in.txt" / "out.txt".
#define freopenr freopen("in.txt", "r", stdin)
#define freopenw freopen("out.txt", "w", stdout)
using namespace std;
// Short aliases for a 64-bit integer and an int pair.
typedef long long LL;
typedef pair<int, int> P;
// "Infinity" sentinels in int, double and 64-bit integer flavours.
const int INF = 0x3f3f3f3f;
const double inf = 0x3f3f3f3f3f3f;
const LL LNF = 0x3f3f3f3f3f3f;
// Pi computed via acos(-1), and a floating-point tolerance.
const double PI = acos(-1.0);
const double eps = 1e-8;
// Generic size bound (1100) and modulus (1000000007).
const int maxn = 1e3 + 100;
const int mod = 1e9 + 7;
// Four paired (dr[i], dc[i]) offsets — presumably up/right/down/left grid moves; verify before use.
const int dr[] = {-1, 0, 1, 0};
const int dc[] = {0, 1, 0, -1};
// 4-bit binary text for each hexadecimal digit value 0..15.
const char *Hex[] = {"0000", "0001", "0010", "0011", "0100", "0101", "0110", "0111", "1000", "1001", "1010", "1011", "1100", "1101", "1110", "1111"};
// n: number of strings read per test case in main(); m: set by match() to the
// length of the text being scanned. Both also serve as the bounds used by is_in().
int n, m;
// 1-indexed tables (entry 0 is a placeholder) — presumably days per month for a
// common year (mon) and a leap year (monn); only entry 2 differs.
const int mon[] = {0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
const int monn[] = {0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
// Smaller of two ints; yields b when the arguments compare equal.
inline int Min(int a, int b){
    if(a < b)  return a;
    return b;
}
// Larger of two ints; yields b when the arguments compare equal.
inline int Max(int a, int b){
    if(a > b)  return a;
    return b;
}
// 64-bit overload: smaller of two LL values; yields b when they compare equal.
inline LL Min(LL a, LL b){
    if(a < b)  return a;
    return b;
}
// 64-bit overload: larger of two LL values; yields b when they compare equal.
inline LL Max(LL a, LL b){
    if(a > b)  return a;
    return b;
}
// True when (r, c) satisfies 0 <= r < n and 0 <= c < m, using the global n and m as bounds.
inline bool is_in(int r, int c){
    if(r < 0 || r >= n)  return false;
    if(c < 0 || c >= m)  return false;
    return true;
}
// Input strings of the current test case; main() clears and refills it per case.
vector<string> v;
// Current pattern: the candidate substring being searched for in every string of v.
string s;
// Raw buffer that main() reads one input token into before copying it into v.
char t[505];
// Failure table for s: written by getfail(), read by match().
int f[505];
// Best common substring found so far for the current test case (empty if none).
string ans;

// Builds the KMP failure table for the global pattern s into the global array f.
// After the call, f[k] (for 0 <= k <= s.size()) is the state to fall back to
// when a mismatch occurs after k pattern characters have been matched.
void getfail(){
    f[0] = 0;
    f[1] = 0;
    const size_t len = s.size();
    for(size_t pos = 1; pos < len; ++pos){
        // Start from the fallback of the current state and keep falling back
        // until the next character can extend the border (or we hit state 0).
        int k = f[pos];
        while(k != 0 && s[pos] != s[k])  k = f[k];
        if(s[pos] == s[k])  f[pos+1] = k + 1;
        else  f[pos+1] = 0;
    }
}
 
bool match(int cnt){
    int j = 0;
    m = v[cnt].size();
    for(int i = 0; i < m; ++i){
        while(j && s[j] != v[cnt][i])  j = f[j];
        if(s[j] == v[cnt][i]) ++j;
        if(s.size() == j)   return true;
    }
    return false;
}

int main(){
    int T;   cin >> T;
    while(T--){
        scanf("%d", &n);
        int xx = INF;
        v.clear();
        for(int i = 0; i < n; ++i){
            scanf("%s", t);
            v.push_back(t);
        }
        xx = v[0].size();
        ans.clear();
        for(int j = xx; j >= 3 && j >= ans.size() ; --j){
            for(int i = 0; i + j <= xx; ++i){
                if(ans.size() > j)  continue;
                s = v[0].substr(i, j);
                if(ans.size() == j && ans <= s)  continue;
                bool ok = true;
                for(int k = 1; k < n; ++k)
                    if(v[k].find(s) == s.npos) { ok = false;  break; }
 
                if(ok)  ans = s;
 
            }
            if(ans.size() >= j)  break;
        }
        printf("%s
", ans.size() < 3 ? "no significant commonalities" : ans.c_str());
    }
    return 0;
}
原文地址:https://www.cnblogs.com/dwtfukgv/p/5875567.html