[算法] 最大公共子串

具体算法和推导,看链接。

参考:http://www.cnblogs.com/ider/p/longest-common-substring-problem-optimization.html

1、暴力法

#include <iostream>
#include <string>
#include <vector>
using namespace std;

//brute force方法
int longestCommonSubstring1(const string &s1, const string &s2)
{
    string::size_type size1 = s1.size();
    string::size_type size2 = s2.size();
    if(size1 == 0 || size2 == 0)
        return 0;
    
    int length = 0;
    int start1 = -1;
    int start2 = -1;
    int longest = 0;
    for(int i = 0; i < size1; ++i)
    {
        for(int j = 0; j < size2; ++j)
        {
            length = 0;
            //while循环计算S1的i~size1的后缀和S2的j~size2的后缀 是不是最长子串
            int m = i;
            int n = j;
            while(m < size1 && n < size2)
            {
                //只要子串中有一个数不想等,立即退出
                if(s1[m] != s2[n])
                {
                    break;
                }
                else
                {
                //如果两个后缀子串中,从开始到现在的字符想等,则++
                    ++m;
                    ++n;
                    ++length;
                }
            }
            //S1和S2的一个子串,结束后,看length的长度
            if(length > longest)
            {
                longest = length;
                start1 = i;
                start2 = j;
            }
        
        }
    }
    return longest;
    //cout << "(start1, start2, longest) = " << "(" << start1 << "," << start2 << "," << longest << ")" << endl;
}

2、动态规划法

(1)第一行和第一列先求,作为动态规划的初始化状态;

状态转移:如果两个字符相等,table[i][j] = table[i-1][j-1] + 1;

int longestCommonSubstring2(const string &s1, const string &s2)
{
    string::size_type size1 = s1.size();
    string::size_type size2 = s2.size();
    if(size1 == 0 || size2 == 0)
        return 0;
    int start1 = -1;
    int start2 = -1;
    int longest = 0;
    int length;

    //动态生成二维数组
    vector<vector<int>> table(size1, vector<int>(size2, 0));
    //第一行table赋值,第一行与S1的第一个字符比较
    //即动态规划的初始状态
    for(int j = 0; j < size2; ++j)
    {
        table[0][j] = (s1[0] == s2[j] ? 1 : 0);
    }
    //从第二行开始,从左到右依次遍历字符串
    for(int i = 1; i < size1; ++i)
    {
        //第一列的值,相当于初始状态
        table[i][0] = (s1[i] == s2[0] ? 1 : 0);
        for(int j = 1; j < size2; ++j)
        {
            if(s1[i] == s2[j])
            {
                //如果相等,那么+1;不想等,则table为0(在创建table时已经赋值了0)
                table[i][j] = table[i-1][j-1] + 1;
            }
        }
    }
    //找出table中的最大值
    for(int i = 0; i < size1; ++i)
    {
        for(int j = 0; j < size2; ++j)
        {
            if( longest < table[i][j])
            {
                longest = table[i][j];
                start1 = i - longest + 1;
                start2 = j - longest + 1;
            }
        }
    }

    cout << "(start1, start2, longest) = " << "(" << start1 << "," << start2 << "," << longest << ")" << endl;
}

(2)优化(求最大长度的循环进行优化,放到前面的循环中)

int longestCommonSubstring3(const string &s1, const string &s2)
{
    string::size_type size1 = s1.size();
    string::size_type size2 = s2.size();
    if(size1 == 0 || size2 == 0)
        return 0;
    int start1 = -1;
    int start2 = -1;
    int longest = 0;
    int length;
    vector<vector<int>> table(size1, vector<int>(size2, 0));
    for(int j = 0; j < size2; ++j)
    {
        table[0][j] = (s1[0] == s2[j] ? 1 : 0);
        if(table[0][j] > longest)
        {
            longest = table[0][j];
            start1 = 0;
            start2 = j;
        }
    }
    for(int i = 1; i < size1; ++i)
    {
        if(s1[i] == s2[0])
        {
            table[i][0] = 1;
            if(longest == 0)
            {
                longest = table[i][0];
                start1 = i;
                start2 = 0;
            }
        }
        for(int j = 1; j < size2; ++j)
        {
            if(s1[i] == s2[j])
            {
                table[i][j] = table[i-1][j-1] + 1;
                if( longest < table[i][j])
                {
                    longest = table[i][j];
                    start1 = i - longest + 1;
                    start2 = j - longest + 1;
                }
            }
            
        }
    }

    cout << "(start1, start2, longest) = " << "(" << start1 << "," << start2 << "," << longest << ")" << endl;
}

(3)优化空间复杂度

上面(1)(2)是按行求的,现在按斜对角线求。

int longestCommonSubstring4(const string &s1, const string &s2)
{
    string::size_type size1 = s1.size();
    string::size_type size2 = s2.size();
    if(size1 == 0 || size2 == 0)
        return 0;
    int start1 = -1;
    int start2 = -1;
    int longest = 0;
    int length;
    vector<vector<int>> table(size1, vector<int>(size2, 0));
    for(int i = 0; i < size1; ++i)
    {
        int m = i;
        int n = 0;
        int length = 0;
        while(m < size1 && n < size2)
        {
            if(s1[m] != s2[n])
            {
                //如果对角线上两个字符不相等,则后面继续计算最大公共子串时,要从0开始
                length = 0;
            }
            else
            {
                ++length;
                //在对角线计算时,子串到一定长度,后面不相等。
                //如果在if外更新longest的值,是错误的,是在字符相等时,更新值。
                if(longest < length)
                {
                    longest = length;
                    start1 = m - longest + 1;
                    start2 = n - longest + 1;
                }
            }
            ++m;
            ++n;
        }
        
    }
    for(int j = 1; j < size2; ++j)
    {
        int m = 0;
        int n = j;
        while(m < size1 && n < size2)
        {
            if(s1[m] != s2[n])
            {
                length = 0;
            }
            else
            {
                ++length;
                if(length > longest)
                {
                    longest = length;
                    start1 = m - longest + 1;
                    start2 = n - longest + 1;
                }
            }
        ++m;
        ++n;
        }
        
    }
    cout << "(start1, start2, longest) = " << "(" << start1 << "," << start2 << "," << longest << ")" << endl;
}
原文地址:https://www.cnblogs.com/pukaifei/p/5543849.html