C语言实现KMP模式匹配算法

next:

/*!
 * Description:
 * author scictor <scictor@gmail.com>
 * date 2018/7/4
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// https://tekmarathon.com/2013/05/14/algorithm-to-find-substring-in-a-string-kmp-algorithm/
/*What is Partial Match Table?
It is an array of size (pattern_length+1) where, for each position of i in pattern p, b[i] is defined such that it takes the ‘length of the longest proper suffix of P[1…i-1]’ that matches with the ‘prefix of P’.
What is longest prefix/suffix match??? Proper prefix is a prefix which is not same as the substring. Recall proper set which is a subset of a set but is not same as the set.
Why a prefix should match suffix of the pattern? its because when we shift the pattern its the prefix of P which comes towards the suffix. And also the key idea is that if we have successfully matched prefix P[1…i-1] of the pattern with the substring T[j-(i-1)…j-1] of the input string and P(i)!=T(j), then we dont need to reprocess any of the suffix T[j-(i-1)…j-1] since we know this portion of the text string is the prefix of the pattern that we have just matched.
*/
//"ababacb"
/**
 * Pre processes the pattern array based on proper prefixes and proper
 * suffixes at every position of the array
 *
 * @param ptrn
 *            word that is to be searched in the search string
 * @return partial match table which indicates
 */
void kmp_next(const char *pattern, int patternLen, int *next) {
    int i = 0, j = -1;

    next[i] = j; // default next[0] = -1
    while (i < patternLen) {
        while (j >= 0 && pattern[i] != pattern[j]) {
            // if there is mismatch consider the next widest border
            // The borders to be examined are obtained in decreasing order from
            //  the values b[i], b[b[i]] etc.
            j = next[j];
        }
        i++;
        j++;
        next[i] = j;
    }
    for(int index = 0; index < patternLen; ++index) printf("%d ", next[index]);
    return;
}

/**
     * Based on the pre processed array, search for the pattern in the text
     *
     * @param text
     *            text over which search happens
     * @param ptrn
     *            pattern that is to be searched
     */

//int matchIndex[128] = {0};

int kmp_search(const char *text, int textLen, const char *pattern, int patternLen) {
    int i = 0, j = 0;

    // initialize new array and preprocess the pattern
    int next[patternLen + 1];
    memset(next, 0x00, sizeof(next));

//    int idx = 0;
//    memset(matchIndex, 0x00, sizeof(matchIndex));

    kmp_next(pattern, patternLen, next);

    while (i < textLen) {
        while (j >= 0 && text[i] != pattern[j]) {
            j = next[j];
        }
        i++;
        j++;

        // a match is found
        //        if (j == patternLen) {
        //            printf("found substring at index:" + (i - patternLen));
        //            j = next[j];
        //        }
        if (j == patternLen) {
            printf("found substring at index:%d", (i - patternLen));
            //j = next[j];
            //matchIndex[idx++] = i - patternLen;
            return (i - patternLen);
        }
    }

//    for(int k = 0; k < idx; ++k)
//    {
//        printf("%d ", matchIndex[k]);
//    }
    return -1;
}

/*
Index         0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16  17  18  19  20  21  22  23
Text(T)       a b c a b d a b c a b  d  a  b  c  a  b   d   a   b   d   a   b   c
Pattern(P)    a b c a b d a b c
PMT (next[i])   -1 0 0 0 1 2 0 1 2 3
 */
// 4ms
int kmp(const char *text, int textLen, const char *pattern, int patternLen)
{
    int *T;
    int i, j;

    if (pattern[0] == '')
        return 0;

    // Construct the lookup table
    T = (int*) malloc( (patternLen + 1) * sizeof(int) );
    T[0] = -1;
    for (i=0; pattern[i] != ''; i++) {
        T[i+1] = T[i] + 1;
        while (T[i+1] > 0 && pattern[i] != pattern[T[i+1]-1])
            T[i+1] = T[T[i+1]-1] + 1;
    }

    for(int k = 0; k < patternLen; ++k) printf("%d ", T[k]);

    // Perform the search
    for (i=j=0; text[i] != ''; ) {
        if (j < 0 || text[i] == pattern[j]) {
            ++i, ++j;
            if (pattern[j] == '') {
                return (i-j);
            }
        }
        else j = T[j];
    }

    free(T);
    return -1;
}

/*const char *kmp_search(const char *text, const char *pattern)
{
    int *T;
    int i, j;
    const char *result = NULL;

    if (pattern[0] == '')
        return text;

    // Construct the lookup table
    T = (int*) malloc((strlen(pattern)+1) * sizeof(int) );
    T[0] = -1;
    for (i=0; pattern[i] != ''; i++) {
        T[i+1] = T[i] + 1;
        while (T[i+1] > 0 && pattern[i] != pattern[T[i+1]-1])
            T[i+1] = T[T[i+1]-1] + 1;
    }

    // Perform the search
    for (i=j=0; text[i] != ''; ) {
        if (j < 0 || text[i] == pattern[j]) {
            ++i, ++j;
            if (pattern[j] == '') {
                result = text+i-j;
                break;
            }
        }
        else j = T[j];
    }

    free(T);
    return result;
}
*/

nextval:

void preKmp(char *x, int m, int kmpNext[]) {
   int i, j;

   i = 0;
   j = kmpNext[0] = -1;
   while (i < m) {
      while (j > -1 && x[i] != x[j])
         j = kmpNext[j];
      i++;
      j++;
      if (x[i] == x[j])
         kmpNext[i] = kmpNext[j];
      else
         kmpNext[i] = j;
   }
}


// text:y, len: n, match parttern:x, len: m
int KMP(char *y, int n, char *x, int m) {
   int i, j, kmpNext[m];

   /* Preprocessing */
   preKmp(x, m, kmpNext);

   /* Searching */
   i = j = 0;
   while (j < n) {
      while (i > -1 && x[i] != y[j])
         i = kmpNext[i];
      i++;
      j++;
      // if (i >= m) {
      // printf("j-i=%d
",j - i);
      // i = kmpNext[i];
      // }
       if (i == m) {
            //printf("j-i=%d
",j - i);
            return j - i;
            //i = kmpNext[i];
      }
   }
    return -1;
}

int strStr(char* haystack, char* needle) {
    int needleLen = strlen(needle);
    if(needleLen == 0) return 0;
    int hayLen = strlen(haystack);
    if(hayLen == 0) return -1;
    
    return KMP(haystack, hayLen, needle, needleLen);
}
原文地址:https://www.cnblogs.com/guxuanqing/p/9266333.html