[DataStructure] AC 自动机(Aho–Corasick Automata)

KMP(Knuth-Morris-Pratt)算法

  • 通常用来解决单个字符串快速匹配问题(时间复杂度为(O(N+M)))

  • 通过初始化fail数组使得匹配失败时按fail数组回退即可无需重复匹配

  • 扩展:

    • 扩展kmp算法(Extended KMP)

AC自动机(Aho–Corasick Automata)

  • 多个字符串快速匹配,是trie(字典树)和kmp的结合
  • 其主要流程如下:
    1. 选取待匹配字符串构建trie
    2. 利用BFS构建fail指针(即匹配失败的回退点)

472. 连接词 - 力扣(LeetCode) (leetcode-cn.com)

class TrieNode{
    public:
        TrieNode(int len,char value,int originIndex):fail(nullptr),value(value),len(len),originIndex(originIndex){
        }
        unordered_map<char,TrieNode*> children;
        int len=0;
        char value;
        int originIndex;
        TrieNode * fail;
};

class Trie{
    private:
        TrieNode * root;
    public:
        Trie():root(new TrieNode(0,' ',-1)){
        }
        void emplace(const string & s,int index){
            auto p = root;
            auto ptr = 0;
            while (ptr<s.size()){
                if (p->children.find(s[ptr])==p->children.end()){
                    p->children[s[ptr]] = new TrieNode(-(ptr+1),s[ptr],-1);
                }
                p=p->children[s[ptr++]];
            }
            p->len = s.size();
            p->originIndex = index;
        }
        void buildACAutomata(){
            queue<TrieNode *> q;
            for (auto const &[key,value]:root->children){
                q.emplace(value);
                value->fail = root;
            }
            while (!q.empty()){
                auto cur = q.front();
                q.pop();
                for (auto const &[key,value]:cur->children){
                    auto failto = cur->fail;
                    while (true){
                        if (failto==nullptr){
                            value->fail = root;
                            break;
                        }else if (failto->children.find(key)!=failto->children.end() && value->value==key){
                            value->fail = failto->children[key];
                            break;
                        }else{
                            failto=failto->fail;
                        }
                    };
                    q.emplace(value);
                }   
            }
        }
        bool match(const string &s,int index){
            if (s.size()==0) return false;
            vector<int> matchSize(s.size());
            auto p = root;
            for (auto i=0;i<s.size();++i){
                // 下一个节点不匹配,则需要回退
                if (p->children.find(s[i])==p->children.end() || p->children[s[i]]->originIndex==index){
                    if (p->fail==nullptr){ // 无法退回,返回false
                        return false;
                    }else{
                        p=p->fail; // 回退一个节点
                        while (p!=nullptr && (p->children.find(s[i])==p->children.end() || matchSize[i-abs(p->len)-1]==0)){ 
                            //判断回退后是否可匹配,否则继续回退
                            p=p->fail;
                        }
                        if (p==nullptr) return false;
                    }
                };
                p=p->children[s[i]];
                if (p==nullptr) return false;
                if (p->len>0){
                    matchSize[i] = p->len;
                }
                auto fa = p->fail;
                while (fa!=root){
                    if (fa->len>0 && matchSize[i-fa->len]>0){
                        matchSize[i] = fa->len;
                        break;
                    }
                    fa=fa->fail;
                }
            }
            auto last = matchSize.size()-1;
            return matchSize[last]>0;
        }
};

class Solution {
public:
    vector<string> findAllConcatenatedWordsInADict(vector<string>& words) {
        Trie trie;
        for (auto i=0;i<words.size();++i){
            trie.emplace(words[i],i);
        }
        trie.buildACAutomata();
        vector<string> ans;
        for (auto i=0;i<words.size();++i){
            if (trie.match(words[i],i)){
                ans.emplace_back(words[i]);
            }
        }
        return ans;
    }
};
原文地址:https://www.cnblogs.com/minskiter/p/14699677.html