字符串模式匹配算法系列(三):Trie树及AC改进算法

Trie树的python实现(leetcode 208)

  1 #!/usr/bin/env python
  2 #-*- coding: utf-8 -*-
  3 import sys
  4 import pdb
  5 
  6 reload(sys)
  7 sys.setdefaultencoding('utf-8')
  8 
  9 
 10 class TrieNode(object):
 11     """Trie节点
 12 
 13     Attributes:
 14         _val: 本节点的值(非None即作为结束判断条件)
 15         _next: 后继节点
 16     """
 17     def __init__(self, value=None):
 18         self._val = value
 19         self._next = {}
 20 
 21     def set_value(self, value=None):
 22         """为当前节点设置值
 23         """
 24         self._val = value
 25 
 26     def get_value(self):
 27         """获取当前节点的值
 28         """
 29         return self._val
 30 
 31     def set_next(self, key, value=None):
 32         """为当前节点添加一个后继节点
 33         """
 34         if key not in self._next:
 35             self._next[key] = TrieNode(value)
 36         return self._next[key]
 37 
 38     def get_next(self, key):
 39         """从当前节点获取指定的后继节点
 40         """
 41         if key not in self._next:
 42             return None
 43         return self._next[key]
 44 
 45 
 46 class Trie(object):
 47     """Trie树
 48     Attribures:
 49         _root: 根节点
 50     """
 51     def __init__(self):
 52         # 生成root节点
 53         self._root = TrieNode()
 54 
 55     def insert(self, word):
 56         """将一个单词插入trie树
 57         """
 58         curr = self._root
 59 
 60         for char in word:
 61             curr = curr.set_next(char)
 62         curr.set_value(True)
 63 
 64     def search(self, word):
 65         """检索一个单词是否trie树中存在
 66         """
 67         curr = self._root
 68         ret = False
 69 
 70         for i, c in enumerate(word):
 71             curr = curr.get_next(c)
 72             if curr is None:
 73                 break
 74             if i + 1 == len(word) and curr.get_value() is True:
 75                 ret = True
 76                 break
 77         return ret
 78 
 79     def startsWith(self, prefix):
 80         """检索trie树中是否有prefix开头的单词
 81         """
 82         curr = self._root
 83         ret = True
 84 
 85         for c in prefix:
 86             curr = curr.get_next(c)
 87             if curr is None:
 88                 ret = False
 89                 break
 90         return ret
 91 
 92 
 93 def main():
 94     trie = Trie()
 95     trie.insert("app")
 96     trie.insert("apple")
 97     print trie.search("app")
 98 
 99 
100 if __name__ == '__main__':
101     main()

AC改进算法python实现

  1 #!/usr/bin/env python
  2 #-*- coding: utf-8 -*-
  3 import sys
  4 import pdb
  5 
  6 reload(sys)
  7 sys.setdefaultencoding('utf-8')
  8 
  9 
 10 class ACTrieNode(object):
 11     """ACTrie节点
 12 
 13     Attributes:
 14         val: 本节点的值(非None即作为结束判断条件)
 15         children: 孩子节点
 16         fail: 失配跳转指针
 17     """
 18     def __init__(self, value=None):
 19         self.val = value
 20         self.children = {}
 21         self.fail = None
 22 
 23     def get_next(self, key):
 24         """从本节点开始,找到children中包含key的节点,如果找不到就返回根节点
 25         """
 26         if key in self.children.keys():
 27             return self.children[key]
 28         if self.fail is None:
 29             # fail为None就是根节点
 30             return self
 31         return self.fail.get_next(key)
 32 
 33 
 34 class ACTrie(object):
 35     """ACTrie树
 36 
 37     Attribures:
 38         _root: 根节点
 39     """
 40     def __init__(self):
 41         self._root = ACTrieNode() # 生成root节点
 42 
 43     def insert(self, word):
 44         """将一个单词插入trie树
 45         """
 46         curr = self._root
 47         for char in word:
 48             if char not in curr.children:
 49                 curr.children[char] = ACTrieNode()
 50             curr = curr.children[char]
 51         curr.val = word
 52 
 53     def update_failure(self):
 54         """更新failure跳转
 55         """
 56         bfs_queue = [self._root] # 利用list作为bfs缓存队列
 57 
 58         while len(bfs_queue) > 0:
 59             father = bfs_queue.pop(0) # 取出队列头部元素
 60 
 61             # BFS遍历父节点的所有子节点,为他们设置failure
 62             for key, child in father.children.items():
 63                 bfs_queue.append(child) # 将当前元素放入队列尾部
 64 
 65                 if father == self._root:
 66                     # 当前父节点是root时,其子节点的failure也指向root
 67                     child.fail = self._root
 68                 else:
 69                     # 当前父节点不是root时,其子节点的failure尝试指向"(迭代)父节点的failure的同名子节点"
 70                     child.fail = father.fail.get_next(key)
 71 
 72     def search(self, text):
 73         """从源字符串中寻找目标字符串
 74         """
 75         match_set = set()
 76         curr = self._root
 77 
 78         for char in text:
 79             curr = curr.get_next(char)
 80             # 搜集匹配上的单词
 81             tmp_node = curr
 82             while tmp_node:
 83                 if tmp_node.val:
 84                     match_set.add(tmp_node.val)
 85                 tmp_node = tmp_node.fail
 86         return match_set
 87 
 88 
 89 def main():
 90     trie = ACTrie()
 91     trie.insert("abcd")
 92     trie.insert("ab")
 93     trie.insert("bc")
 94     trie.insert("cf")
 95     trie.insert("cde")
 96 
 97     trie.update_failure()
 98 
 99     text = 'abcdefg'
100     ret = trie.search(text)
101     print ret
102 
103 if __name__ == '__main__':
104     main()
原文地址:https://www.cnblogs.com/zhongmiaozhimen/p/11258165.html