基于Trie树实现自动补齐(autocomplete)功能

"""
Trie树
"""

__author__ = 'youngf'


class TrieNode:
    def __init__(self):
        self.children = {}
        self.last = False


class Trie:
    def __init__(self):
        self.root = TrieNode()

    def from_keys(self, words):
        """ 批量添加词 """
        for word in words:
            self.insert(word)

    def insert(self, word):
        """ 单条添加 """
        node = self.root
        for a in list(word):
            if not node.children.get(a):
                node.children[a] = TrieNode()
            node = node.children[a]
        node.last = True

    def search_word(self, word):
        """ 全匹配搜索 """
        node = self.root
        if not node:
            return False

        for a in list(word):
            if not node.children.get(a):
                return False
            node = node.children[a]
        return node.last

    def search_prefix(self, prefix):
        """ 前缀搜索 """
        node = self.root
        if not node:
            return False

        for a in list(prefix):
            if not node.children.get(a):
                return False
            node = node.children[a]
        return True

    def auto_complete(self, prefix):
        """ 返回所有以prefix为前缀的词 """

        def iter_words(node, word, word_list):
            if node.last:
                word_list.append(word)  # trick: 只要不改变word_list的id,就能保持引用传递
            if node.children:
                for ch, children_node in node.children.items():  # 深度优先遍历
                    iter_words(children_node, word + ch, word_list)

        node = self.root
        temp_word = ''
        candidate_words = []

        for ch in list(prefix):
            if not node.children.get(ch):
                # raise Exception('no such prefix')
                return []
            temp_word += ch
            node = node.children[ch]

        iter_words(node, temp_word, candidate_words)
        return candidate_words


if __name__ == '__main__':
    t = Trie()
    t.from_keys(['her', 'hell', 'hello', 'hers', 'cat'])
    print(t.search_prefix('he'))
    print(t.search_word('he'))
    print(t.auto_complete('he'))

原文地址:https://www.cnblogs.com/YoungF/p/14016892.html