Trie tree实践

1、Trie树

Trie树即字典树或前缀树,是一种利用字符串的公共前缀来存储和检索字符串集合的树形结构。

2、实践

代码实践如下:

  1 package cn.edu.buaa.trie;
  2 
  3 import java.util.HashSet;
  4 
  5 /**
  6  * @author zsm
  7  * @date 2016年10月25日 上午11:03:13
  8  * @version 1.0
  9  * @parameter
 10  * @return
 11  */
 12 public class Trie {
 13     private TrieNode trieRoot;
 14     private int treeSize;
 15 
 16     public Trie() {
 17         trieRoot = new TrieNode();
 18         treeSize = 0;
 19     }
 20 
 21     public TrieNode getRoot() {
 22         return trieRoot;
 23     }
 24 
 25     public int getTreeSize() {
 26         return treeSize;
 27     }
 28 
 29     /**
 30      * 添加单词
 31      */
 32     public void addWord(String word, int wordId) {
 33         addWord(trieRoot, word, wordId);
 34     }
 35 
 36     public void addWord(TrieNode root, String word, int wordId) {
 37         // 输入单词为空
 38         if (null == word || word.length() == 0) {
 39             return;
 40         }
 41 
 42         // 确定第一个字符在放在哪个孩子节点中
 43         int k = TrieNode.getCharPosition(word.charAt(0));
 44 
 45         // 该孩子为空,初始化
 46         if (root.childNodes[k] == null) {
 47             root.childNodes[k] = new TrieNode();
 48             treeSize++;
 49             root.childNodes[k].nodeChar = word.charAt(0);
 50         }
 51         // 单词出现在该孩子节点中
 52         root.childNodes[k].wordSet.add(wordId);
 53 
 54         word = word.substring(1);
 55         // 说明是最后一个字符,该词词频数加1
 56         if (word.length() == 0) {
 57             root.childNodes[k].freq++;
 58         } else {
 59             addWord(root.childNodes[k], word, wordId);
 60         }
 61     }
 62 
 63     /**
 64      * 删除单词
 65      */
 66     public void deleteWord(String word, int wordId) {
 67         deleteWord(trieRoot, word, wordId);
 68     }
 69 
 70     enum DELETERES {
 71         FAIL_EMPTYWORLD, FAIL_WORLD_NOT_EXIST, SUCCESS
 72     };
 73 
 74     public DELETERES deleteWord(TrieNode root, String word, int wordId) {
 75         // 输入单词为空
 76         if (null == word || word.length() == 0) {
 77             return DELETERES.FAIL_EMPTYWORLD;
 78         }
 79 
 80         int k = TrieNode.getCharPosition(word.charAt(0));
 81 
 82         // 第一个字符不在树中,说明没有要删除的单词
 83         if (root.childNodes[k] == null) {
 84             return DELETERES.FAIL_WORLD_NOT_EXIST;
 85         }
 86 
 87         // 第一个字符在树中
 88         DELETERES res;
 89         {
 90             word = word.substring(1);
 91             // 找到该单词
 92             if (word.length() == 0 && root.childNodes[k].freq > 0) {
 93                 root.childNodes[k].freq--;
 94                 res = DELETERES.SUCCESS;
 95             } else {
 96                 res = deleteWord(root.childNodes[k], word, wordId);
 97             }
 98 
 99             if (res == DELETERES.SUCCESS) {
100                 // 从沿途节点移除该单词
101                 root.childNodes[k].wordSet.remove(wordId);
102                 // 没单词了,释放节点
103                 if (root.childNodes[k].wordSet.size() == 0) {
104                     root.childNodes[k] = null;
105                     treeSize--;
106                 }
107             }
108             return res;
109         }
110     }
111 
112     /**
113      * 修改单词
114      */
115     public void updateWord(String newWord, String oldWord, int wordId) {
116         updateWord(trieRoot, newWord, oldWord, wordId);
117     }
118 
119     public void updateWord(TrieNode root, String newWord, String oldWord, int wordId) {
120         if (deleteWord(root, oldWord, wordId) == DELETERES.SUCCESS) {
121             addWord(root, newWord, wordId);
122         }
123     }
124 
125     /**
126      * 找以给定单词为前缀的所有单词的id
127      */
128     public HashSet<Integer> searchPrefixWord(String word) {
129         return searchPrefixWord(trieRoot, word);
130     }
131 
132     public HashSet<Integer> searchPrefixWord(TrieNode root, String word) {
133 
134         HashSet<Integer> wordSet = new HashSet<Integer>();
135 
136         // 输入单词为空
137         if (null == word || word.length() == 0) {
138             return wordSet;
139         }
140 
141         int k = TrieNode.getCharPosition(word.charAt(0));
142         // 单词里某个字符在树种不存在,说明没有该单词
143         if (root.childNodes[k] == null) {
144             return wordSet;
145         }
146 
147         word = word.substring(1);
148 
149         if (word.length() == 0) {
150             wordSet = root.childNodes[k].wordSet;
151         } else {
152             wordSet = searchPrefixWord(root.childNodes[k], word);
153         }
154         return wordSet;
155     }
156 
157     /**
158      * 统计给定单词出现的次数
159      */
160     public int wordCount(String word) {
161         return wordCount(trieRoot, word);
162     }
163 
164     public int wordCount(TrieNode root, String word) {
165 
166         // 输入单词为空
167         if (null == word || word.length() == 0) {
168             return 0;
169         }
170 
171         int k = TrieNode.getCharPosition(word.charAt(0));
172         // 单词里某个字符在树种不存在,说明没有该单词
173         if (root.childNodes[k] == null) {
174             return 0;
175         }
176 
177         int count = 0;
178         word = word.substring(1);
179 
180         if (word.length() == 0) {
181             count = root.childNodes[k].freq;
182         } else {
183             count = wordCount(root.childNodes[k], word);
184         }
185 
186         return count;
187     }
188 }
189 
190 /**
191  * Trie树的节点<br>
192  * 假定单词都由26个英文字母组成,Trie树根节点不存字符
193  */
194 class TrieNode {
195     // 孩子节点
196     public TrieNode[] childNodes;
197     // 该节点的字符
198     public char nodeChar;
199 
200     // 以该节点为结束的单词的词频
201     public int freq;
202     // 包含该节点的单词的id
203     public HashSet<Integer> wordSet;
204 
205     // 初始化
206     public TrieNode() {
207         childNodes = new TrieNode[CHILD_NUM];
208         freq = 0;
209         wordSet = new HashSet<Integer>();
210     }
211 
212     private static final int CHILD_NUM = 26;
213 
214     public static int getCharPosition(char ch) {
215         return (ch - 'a');
216     }
217 }
View Code

测试:

 1 package cn.edu.buaa.trie;
 2 
 3 /**
 4  * @author zsm
 5  * @date 2016年10月25日 下午3:12:02
 6  * @version 1.0
 7  * @parameter
 8  * @return
 9  */
10 public class Main_Trie {
11 
12     public static void main(String[] args) {
13         // TODO Auto-generated method stub
14         Trie trie = new Trie();
15         String wd1 = "ab";
16         String wd2 = "ac";
17         String wd3 = "acd";
18 
19         String wd4 = "add";
20 
21         trie.addWord(wd1, 1);
22         trie.addWord(wd2, 2);
23         trie.addWord(wd2, 3);
24         trie.addWord(wd3, 4);
25 
26         // wd1,wd2,wd2,wd3
27         System.out.println(trie.wordCount(wd2));// 2
28         System.out.println(trie.wordCount(wd3));// 1
29         System.out.println(trie.getTreeSize());// 4
30         System.out.println();
31 
32         trie.deleteWord(wd3, 4);
33         // wd1,wd2,wd2
34         System.out.println(trie.wordCount(wd2));// 2
35         System.out.println(trie.wordCount(wd3));// 0
36         System.out.println(trie.getTreeSize());// 3
37         System.out.println();
38 
39         trie.addWord(wd3, 4);
40         // wd1,wd2,wd2,wd3
41         System.out.println(trie.wordCount(wd2));// 2
42         System.out.println(trie.wordCount(wd3));// 1
43         System.out.println(trie.getTreeSize());// 4
44         System.out.println();
45 
46         trie.deleteWord(wd2, 2);
47         trie.deleteWord(wd2, 3);
48         // wd1,wd3
49         System.out.println(trie.wordCount(wd2));// 0
50         System.out.println(trie.wordCount(wd3));// 1
51         System.out.println(trie.getTreeSize());// 4
52         System.out.println(trie.searchPrefixWord("a"));// [1,4]
53         System.out.println();
54 
55         trie.updateWord(wd3, wd4, 4);
56         // wd1,wd3
57         System.out.println(trie.searchPrefixWord("a"));// [1,4]
58         System.out.println(trie.wordCount(wd2));// 0
59         System.out.println(trie.wordCount(wd3));// 1
60         System.out.println(trie.wordCount(wd4));// 0
61         System.out.println(trie.getTreeSize());// 4
62         System.out.println();
63 
64         trie.updateWord(wd4, wd3, 4);
65         // wd1,wd4
66         System.out.println(trie.searchPrefixWord("a"));// [1,4]
67         System.out.println(trie.wordCount(wd2));// 0
68         System.out.println(trie.wordCount(wd3));// 0
69         System.out.println(trie.wordCount(wd4));// 1
70         System.out.println(trie.getTreeSize());// 4
71         System.out.println();
72     }
73 }
View Code

3、参考资料

http://www.cnblogs.com/huangxincheng/archive/2012/11/25/2788268.html

原文地址:https://www.cnblogs.com/z-sm/p/5997286.html