Java词频统计

public class WordCount {

	public static void main(String[] args) {
		String[] stopWords = { "", ",", "." };
		List<String> stopWordList = Arrays.asList(stopWords);
		String strWorld = "Regular expressions are used in search engines, search and replace dialogs of word processors and text editors, in text processing utilities such as sed and AWK and in lexical analysis. Many programming languages provide regex capabilities, built-in, or via libraries.";
		String[] words = strWorld.split(" |,|\.");
		System.out.println(Arrays.toString(words));

		List<String> wordList = Arrays.asList(words);
		System.out.println(wordList);

		Multiset<String> wordSet = HashMultiset.create();
		wordSet.addAll(wordList);
		wordSet.removeAll(stopWordList);
		System.out.println("word count:" + wordSet.size());
		System.out.println("unique word count:" + wordSet.elementSet().size());
		for (String key : wordSet.elementSet()) {
			System.out.println(key + ":" + wordSet.count(key));
		}

	}
}
原文地址:https://www.cnblogs.com/acode/p/6812567.html