读取JDK API文档,并根据单词出现频率排序

1. 获取 API 文档

访问 https://docs.oracle.com/javase/8/docs/api/

选中特定的类(本例为 Vector),复制其中的内容,放入 TXT 文件中(本例的代码读取 src/Vector.txt)。

2. 读取 TXT 内容,统计每个单词的出现次数并排序

package com.lgx.test;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;

/**
 * Counts how often each word occurs in the text file named by {@link #filename} and prints
 * the counts to standard output, either ordered alphabetically by word or ordered by
 * descending number of occurrences.
 *
 * <p>Words are compared case-insensitively (they are lower-cased before counting) and are
 * separated by whitespace and the punctuation characters listed in the delimiter pattern.
 */
public class ReadVectorAPI {

	/** Path of the text file to analyse; it is re-read on every call to a count method. */
	public static String filename = "src/Vector.txt";

	/** Not used by this class; kept so that existing references to the field still compile. */
	public static StringBuffer sb = null;

	/**
	 * Regular-expression character class of the characters that separate words: space,
	 * line feed, tab, carriage return and the punctuation {@code . , ; : ! ? ( ) { }}.
	 */
	private static final String WORD_DELIMITERS = "[ \n\t\r.,;:!?(){}]";

	/**
	 * Prints the word counts twice: first ordered by word, then ordered by frequency.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		try {
			// Ordered alphabetically by word.
			countWordOrderByWord();
			// Ordered by number of occurrences, most frequent first.
			countWordOrderByCount();
		} catch (FileNotFoundException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Counts the words in {@link #filename} and prints one line per distinct word, in
	 * ascending (natural {@code String}) order of the lower-cased word.
	 *
	 * <p>If the file cannot be opened or read, the stack trace is printed to standard error
	 * and nothing is written to standard output.
	 *
	 * @throws FileNotFoundException declared for backward compatibility with existing
	 *         callers; a missing file is reported via the stack trace instead
	 */
	public static void countWordOrderByWord() throws FileNotFoundException {
		try {
			Map<String, Integer> counts = readWordCounts();
			// The map is a TreeMap, so its entry set already iterates in key order.
			printCounts("=====根据单词字母排序=====", counts.entrySet());
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Counts the words in {@link #filename} and prints one line per distinct word, most
	 * frequent first. Words with the same count stay in alphabetical order because the
	 * sort is stable and starts from the alphabetically ordered map.
	 *
	 * <p>If the file cannot be opened or read, the stack trace is printed to standard error
	 * and nothing is written to standard output.
	 *
	 * @throws FileNotFoundException declared for backward compatibility with existing
	 *         callers; a missing file is reported via the stack trace instead
	 */
	public static void countWordOrderByCount() throws FileNotFoundException {
		try {
			Map<String, Integer> counts = readWordCounts();

			List<Map.Entry<String, Integer>> byCount =
					new ArrayList<Map.Entry<String, Integer>>(counts.entrySet());
			Collections.sort(byCount, new Comparator<Map.Entry<String, Integer>>() {
				@Override
				public int compare(Entry<String, Integer> o1, Entry<String, Integer> o2) {
					// Arguments are swapped to obtain descending order.
					return o2.getValue().compareTo(o1.getValue());
				}
			});
			printCounts("=====根据单词频率排序=====", byCount);
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Reads {@link #filename} line by line and tallies every non-empty, lower-cased token.
	 *
	 * @return the occurrence count of each word, keyed and ordered by the word
	 * @throws IOException if the file cannot be opened or read
	 */
	private static Map<String, Integer> readWordCounts() throws IOException {
		Map<String, Integer> counts = new TreeMap<String, Integer>();
		// try-with-resources guarantees the reader is closed even when reading fails.
		try (BufferedReader reader = new BufferedReader(new FileReader(new File(filename)))) {
			String line;
			while ((line = reader.readLine()) != null) {
				for (String token : line.split(WORD_DELIMITERS)) {
					// Adjacent delimiters produce empty tokens; they are not words.
					if (token.isEmpty()) {
						continue;
					}
					// Locale.ROOT keeps the result independent of the machine's default locale.
					String word = token.toLowerCase(Locale.ROOT);
					Integer seen = counts.get(word);
					counts.put(word, seen == null ? 1 : seen + 1);
				}
			}
		}
		return counts;
	}

	/**
	 * Prints a heading followed by one line per entry, in the iteration order given.
	 *
	 * @param heading the line printed before the entries
	 * @param entries the word/count pairs to print
	 */
	private static void printCounts(String heading, Iterable<Map.Entry<String, Integer>> entries) {
		System.out.println(heading);
		for (Map.Entry<String, Integer> entry : entries) {
			System.out.println(entry.getKey() + " 在API文档中出现了 " + entry.getValue() + " 次");
		}
	}
}

输出结果的前一部分截图如下:

原文地址:https://www.cnblogs.com/lgx211/p/10181737.html