获取文章中的字母个数和单词个数

获取文章中各个字母个数的代码 

1 package text1; 
2 import java.io.File; 
3 import java.io.FileReader;
 
4 import java.io.IOException;
 
5 import  java.text.DecimalFormat;

6
/**
 * Counts how often each ASCII letter occurs in a text file and prints, for
 * every letter, its absolute count and its share of all counted letters.
 *
 * <p>Lower-case letters are tallied in slots 0..25 of the frequency array and
 * upper-case letters in slots 26..51.
 */
public class Text {

    /** Number of letters in the ASCII alphabet; also the offset of the upper-case slots. */
    private static final int ALPHABET_SIZE = 26;

    /** Size of the chunk buffer used while reading the input file. */
    private static final int READ_BUFFER_SIZE = 8192;

    /**
     * Loads the hard-coded article, counts its letters and prints the total
     * followed by one line per letter ({@code a}..{@code z}, then
     * {@code A}..{@code Z}).
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        int[] freArray = new int[2 * ALPHABET_SIZE];
        char[] charArray = loadCharArrayFromFileName("D://article//Harry Potter and the Sorcerer's Stone.txt");
        if (charArray == null) {
            // The loader has already reported the I/O failure; nothing to count.
            return;
        }
        computeFrequency(freArray, charArray);

        // Kept as a double so the printed total and the percentage maths stay floating point.
        double num = 0;
        for (int count : freArray) {
            num += count;
        }
        System.out.println(num);

        DecimalFormat df = new DecimalFormat("######0.00");
        for (int i = 0; i < ALPHABET_SIZE; i++) {
            System.out.println(((char) ('a' + i)) + ":" + freArray[i] + " "
                    + df.format(percentage(freArray[i], num)) + "%");
        }
        for (int i = ALPHABET_SIZE; i < freArray.length; i++) {
            System.out.println(((char) ('A' + i - ALPHABET_SIZE)) + ":" + freArray[i] + " "
                    + df.format(percentage(freArray[i], num)) + "%");
        }
    }

    /**
     * Adds the letter occurrences found in {@code charArray} to {@code freArray}.
     * Characters other than {@code a-z} and {@code A-Z} are ignored.
     *
     * @param freArray  counters to increment; must have at least 52 slots
     *                  (0..25 for {@code a-z}, 26..51 for {@code A-Z})
     * @param charArray characters to scan
     */
    public static void computeFrequency(int[] freArray, char[] charArray) {
        for (char c : charArray) {
            if (c >= 'A' && c <= 'Z') {
                freArray[c - 'A' + ALPHABET_SIZE]++;
            } else if (c >= 'a' && c <= 'z') {
                freArray[c - 'a']++;
            }
        }
    }

    /**
     * Reads the whole file into a character array using the platform default
     * charset.
     *
     * <p>The file is read in chunks until end of stream, so its size is not
     * limited by a fixed buffer, and the returned array contains exactly the
     * characters read (no padding).
     *
     * @param name path of the file to read
     * @return the file contents, or {@code null} if the file could not be
     *         opened or read (the exception's stack trace is printed)
     */
    public static char[] loadCharArrayFromFileName(String name) {
        StringBuilder contents = new StringBuilder();
        char[] buffer = new char[READ_BUFFER_SIZE];
        // try-with-resources closes the reader on every path and never touches
        // a reader that failed to open.
        try (FileReader fr = new FileReader(new File(name))) {
            int read;
            while ((read = fr.read(buffer)) != -1) {
                contents.append(buffer, 0, read);
            }
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
        char[] result = new char[contents.length()];
        contents.getChars(0, contents.length(), result, 0);
        return result;
    }

    /** Returns {@code count} as a percentage of {@code total}, or 0 when {@code total} is 0. */
    private static double percentage(int count, double total) {
        return total == 0 ? 0 : (100.0 * count) / total;
    }
}

程序运行截图:

获取文章中各个单词个数,并输出前n个最常用单词和所有单词的代码:

  1 package text1;
  2 
  3 import java.io.*;
  4 import java.util.ArrayList;
  5 import java.util.Collections;
  6 import java.util.Comparator;
  7 import java.util.Date;
  8 import java.util.HashMap;
  9 import java.util.List;
 10 import java.util.Map;
 11 import java.util.Scanner;
 12 import java.util.Set;
 13 import java.util.TreeMap;
 14 import java.util.stream.Collectors;
 15 
 16 
 /**
  * Counts how often each word occurs in a text file and prints the total
  * number of words followed by every distinct word with its count, most
  * frequent first.
  *
  * <p>Words are the non-empty tokens obtained by splitting the text on runs of
  * non-word characters ({@code \W+}); matching is case-sensitive.
  */
 public class Test2 {

     /** Regular expression for the separator between words: one or more non-word characters. */
     private static final String WORD_SEPARATOR = "\\W+";

     /**
      * Opens the hard-coded article file for reading.
      *
      * @return a reader on the file, or {@code null} if it could not be opened
      *         (the exception's stack trace is printed)
      */
     public static Reader findFile() {
         File f = new File("D://article//Harry Potter and the Sorcerer's Stone.txt");
         try {
             return new FileReader(f);
         } catch (IOException e) {
             e.printStackTrace();
             return null;
         }
     }

     /**
      * Wraps a reader in a buffered reader.
      *
      * @param in the reader to buffer
      * @return a {@link BufferedReader} over {@code in}
      */
     public static BufferedReader inputPipe(Reader in) {
         return new BufferedReader(in);
     }

     /**
      * Reads every line from {@code br} and returns the text, then closes both
      * readers.
      *
      * <p>Each line is followed by a newline in the result so that the last word
      * of one line is not fused with the first word of the next.
      *
      * @param br buffered reader to read lines from
      * @param in underlying reader, closed together with {@code br}
      * @return the text read; empty if there was no input. If an
      *         {@link IOException} occurs its stack trace is printed and the
      *         text read so far is returned.
      */
     public static String readAll(BufferedReader br, Reader in) {
         StringBuilder text = new StringBuilder();
         // Both readers are closed on every path, including when readLine() fails.
         try (Reader source = in; BufferedReader reader = br) {
             String line;
             while ((line = reader.readLine()) != null) {
                 text.append(line).append('\n');
             }
         } catch (IOException e) {
             e.printStackTrace();
         }
         return text.toString();
     }

     /**
      * Splits {@code allwords} into words, adds their occurrence counts to
      * {@code map}, and prints the total word count followed by every entry of
      * {@code map} ordered by descending count (ties in alphabetical order).
      *
      * @param allwords text to analyse; {@code null} is treated as empty text
      * @param map      word-to-count map that is updated in place
      */
     public static void spiltAndCount(String allwords, Map<String, Integer> map) {
         int total = 0;
         if (allwords != null) {
             for (String word : allwords.split(WORD_SEPARATOR)) {
                 // split() yields an empty first token when the text starts with a separator.
                 if (word.isEmpty()) {
                     continue;
                 }
                 map.merge(word, 1, Integer::sum);
                 total++;
             }
         }

         System.out.println("总单词数:" + total);

         List<Map.Entry<String, Integer>> entries = new ArrayList<>(map.entrySet());
         entries.sort((a, b) -> {
             int byCount = b.getValue().compareTo(a.getValue());
             // Alphabetical tie-break keeps the output deterministic.
             return byCount != 0 ? byCount : a.getKey().compareTo(b.getKey());
         });

         // All distinct words are printed; cap this loop to show only the top n.
         for (Map.Entry<String, Integer> entry : entries) {
             System.out.println(entry.getKey() + ": " + entry.getValue());
         }
     }

     /**
      * Reads the article and prints its word statistics.
      *
      * @param args ignored
      */
     public static void main(String[] args) {
         Reader in = Test2.findFile();
         if (in == null) {
             // findFile() has already reported why the file could not be opened.
             return;
         }
         Map<String, Integer> map = new HashMap<>();
         BufferedReader br = Test2.inputPipe(in);
         String allwords = Test2.readAll(br, in);
         Test2.spiltAndCount(allwords, map);
     }
 }

  程序运行截图:

原文地址:https://www.cnblogs.com/wendi/p/11805178.html