4.28课堂练习—统计文本文件中的英文字母和单词

统计文本文件26个字母的出现频率

源代码：

 1 package text;
 2 import java.io.BufferedReader;
 3 import java.io.FileReader;
 4 import java.io.IOException;
 5 import java.text.NumberFormat;
 6 
 7 public class word3 {
 8     
 9      private static String formattedDecimalToPercentage(double decimal)
10         {
11             //获取格式化对象
12             NumberFormat nt = NumberFormat.getPercentInstance();
13             //设置百分数精确度2即保留两位小数
14             nt.setMinimumFractionDigits(2);
15             return nt.format(decimal);
16         }
17     
18 public static void main(String []args) {
19     String a1;
20     char a='A';
21     int a2[]=new int[27];
22     char b1[]=new char[26];
23     char b2[]=new char[26];
24     
25     for(int i=0;i<26;i++)
26     {
27     b1[i]=a;
28     b2[i]=(char)(a+32);
29     a++;
30     }    
31     try {
32         BufferedReader in = new BufferedReader(new FileReader("E:\eclipse\p.txt"));
33         String str;
34         while ((str = in.readLine()) != null) {
35            
36             char[] d=str.toCharArray();
37             for(int i=0;i<d.length-1;i++) {
38                 for(int j=0;j<26;j++) {
39                     if(b1[j]==d[i]||b2[j]==d[i]) {
40                         a2[j]++;
41                     }
42                           
43                 }
44                 
45             }         
46         }  
47         a2[26]=0;
48         for(int i=0;i<26;i++) {
49             a2[26]=a2[i]+a2[26];
50         }
51         for(int i=0;i<26;i++) {
52             System.out.print(b2[i]);
53             System.out.print("出现的次数为：");
54             System.out.println(a2[i]);
55             double d=(double)((double)a2[i]/(double)a2[26]);
56             String result2=formattedDecimalToPercentage(d);
57             System.out.println("频率 = "+result2);
58         }
59     } catch (IOException e) {
60     }    
61 }
62 }

运行结果：

统计文本文档中每个单词出现的次数：

 1 package text;
 2 import java.io.BufferedReader;
 3 import java.io.FileReader;
 4 import java.util.*;
 5 public class word1 {
 6     public static void main(String [] args) throws Exception {
 7 
 8         BufferedReader br = new BufferedReader(new FileReader("E:\eclipse\p.txt"));
 9 
10         StringBuffer sb = new StringBuffer();
11         String text =null;
12         while ((text=br.readLine())!= null){
13             sb.append(text);// 将读取出的字符追加到stringbuffer中
14         }
15         br.close();  // 关闭读入流
16 
17         String str = sb.toString().toLowerCase(); // 将stringBuffer转为字符并转换为小写
18         String[] words = str.split("[^(a-zA-Z)]+");  // 非单词的字符来分割，得到所有单词
19         Map<String ,Integer> map = new HashMap<String, Integer>() ;
20 
21         for(String word :words){
22             if(map.get(word)==null){  // 若不存在说明是第一次，则加入到map,出现次数为1
23                 map.put(word,1);
24             }else{
25                 map.put(word,map.get(word)+1);  // 若存在，次数累加1
26             }
27         }
28 
29         // 排序
30         List<Map.Entry<String ,Integer>> list = new ArrayList<Map.Entry<String,Integer>>(map.entrySet());
31 
32         Comparator<Map.Entry<String,Integer>> comparator = new Comparator<Map.Entry<String, Integer>>() {
33             public int compare(Map.Entry<String, Integer> left, Map.Entry<String, Integer> right) {
34                 return (left.getValue().compareTo(right.getValue()));
35             }
36         };
37         // 集合默认升序升序
38         Collections.sort(list,comparator);
39 
40         for(int i=0;i<list.size();i++){// 由高到低输出
41             System.out.println(list.get(list.size()-i-1).getKey() +":"+list.get(list.size()-i-1).getValue());
42         }
43 
44     }
45 }

运行结果：