java-读取文件并统计文本中字母和单词出现的频率

一、统计字母的频率,并按照由大到小的频率输出

package org.yuan.HelloWorld;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;

/**
 * Counts how often each of the 26 English letters occurs in a text file
 * (upper and lower case are counted together) and prints the letters from
 * most to least frequent together with their share of all letters.
 */
public class TestFile1 {
    /** File analysed when no path is supplied on the command line. */
    private static final String DEFAULT_PATH = "d:/Harry Potter and the Sorcerer's Stone.txt";

    /** Number of letters in the English alphabet. */
    private static final int ALPHABET_SIZE = 26;

    /**
     * Reads the input file, counts its letters and prints the frequency table.
     *
     * @param args optional; when {@code args[0]} is present it is used as the
     *             path of the file to analyse, otherwise the default file is read
     * @throws IOException if the file cannot be opened, read or decoded
     */
    public static void main(String[] args)throws IOException
    {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
        DecimalFormat df=new DecimalFormat("######0.00");

        final int[] counts = countLetters(readAll(path));

        // Sort letter indices rather than the counts themselves so that the
        // letter/count pairing cannot get out of step. Collections.sort is
        // stable, so letters with equal counts stay in alphabetical order.
        List<Integer> list=new ArrayList<>(ALPHABET_SIZE);
        for (int i = 0; i < ALPHABET_SIZE; i++) {
            list.add(i);
        }
        Collections.sort(list, new Comparator<Integer>() {
            @Override
            public int compare(Integer left, Integer right) {
                return Integer.compare(counts[right], counts[left]);
            }
        });

        // Integer total: a floating-point total would be printed as "123.0".
        long total = 0;
        for (int count : counts) {
            total += count;
        }

        System.out.println("一共有"+total+"个字母。");
        System.out.println("各字母频率如下:");
        for (int index : list) {
            // A text without any letters would otherwise produce 0/0 = NaN.
            double percent = total == 0 ? 0.0 : counts[index] / (double) total * 100;
            System.out.println((char) ('a' + index)+":"+df.format(percent)+"%");
        }
    }

    /**
     * Reads the whole file at {@code path} as GBK-encoded text.
     * The loop runs until {@code read} reports end of stream; {@code ready()}
     * only says whether a read would block and is not an end-of-file test.
     */
    private static String readAll(String path) throws IOException {
        StringBuilder text = new StringBuilder();
        try (FileInputStream fip = new FileInputStream(path);
             InputStreamReader reader = new InputStreamReader(fip, "gbk")) {
            char[] buffer = new char[8192];
            int read;
            while ((read = reader.read(buffer)) != -1) {
                text.append(buffer, 0, read);
            }
        }
        return text.toString();
    }

    /**
     * Counts the letters of {@code text} in a single pass.
     *
     * @return an array of 26 counts; index 0 is 'a'/'A', index 25 is 'z'/'Z'
     */
    private static int[] countLetters(String text) {
        int[] counts = new int[ALPHABET_SIZE];
        for (int i = 0; i < text.length(); i++) {
            char lower = ch(text.charAt(i));
            if (lower >= 'a' && lower <= 'z') {
                counts[lower - 'a']++;
            }
        }
        return counts;
    }

    /**
     * Converts an ASCII upper-case letter to lower case.
     *
     * @param c any character
     * @return the lower-case letter for 'A'..'Z'; every other character unchanged
     */
    static char ch(char c)
    {
        if (c >= 'A' && c <= 'Z') {
            c += 32; // distance between 'A' and 'a' in ASCII
        }
        return c;
    }
}

二、统计单词的数量,并输出出现频率最高的前N个单词(N手动输入)

package org.yuan.HelloWorld;

import java.io.*;
import java.util.*;
import java.util.Map.Entry;

/**
 * Counts how often each word (a run of ASCII letters and digits, compared
 * case-insensitively) occurs in a text file and prints the N most frequent
 * words, where N is read from standard input.
 */
public class TestFile2
{
    /** Number of words to print; assigned from standard input by {@code main}. */
    public static int n=0;

    /** Text file whose words are counted. */
    private static final String INPUT_PATH = "d:/Harry Potter and the Sorcerer's Stone.txt";

    /**
     * Reads the input file, ranks its words by frequency, asks for N and
     * prints the top N entries as {@code word:count}, one per line.
     * A missing or unreadable file is reported on standard output.
     * Input at the prompt that is not an integer makes
     * {@code Scanner.nextInt} throw {@code InputMismatchException}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // Deliberately not closed: closing the Scanner would close System.in.
        Scanner sc=new Scanner(System.in);
        try
        {
            Map<String, Integer> counts = countWords(readText(INPUT_PATH));

            List<Map.Entry<String, Integer>> ranked =
                    new ArrayList<Map.Entry<String, Integer>>(counts.entrySet());
            // Descending by count. The sort is stable and the entries come
            // from a TreeMap, so words with equal counts stay alphabetical.
            Collections.sort(ranked, new Comparator<Map.Entry<String, Integer>>() {
                @Override
                public int compare(Entry<String, Integer> k1, Entry<String, Integer> k2) {
                    return k2.getValue().compareTo(k1.getValue());
                }
            });

            System.out.println("请输入N:");
            n=sc.nextInt();

            int printed = 0;
            for (Map.Entry<String, Integer> entry : ranked) {
                if (printed >= n) {
                    break;
                }
                System.out.println(entry.getKey() + ":" + entry.getValue());
                printed++;
            }
        }
        catch(FileNotFoundException e)
        {
            System.out.println("找不到指定文件");
        }
        catch(IOException e)
        {
            System.out.println("文件读取错误");
        }
    }

    /**
     * Reads the whole file at {@code path} as GBK-encoded text, the same
     * encoding TestFile1 uses for this file. The streams are closed even
     * when reading fails.
     */
    private static String readText(String path) throws IOException {
        StringBuilder text = new StringBuilder();
        try (FileInputStream in = new FileInputStream(path);
             BufferedReader reader = new BufferedReader(new InputStreamReader(in, "gbk"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                // readLine strips the line terminator; put a separator back so
                // the last word of one line does not fuse with the first word
                // of the next.
                text.append(line).append('\n');
            }
        }
        return text.toString();
    }

    /**
     * Splits {@code text} into lower-cased words and counts each one.
     *
     * @return a map from word to number of occurrences, sorted by word
     */
    private static Map<String, Integer> countWords(String text) {
        Map<String, Integer> counts = new TreeMap<String, Integer>();
        // Locale.ROOT keeps lower-casing independent of the user's locale.
        String[] words = text.toLowerCase(Locale.ROOT).split("[^a-zA-Z0-9]+");
        for (String word : words) {
            // split yields a leading "" when the text starts with a separator
            // (or is empty); that is not a word.
            if (word.isEmpty()) {
                continue;
            }
            Integer seen = counts.get(word);
            counts.put(word, seen == null ? 1 : seen + 1);
        }
        return counts;
    }
}
原文地址:https://www.cnblogs.com/tianwenjing123-456/p/11808127.html