单词统计(续)

第1步:输出单个文件中的前 N 个最常出现的英语单词。

功能1:输出文件中所有不重复的单词,按照出现次数由多到少排列,出现次数同样多的,以字典序排列。

功能2: 指定文件目录,对目录下每一个文件执行统计的操作。 

功能3:指定文件目录,递归遍历该目录及其所有子目录,对其中的每一个文件执行单词统计。

功能4:输出出现次数最多的前 n 个单词。

package test;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
import java.util.StringTokenizer;

/**
 * Console tool that reports letter and word frequencies for one text file.
 *
 * <p>The file is read once at start-up; an interactive menu then lets the user
 * print letter statistics, the full word ranking, or the top-n words.
 * Words are maximal runs of {@code [A-Za-z0-9_]} (the regex {@code \W+} is the
 * separator) and are compared case-sensitively.
 */
public class test2 {

    /** Input file used when no path is supplied on the command line. */
    private static final String DEFAULT_PATH = "D:\\飘c1.txt";

    /** Charset used to decode the input file. */
    private static final String INPUT_CHARSET = "gbk";

    /** Regex separating words: one or more non-word characters. */
    private static final String WORD_SEPARATOR = "\\W+";

    /** Banner printed before every word listing. */
    private static final String BANNER = "//////////////////////////////";

    /** Orders word/count pairs by count descending, then by word ascending. */
    private static final Comparator<Map.Entry<String, Integer>> BY_COUNT_THEN_WORD =
            new Comparator<Map.Entry<String, Integer>>() {
                @Override
                public int compare(Map.Entry<String, Integer> left, Map.Entry<String, Integer> right) {
                    int byCount = Integer.compare(right.getValue(), left.getValue());
                    return byCount != 0 ? byCount : left.getKey().compareTo(right.getKey());
                }
            };

    /**
     * Reads the input file and runs the interactive menu until the user enters 0
     * or standard input ends.
     *
     * @param args optional; {@code args[0]} overrides the default input path
     * @throws IOException if the input file cannot be read
     */
    public static void main(String[] args) throws IOException {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
        String text = readAll(new File(path));

        // Not closed on purpose: closing the scanner would also close System.in.
        Scanner scan = new Scanner(System.in);
        int option = 10;
        while (option != 0) {
            System.out.println("1、统计字母的个数  2、统计单词个数 3、统计出现最多次数的几个单词  4、统计删除无用表后的单词 0、退出");
            if (!scan.hasNext()) {
                break; // standard input is exhausted
            }
            if (!scan.hasNextInt()) {
                scan.next(); // discard the non-numeric token and show the menu again
                continue;
            }
            option = scan.nextInt();
            switch (option) {
                case 1:
                    tongjizimu(text);
                    break;
                case 2:
                    tongjidanci(text);
                    break;
                case 3:
                    System.out.println("显示前n个出现最多的单词,请输入n");
                    // A non-numeric answer is left for the next loop iteration to discard.
                    if (scan.hasNextInt()) {
                        tongjidanci1(text, scan.nextInt());
                    }
                    break;
                case 4:
                    tongjidanci2(text);
                    break;
                case 0:
                    System.out.println("已退出。");
                    break;
                default:
                    break;
            }
        }
    }

    /**
     * Reads a whole file into memory, decoding it with {@link #INPUT_CHARSET}.
     *
     * @param file file to read
     * @return the decoded file content
     * @throws IOException if the file cannot be opened or read
     */
    private static String readAll(File file) throws IOException {
        StringBuilder content = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(file), INPUT_CHARSET))) {
            char[] buffer = new char[8192];
            int read;
            // Read until end of stream; ready() only says whether a read would block.
            while ((read = reader.read(buffer)) != -1) {
                content.append(buffer, 0, read);
            }
        }
        return content.toString();
    }

    /**
     * Lower-cases an ASCII upper-case letter; every other character is returned unchanged.
     *
     * @param c character to convert
     * @return the lower-case form of {@code c} for {@code 'A'..'Z'}, otherwise {@code c}
     */
    static char ch(char c) {
        if (c >= 'A' && c <= 'Z') {
            c += 32; // distance between 'A' and 'a' in ASCII
        }
        return c;
    }

    /**
     * Counts the occurrences of every word in {@code str}.
     *
     * @param str text to analyse; {@code null} is treated as empty
     * @return map from word to number of occurrences; empty tokens are never included
     */
    private static HashMap<String, Integer> countWords(String str) {
        HashMap<String, Integer> counts = new HashMap<String, Integer>();
        if (str == null) {
            return counts;
        }
        for (String word : str.split(WORD_SEPARATOR)) {
            if (word.isEmpty()) {
                continue; // split yields "" when the text is empty or starts with a separator
            }
            Integer seen = counts.get(word);
            counts.put(word, seen == null ? 1 : seen + 1);
        }
        return counts;
    }

    /**
     * Returns the distinct words of {@code str}.
     *
     * <p>The order is the iteration order of the underlying {@link HashMap}; for the
     * same input, element {@code i} here corresponds to element {@code i} of
     * {@link #StatList1(String)}.
     *
     * @param str text to analyse
     * @return the distinct words, in unspecified order
     */
    static String[] StatList(String str) {
        HashMap<String, Integer> counts = countWords(str);
        return counts.keySet().toArray(new String[0]);
    }

    /**
     * Returns the occurrence count of each distinct word of {@code str}.
     *
     * <p>For the same input, element {@code i} is the count of the word at index
     * {@code i} of {@link #StatList(String)}.
     *
     * @param str text to analyse
     * @return occurrence counts, parallel to {@link #StatList(String)}
     */
    static int[] StatList1(String str) {
        HashMap<String, Integer> counts = countWords(str);
        int[] result = new int[counts.size()];
        int next = 0;
        for (Integer count : counts.values()) {
            result[next++] = count;
        }
        return result;
    }

    /**
     * Prints, for each of the 26 English letters, how often it occurs in {@code a}
     * (case-insensitively) and its share of all counted letters, most frequent first.
     * Letters with equal counts stay in alphabetical order.
     *
     * @param a text to analyse; {@code null} is treated as empty
     */
    public static void tongjizimu(String a) {
        DecimalFormat df = new DecimalFormat("######0.00");
        String text = a == null ? "" : a;

        // Single pass over the text instead of one pass per letter.
        final int[] counts = new int[26];
        for (int i = 0; i < text.length(); i++) {
            char lower = ch(text.charAt(i));
            if (lower >= 'a' && lower <= 'z') {
                counts[lower - 'a']++;
            }
        }

        // Sort letter indices by count, descending; the sort is stable, so ties stay alphabetical.
        List<Integer> order = new ArrayList<Integer>(26);
        for (int k = 0; k < 26; k++) {
            order.add(k);
        }
        Collections.sort(order, new Comparator<Integer>() {
            @Override
            public int compare(Integer left, Integer right) {
                return Integer.compare(counts[right], counts[left]);
            }
        });

        System.out.println("这篇文章中英文字母个数分别为:");
        System.out.println("排序如下:");
        double sum = 0;
        for (int index : order) {
            System.out.println((char) ('a' + index) + "字母个数为:" + counts[index]);
            sum = sum + counts[index];
        }
        for (int index : order) {
            // Avoid 0/0 = NaN when the text contains no letters at all.
            double percent = sum == 0 ? 0.0 : counts[index] / sum * 100;
            System.out.println((char) ('a' + index) + "字母频率为:" + df.format(percent) + "%");
        }
    }

    /**
     * Returns all distinct words of {@code text} with their counts, ordered by count
     * descending and, for equal counts, by word in dictionary order.
     *
     * @param text text to analyse
     * @return ranked word/count pairs
     */
    private static List<Map.Entry<String, Integer>> rankWords(String text) {
        List<Map.Entry<String, Integer>> ranked =
                new ArrayList<Map.Entry<String, Integer>>(countWords(text).entrySet());
        Collections.sort(ranked, BY_COUNT_THEN_WORD);
        return ranked;
    }

    /**
     * Prints the banner followed by at most {@code limit} entries of the word ranking.
     *
     * @param text  text to analyse
     * @param limit maximum number of words to print; values below 1 print none
     */
    private static void printRanking(String text, int limit) {
        List<Map.Entry<String, Integer>> ranked = rankWords(text);
        System.out.println(BANNER);
        int shown = Math.min(limit, ranked.size());
        for (int i = 0; i < shown; i++) {
            Map.Entry<String, Integer> entry = ranked.get(i);
            System.out.println("单词:" + entry.getKey() + "  且出现的次数:" + entry.getValue());
        }
    }

    /**
     * Prints every distinct word of {@code a} with its count, most frequent first;
     * words with equal counts are listed in dictionary order.
     *
     * @param a text to analyse
     */
    public static void tongjidanci(String a) {
        printRanking(a, Integer.MAX_VALUE);
    }

    /**
     * Prints the {@code n} most frequent words of {@code a} with their counts.
     * If fewer than {@code n} distinct words exist, all of them are printed.
     *
     * @param a text to analyse
     * @param n number of words to print
     */
    public static void tongjidanci1(String a, int n) {
        printRanking(a, n);
    }

    /**
     * Prints every distinct word of {@code a} with its count, most frequent first.
     *
     * <p>NOTE(review): the menu labels this option as counting words after removing a
     * stop-word list, but no such list exists in this class and no filtering is
     * applied; the output is the same as {@link #tongjidanci(String)}. Confirm the
     * intended stop words before adding a filter.
     *
     * @param a text to analyse
     */
    public static void tongjidanci2(String a) {
        printRanking(a, Integer.MAX_VALUE);
    }

}

原文地址:https://www.cnblogs.com/zlj843767688/p/11001201.html