单词统计

这次的实验测试分为很多个小部分,由于个人能力有限,我只完成了前三个部分(字母频率统计、单词频率统计、自行确定输出前多少个最常出现的单词)。其中第一部分是统计文本中26个英文字母出现的次数与比例,并降序排序:

package piao;

import java.io.BufferedReader;
import java.io.FileReader;
import java.text.NumberFormat;

/**
 * Counts how often each of the 26 English letters occurs in a text file
 * (upper and lower case are counted together) and prints the letters in
 * descending order of frequency, together with their share of all letters.
 *
 * <p>Usage: {@code java piao.text0 [file]}. When no file is given the
 * default path below is used.
 */
public class text0{
    /**
     * Default input file. Forward slashes are used throughout because
     * {@code "D:\java"} is not a legal Java string literal ({@code \j} is an
     * invalid escape sequence).
     */
    private static final String DEFAULT_PATH = "D:/java/eclipse/测试/piao.txt";

    /**
     * Reads the input file, counts its letters and prints the ranking.
     *
     * @param args optional; {@code args[0]} overrides the default input path
     * @throws Exception if the file cannot be opened or read
     */
    public static void main(String[] args) throws Exception {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
        BufferedReader br = new BufferedReader(new FileReader(path));
        int[] count;
        try {
            count = countLetters(br);
        } finally {
            // Close the file even when reading fails.
            br.close();
        }
        // The counts are deliberately NOT passed through Paixu first: sorting
        // the bare counts would detach every count from its letter. Print
        // does the ranking itself while keeping the letter association.
        Print(count);
    }

    /** Tallies the letters A-Z / a-z read from {@code br} into a 26-element array indexed by letter. */
    private static int[] countLetters(BufferedReader br) throws java.io.IOException {
        int[] count = new int[26];
        int ch;
        while ((ch = br.read()) != -1) {
            if (ch >= 'A' && ch <= 'Z') {
                count[ch - 'A']++;
            } else if (ch >= 'a' && ch <= 'z') {
                count[ch - 'a']++;
            }
        }
        return count;
    }

    /**
     * Sorts the given array in place into descending order.
     *
     * <p>Note that this reorders the values only; if the array is indexed by
     * letter, that association is lost after the call.
     *
     * @param count the values to sort (modified in place)
     * @return the same array instance, now sorted from largest to smallest
     */
    public static int[] Paixu(int[] count) {
        int temp;
        int size=count.length;
        for(int i=0;i<size-1;i++) {
            for(int j=i+1;j<size;j++) {
                if(count[i]<count[j]){
                    temp=count[j];
                    count[j]=count[i];
                    count[i]=temp;
                }
            }
        }
        return count;
    }

    /**
     * Prints one line per letter that occurred at least once, most frequent
     * first, in the form {@code letter(count)(percent%)}.
     *
     * @param count occurrences per letter; index 0 is 'a', index 1 is 'b', ...
     */
    public static void Print(int[] count) {
        NumberFormat numberFormat = NumberFormat.getInstance();
        // Keep at most two digits after the decimal point.
        numberFormat.setMaximumFractionDigits(2);
        int sum=0;
        for(int i=0;i<count.length;i++) {
            sum=count[i]+sum;
        }
        int[] order = lettersByDescendingCount(count);
        for(int k=0;k<order.length;k++) {
            int letter = order[k];
            if(count[letter]<=0) {
                // Everything after the first zero is zero as well.
                break;
            }
            char lowerCase = (char)('a'+letter);
            String percent = numberFormat.format((float) count[letter] / (float) sum * 100);
            System.out.println(lowerCase+"("+count[letter]+")"+"("+percent+"%)");
        }
    }

    /** Returns the indices of {@code count} ordered by descending value; equal values keep index order. */
    private static int[] lettersByDescendingCount(int[] count) {
        int[] order = new int[count.length];
        for (int i = 0; i < order.length; i++) {
            int j = i;
            // Insertion sort on indices; the strict '<' keeps ties alphabetical.
            while (j > 0 && count[order[j - 1]] < count[i]) {
                order[j] = order[j - 1];
                j--;
            }
            order[j] = i;
        }
        return order;
    }
}

第二部分是统计所有单词出现的次数并降序排序:

package piao;

import java.io.BufferedReader;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Counts how often every word (a maximal run of the letters a-z / A-Z)
 * occurs in a text file and prints all words, most frequent first, as
 * {@code word :count}. Matching is case-sensitive, so "The" and "the" are
 * counted as different words.
 *
 * <p>Usage: {@code java piao.text1 [file]}. When no file is given the
 * default path below is used.
 */
public class text1 {
    /**
     * Default input file. Forward slashes are used throughout because
     * {@code "D:\java"} is not a legal Java string literal ({@code \j} is an
     * invalid escape sequence).
     */
    private static final String DEFAULT_PATH = "D:/java/eclipse/测试/piao.txt";

    /** A word is one or more consecutive ASCII letters. */
    private static final Pattern WORD = Pattern.compile("[a-zA-Z]+");

    /**
     * Reads the input file and prints every distinct word with its count.
     *
     * @param args optional; {@code args[0]} overrides the default input path
     * @throws Exception if the file cannot be opened or read
     */
    public static void main(String[] args) throws Exception {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
        List<Map.Entry<String, Integer>> list = sortByCountDescending(countWords(readText(path)));
        // Print every entry; the list is already ordered most-frequent first.
        for (Map.Entry<String, Integer> entry : list) {
            System.out.println(entry.getKey() + " :" + entry.getValue());
        }
    }

    /** Reads the whole file into one string, keeping a line break between lines. */
    private static String readText(String path) throws java.io.IOException {
        BufferedReader re = new BufferedReader(new FileReader(path));
        try {
            StringBuilder buffer = new StringBuilder();
            String line;
            while ((line = re.readLine()) != null) {
                // readLine() strips the terminator; put a separator back so the
                // last word of one line cannot fuse with the first word of the next.
                buffer.append(line).append('\n');
            }
            return buffer.toString();
        } finally {
            re.close();
        }
    }

    /** Counts the occurrences of each word in {@code text}. */
    private static Map<String, Integer> countWords(String text) {
        Map<String, Integer> map = new TreeMap<String, Integer>();
        Matcher matcher = WORD.matcher(text);
        while (matcher.find()) {
            String word = matcher.group();
            Integer times = map.get(word);
            map.put(word, times == null ? 1 : times + 1);
        }
        return map;
    }

    /** Returns the map's entries ordered by descending count, ties in alphabetical order. */
    private static List<Map.Entry<String, Integer>> sortByCountDescending(Map<String, Integer> map) {
        List<Map.Entry<String, Integer>> list = new ArrayList<Map.Entry<String, Integer>>(map.entrySet());
        Collections.sort(list, new Comparator<Map.Entry<String, Integer>>() {
            public int compare(Map.Entry<String, Integer> left, Map.Entry<String, Integer> right) {
                int byCount = right.getValue().compareTo(left.getValue());
                return byCount != 0 ? byCount : left.getKey().compareTo(right.getKey());
            }
        });
        return list;
    }
}

第三部分是自行确定输出前多少个最常出现的单词:

package piao;

import java.io.BufferedReader;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Counts how often every word (a maximal run of the letters a-z / A-Z)
 * occurs in a text file, asks the user for a number {@code n} on standard
 * input and prints the {@code n} most frequent words as {@code word :count}.
 * Matching is case-sensitive, so "The" and "the" are counted as different
 * words.
 *
 * <p>Usage: {@code java piao.text2 [file]}. When no file is given the
 * default path below is used.
 */
public class text2 {
    /**
     * Default input file. Forward slashes are used throughout because
     * {@code "D:\java"} is not a legal Java string literal ({@code \j} is an
     * invalid escape sequence).
     */
    private static final String DEFAULT_PATH = "D:/java/eclipse/测试/piao.txt";

    /** A word is one or more consecutive ASCII letters. */
    private static final Pattern WORD = Pattern.compile("[a-zA-Z]+");

    /**
     * Reads the input file, prompts for {@code n} and prints the top
     * {@code n} words. If {@code n} exceeds the number of distinct words,
     * all words are printed; if it is zero or negative, nothing is printed.
     *
     * @param args optional; {@code args[0]} overrides the default input path
     * @throws Exception if the file cannot be opened or read
     */
    public static void main(String[] args) throws Exception {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
        List<Map.Entry<String, Integer>> list = sortByCountDescending(countWords(readText(path)));

        // The scanner is intentionally left open: closing it would also close
        // System.in for the rest of the process.
        @SuppressWarnings("resource")
        Scanner in=new Scanner(System.in);
        System.out.println("输入前n个最常出现的单词:");
        int n=in.nextInt();

        // Never read past the end of the list when n is larger than the
        // number of distinct words.
        int limit = Math.min(n, list.size());
        for (int i = 0; i < limit; i++) {
            Map.Entry<String, Integer> entry = list.get(i);
            System.out.println(entry.getKey() + " :" + entry.getValue());
        }
    }

    /** Reads the whole file into one string, keeping a line break between lines. */
    private static String readText(String path) throws java.io.IOException {
        BufferedReader re = new BufferedReader(new FileReader(path));
        try {
            StringBuilder buffer = new StringBuilder();
            String line;
            while ((line = re.readLine()) != null) {
                // readLine() strips the terminator; put a separator back so the
                // last word of one line cannot fuse with the first word of the next.
                buffer.append(line).append('\n');
            }
            return buffer.toString();
        } finally {
            re.close();
        }
    }

    /** Counts the occurrences of each word in {@code text}. */
    private static Map<String, Integer> countWords(String text) {
        Map<String, Integer> map = new TreeMap<String, Integer>();
        Matcher matcher = WORD.matcher(text);
        while (matcher.find()) {
            String word = matcher.group();
            Integer times = map.get(word);
            map.put(word, times == null ? 1 : times + 1);
        }
        return map;
    }

    /** Returns the map's entries ordered by descending count, ties in alphabetical order. */
    private static List<Map.Entry<String, Integer>> sortByCountDescending(Map<String, Integer> map) {
        List<Map.Entry<String, Integer>> list = new ArrayList<Map.Entry<String, Integer>>(map.entrySet());
        Collections.sort(list, new Comparator<Map.Entry<String, Integer>>() {
            public int compare(Map.Entry<String, Integer> left, Map.Entry<String, Integer> right) {
                int byCount = right.getValue().compareTo(left.getValue());
                return byCount != 0 ? byCount : left.getKey().compareTo(right.getKey());
            }
        });
        return list;
    }
}
原文地址:https://www.cnblogs.com/yuanxiaochou/p/11065633.html