单词统计

1. 统计字母出现的频率(不分大小写)

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.text.DecimalFormat;
import java.text.NumberFormat;

/**
 * Prints how often each English letter occurs in a text file, ignoring case.
 *
 * <p>The output is a single line of the form {@code a=12(0.08),b=3(0.02),...}: for every
 * letter from {@code a} to {@code z}, the number of occurrences followed, in parentheses, by
 * that letter's share of all counted letters rounded to two decimal places.
 */
public class a {

    /** File analysed when no path is passed on the command line. */
    private static final String DEFAULT_PATH = "f:/piao.txt";

    /** Number of letters in the English alphabet. */
    private static final int LETTERS = 26;

    /**
     * Reads the input file and prints the letter statistics to standard output.
     *
     * @param args optional; {@code args[0]}, when present, overrides the default input path
     * @throws IOException if the file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
        System.out.print(formatReport(countLetters(readAll(path))));
    }

    /**
     * Reads a whole text file into one string. Line terminators are dropped, which is harmless
     * here because only letters are counted.
     *
     * @param path location of the file to read
     * @return the concatenated lines of the file
     * @throws IOException if the file cannot be opened or read
     */
    static String readAll(String path) throws IOException {
        BufferedReader reader = new BufferedReader(new FileReader(path));
        try {
            StringBuilder text = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                text.append(line);
            }
            return text.toString();
        } finally {
            // Close even when a read fails so the file handle is never leaked.
            reader.close();
        }
    }

    /**
     * Counts the ASCII letters of {@code text} case-insensitively.
     *
     * <p>Characters are inspected directly instead of going through {@code getBytes()}: with a
     * multi-byte platform charset (for example GBK) the encoded bytes of a non-ASCII character
     * can fall into the ASCII letter range and would be miscounted.
     *
     * @param text the text to analyse
     * @return an array of 26 counters; index 0 is {@code a}/{@code A}, index 25 is
     *         {@code z}/{@code Z}
     */
    static int[] countLetters(String text) {
        int[] counts = new int[LETTERS];
        for (int i = 0; i < text.length(); i++) {
            char ch = text.charAt(i);
            if (ch >= 'a' && ch <= 'z') {
                counts[ch - 'a']++;
            } else if (ch >= 'A' && ch <= 'Z') {
                counts[ch - 'A']++;
            }
        }
        return counts;
    }

    /**
     * Renders the counters as {@code a=<count>(<share>),b=<count>(<share>),...}.
     *
     * @param counts 26 per-letter counters as produced by {@link #countLetters(String)}
     * @return the report line; every share is {@code 0.0} when no letter was counted
     */
    static String formatReport(int[] counts) {
        long total = 0;
        for (int i = 0; i < LETTERS; i++) {
            total += counts[i];
        }
        StringBuilder report = new StringBuilder();
        for (int i = 0; i < LETTERS; i++) {
            // Floating-point division so the share is rounded, not truncated; the guard avoids
            // dividing by zero when the input contains no letters.
            double share = (total == 0) ? 0.0 : Math.round(counts[i] * 100.0 / total) / 100.0;
            report.append((char) ('a' + i)).append('=').append(counts[i])
                    .append('(').append(share).append("),");
        }
        return report.toString();
    }
}

截图:

2. 统计单词出现的次数

import java.io.BufferedReader;
import java.io.FileReader;
import java.util.*;


/**
 * Counts how many times each English word occurs in a text file and prints the words from
 * most to least frequent, one {@code word:count} pair per line.
 *
 * <p>Words are maximal runs of the letters a-z after lower-casing; every other character is a
 * separator.
 */
public class b {

    /** File analysed when no path is passed on the command line. */
    private static final String DEFAULT_PATH = "f:/飘c1.txt";

    /**
     * Reads the input file and prints the word frequencies to standard output.
     *
     * @param args optional; {@code args[0]}, when present, overrides the default input path
     * @throws Exception if the file cannot be opened or read
     */
    public static void main(String[] args) throws Exception {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
        for (Map.Entry<String, Integer> entry : sortByFrequency(countWords(readAll(path)))) {
            System.out.println(entry.getKey() + ":" + entry.getValue());
        }
    }

    /**
     * Reads a whole text file into one string, keeping a newline after every line.
     *
     * @param path location of the file to read
     * @return the file content with {@code '\n'} line terminators
     * @throws java.io.IOException if the file cannot be opened or read
     */
    static String readAll(String path) throws java.io.IOException {
        BufferedReader reader = new BufferedReader(new FileReader(path));
        try {
            StringBuilder text = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                // readLine() strips the terminator; put a separator back so the last word of
                // one line does not fuse with the first word of the next.
                text.append(line).append('\n');
            }
            return text.toString();
        } finally {
            // Close even when a read fails so the file handle is never leaked.
            reader.close();
        }
    }

    /**
     * Counts the occurrences of every word in {@code text}, ignoring case.
     *
     * @param text the text to analyse
     * @return a map from lower-cased word to its number of occurrences; never contains the
     *         empty string
     */
    static Map<String, Integer> countWords(String text) {
        Map<String, Integer> counts = new HashMap<String, Integer>();
        // Fixed locale: the default-locale toLowerCase() maps 'I' to a dotless i in Turkish.
        String lowered = text.toLowerCase(Locale.ENGLISH);
        for (String word : lowered.split("[^a-z]+")) {
            // split() yields a leading "" when the text starts with a separator.
            if (word.length() == 0) {
                continue;
            }
            Integer seen = counts.get(word);
            counts.put(word, seen == null ? 1 : seen + 1);
        }
        return counts;
    }

    /**
     * Orders the entries of {@code counts} by descending count; words with the same count are
     * ordered alphabetically so the output is deterministic.
     *
     * @param counts word-to-count map, not modified
     * @return a new list of the map's entries in output order
     */
    static List<Map.Entry<String, Integer>> sortByFrequency(Map<String, Integer> counts) {
        List<Map.Entry<String, Integer>> entries =
                new ArrayList<Map.Entry<String, Integer>>(counts.entrySet());
        Collections.sort(entries, new Comparator<Map.Entry<String, Integer>>() {
            @Override
            public int compare(Map.Entry<String, Integer> left, Map.Entry<String, Integer> right) {
                int byCount = right.getValue().compareTo(left.getValue());
                return byCount != 0 ? byCount : left.getKey().compareTo(right.getKey());
            }
        });
        return entries;
    }
}

  截图

3. 去除无用词

import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Scanner;

/**
 * Counts the words of a text file, drops the stop words "a" and "the", and prints the N most
 * frequent remaining words, where N is read from standard input.
 *
 * <p>Counting is case-sensitive ("Cat" and "cat" are different words); only the stop-word
 * check ignores case, so sentence-initial "The" and "A" are dropped as well.
 */
public class English_word {

    /** One or more separators: whitespace, '+', curly quotes and common punctuation. */
    private static final String SEPARATORS = "[\\s+”“();,.?!]+";

    /**
     * Runs the analysis on {@code D:\Englis_letter.txt}.
     *
     * <p>Prints every counted word once, then reads an integer N from standard input and
     * prints the N most frequent words (fewer if the file has fewer distinct words).
     *
     * @param args unused
     * @throws FileNotFoundException if the file disappears between the existence check and
     *         opening it
     */
    public static void main(String[] args) throws FileNotFoundException {
        File file = new File("D:\\Englis_letter.txt");
        if (!file.exists()) {
            System.out.println("文件不存在");
            return;
        }

        Map<String, Integer> counts = new HashMap<String, Integer>();
        Scanner lines = new Scanner(file);
        try {
            while (lines.hasNextLine()) {
                countLine(lines.nextLine(), counts);
            }
        } finally {
            // Release the file handle even if reading fails.
            lines.close();
        }

        List<Map.Entry<String, Integer>> ranked = rank(counts);
        for (Map.Entry<String, Integer> entry : ranked) {
            System.out.println(entry.getKey());
        }

        Scanner console = new Scanner(System.in);
        int requested = console.nextInt();
        // Never read past the words that actually exist.
        int shown = Math.min(requested, ranked.size());
        for (int j = 0; j < shown; j++) {
            Map.Entry<String, Integer> entry = ranked.get(j);
            System.out.println(entry.getKey() + " 出现次数:" + entry.getValue());
        }
    }

    /**
     * Splits one line into words and adds each occurrence to {@code counts}.
     *
     * @param line   a line of text without its terminator
     * @param counts word-to-count map that is updated in place
     */
    static void countLine(String line, Map<String, Integer> counts) {
        for (String word : line.split(SEPARATORS)) {
            // split() yields a leading "" when the line starts with a separator.
            if (word.length() == 0) {
                continue;
            }
            Integer seen = counts.get(word);
            counts.put(word, seen == null ? 1 : seen + 1);
        }
    }

    /**
     * Returns the non-stop-word entries of {@code counts}, most frequent first; words with the
     * same count are ordered alphabetically so the result is deterministic.
     *
     * @param counts word-to-count map, not modified
     * @return a new list of the remaining entries in output order
     */
    static List<Map.Entry<String, Integer>> rank(Map<String, Integer> counts) {
        List<Map.Entry<String, Integer>> kept = new ArrayList<Map.Entry<String, Integer>>();
        for (Map.Entry<String, Integer> entry : counts.entrySet()) {
            if (entry.getKey().length() > 0 && !isStopWord(entry.getKey())) {
                kept.add(entry);
            }
        }
        Collections.sort(kept, new Comparator<Map.Entry<String, Integer>>() {
            @Override
            public int compare(Map.Entry<String, Integer> left, Map.Entry<String, Integer> right) {
                int byCount = right.getValue().compareTo(left.getValue());
                return byCount != 0 ? byCount : left.getKey().compareTo(right.getKey());
            }
        });
        return kept;
    }

    /** Tells whether {@code word} is one of the ignored words "a" or "the", in any letter case. */
    private static boolean isStopWord(String word) {
        String lowered = word.toLowerCase(Locale.ENGLISH);
        return "a".equals(lowered) || "the".equals(lowered);
    }
}

  

4. 遍历文件统计

import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Scanner;

/**
 * Counts the words of every {@code .txt} file directly inside {@code d:/}, drops the stop
 * words "a" and "the", and prints the N most frequent remaining words, where N is read from
 * standard input.
 *
 * <p>Counting is case-sensitive; only the stop-word check ignores case. Sub-directories are
 * not visited.
 */
public class test {

    /** Word-to-count accumulator shared by all files processed in one run. */
    static HashMap<String, Integer> hashMap = new HashMap<String, Integer>();

    /** One or more separators: whitespace, '+', curly quotes and common punctuation. */
    private static final String SEPARATORS = "[\\s+”“();,.?!]+";

    /**
     * Adds the words of one file to {@link #hashMap}. Prints a message and returns without
     * counting anything when the file does not exist.
     *
     * @param ms the text file to read
     * @throws FileNotFoundException if the file exists but cannot be opened for reading
     */
    public static void English_words(File ms) throws FileNotFoundException {
        if (!ms.exists()) {
            System.out.println("文件不存在");
            return;
        }
        Scanner lines = new Scanner(ms);
        try {
            while (lines.hasNextLine()) {
                countLine(lines.nextLine());
            }
        } finally {
            // One handle per file: close it so scanning many files does not exhaust handles.
            lines.close();
        }
    }

    /**
     * Scans {@code d:/}, then reads an integer N from standard input and prints the N most
     * frequent words (fewer if fewer distinct words were found).
     *
     * @param args unused
     * @throws FileNotFoundException if a listed file cannot be opened for reading
     */
    public static void main(String[] args) throws FileNotFoundException {
        File[] candidates = new File("d:/").listFiles();
        // listFiles() returns null when the path is not a readable directory.
        if (candidates == null) {
            System.out.println("文件不存在");
            return;
        }
        for (File candidate : candidates) {
            // Regular files with a .txt extension only; a directory whose name merely ends in
            // "txt" cannot be opened by Scanner.
            boolean isTextFile = candidate.isFile()
                    && candidate.getName().toLowerCase(Locale.ENGLISH).endsWith(".txt");
            if (isTextFile) {
                System.out.println("文     件:" + candidate);
                English_words(candidate);
            }
        }

        List<Map.Entry<String, Integer>> ranked = rank(hashMap);
        Scanner console = new Scanner(System.in);
        int requested = console.nextInt();
        // Never read past the words that actually exist.
        int shown = Math.min(requested, ranked.size());
        for (int j = 0; j < shown; j++) {
            Map.Entry<String, Integer> entry = ranked.get(j);
            System.out.println(entry.getKey() + " 出现次数:" + entry.getValue());
        }
    }

    /**
     * Splits one line into words and adds each occurrence to {@link #hashMap}.
     *
     * @param line a line of text without its terminator
     */
    static void countLine(String line) {
        for (String word : line.split(SEPARATORS)) {
            // split() yields a leading "" when the line starts with a separator.
            if (word.length() == 0) {
                continue;
            }
            Integer seen = hashMap.get(word);
            hashMap.put(word, seen == null ? 1 : seen + 1);
        }
    }

    /**
     * Returns the non-stop-word entries of {@code counts}, most frequent first; words with the
     * same count are ordered alphabetically so the result is deterministic.
     *
     * @param counts word-to-count map, not modified
     * @return a new list of the remaining entries in output order
     */
    static List<Map.Entry<String, Integer>> rank(Map<String, Integer> counts) {
        List<Map.Entry<String, Integer>> kept = new ArrayList<Map.Entry<String, Integer>>();
        for (Map.Entry<String, Integer> entry : counts.entrySet()) {
            if (entry.getKey().length() > 0 && !isStopWord(entry.getKey())) {
                kept.add(entry);
            }
        }
        Collections.sort(kept, new Comparator<Map.Entry<String, Integer>>() {
            @Override
            public int compare(Map.Entry<String, Integer> left, Map.Entry<String, Integer> right) {
                int byCount = right.getValue().compareTo(left.getValue());
                return byCount != 0 ? byCount : left.getKey().compareTo(right.getKey());
            }
        });
        return kept;
    }

    /** Tells whether {@code word} is one of the ignored words "a" or "the", in any letter case. */
    private static boolean isStopWord(String word) {
        String lowered = word.toLowerCase(Locale.ENGLISH);
        return "a".equals(lowered) || "the".equals(lowered);
    }
}
原文地址:https://www.cnblogs.com/xuange1/p/10994422.html