将一个无法一次读入内存的大文件排序

花了一整天才弄出来，直接上代码。

  1 package com.test;
  2 
  3 import java.io.BufferedReader;
  4 import java.io.BufferedWriter;
  5 import java.io.File;
  6 import java.io.FileReader;
  7 import java.io.FileWriter;
  8 import java.io.IOException;
  9 import java.util.Collections;
 10 import java.util.Comparator;
 11 import java.util.Iterator;
 12 import java.util.LinkedList;
 13 import java.util.List;
 14 import java.util.PriorityQueue;
 15 import java.util.Random;
 16 
 17 /**
 18  * 
 19  * @author wangyuyuan
 20  * 将一个大文件中的数据排序 无法一次读入内存情况的处理方法
 21  *
 22  */
 23 public class LargeDataSortTest {
 24     static File file = new File("E:"+File.separator+"dataTest"+File.separator+"data.txt");
 25     static File file1 = new File("E:"+File.separator+"dataTest"+File.separator+"dataSorted.txt");
 26     public static void main(String[] args) throws Exception{
 27         createData();
 28         System.out.println("大文件写入成功");
 29         separateFile();
 30         System.out.println("文件拆分成功");
 31     
 32         everySingleFileSort();
 33         System.out.println("小文件排序完成");
 34         mergeFile();
 35         System.out.println("所有排序都已完成");
 36         
 37     }
 38     public static void createData() throws IOException{
 39         FileWriter fw = new FileWriter(file);
 40         BufferedWriter bw = new BufferedWriter(fw);
 41         Random random = new Random();
 42         for(int i=0;i<1000000;i++){
 43             bw.write(random.nextInt(Integer.MAX_VALUE)+"
");
 44         }
 45         bw.close();
 46         fw.close();
 47     }
 48     public static void separateFile() throws IOException{
 49         FileReader fr = new FileReader(file);
 50         BufferedReader br = new BufferedReader(fr);
 51         FileWriter fw = null;
 52         BufferedWriter bw = null;
 53         List<FileWriter> fwList = new LinkedList<FileWriter>();
 54         List<BufferedWriter> bwList = new LinkedList<BufferedWriter>();
 55         for(int i=0;i<20;i++){
 56             fw = new FileWriter("E:"+File.separator+"dataTest"+File.separator+"data"+i+".txt");
 57             bw = new BufferedWriter(fw);
 58             //把对象放入集合
 59             fwList.add(fw);
 60             bwList.add(bw);
 61         }
 62         
 63         while(br.ready()){
 64             for(Iterator<BufferedWriter> iterator=bwList.iterator();iterator.hasNext();){
 65                 BufferedWriter it = iterator.next();
 66                 it.write(br.readLine()+"
");
 67                 continue;//第一个bw读完后让下一个读 然后写入小文件
 68             }
 69         }
 70         br.close();
 71         fr.close();
 72         //遍历关闭所有子文件流   
 73         for (Iterator iterator = bwList.iterator(); iterator.hasNext();) {
 74             BufferedWriter it = (BufferedWriter) iterator.next();
 75             it.close();
 76         }
 77         
 78         for (Iterator iterator = fwList.iterator(); iterator.hasNext();) {
 79             FileWriter it = (FileWriter) iterator.next();
 80             it.close();
 81         }
 82     }
 83     //对每个小文件进行排序
 84     public static void everySingleFileSort() throws Exception{
 85         LinkedList<Integer> numbers ;
 86         for(int i=0;i<20;i++){
 87             numbers = new LinkedList<Integer>();
 88             String path = "E:"+File.separator+"dataTest"+File.separator+"data"+i+".txt";
 89             FileReader fr = new FileReader(path);
 90             BufferedReader br = new BufferedReader(fr);
 91             while(br.ready()){
 92                 numbers.add(Integer.parseInt(br.readLine()));
 93             }
 94             Collections.sort(numbers);
 95             numbersWrite(numbers,path);
 96             br.close();
 97             fr.close();
 98         }
 99     }
100     //将排好序的没个文件写回到小文件中
101     public static void numbersWrite(LinkedList<Integer> numbers,String path) throws IOException{
102         FileWriter fw  = new FileWriter(path);
103         BufferedWriter bw = new BufferedWriter(fw);
104         for(Iterator<Integer> iterator=numbers.iterator();iterator.hasNext();){
105             Integer num = (Integer)iterator.next();
106             bw.write(num+"
");
107         }
108         bw.close();
109         fw.close();
110     }
111     //再将所有小文件整合到一个大文件中
112     public static void mergeFile() throws Exception{
113         PriorityQueue<Obj> queue = new PriorityQueue<Obj>(20,new Obj());
114         FileReader fr = null;
115         BufferedReader br = null;
116         FileWriter fw = new FileWriter(file1);
117         BufferedWriter bw = new BufferedWriter(fw);
118         List<FileReader> frList = new LinkedList<FileReader>();
119         List<BufferedReader> brList = new LinkedList<BufferedReader>();
120         int n;
121         for(int i=0;i<20;i++){
122             String path = "E:"+File.separator+"dataTest"+File.separator+"data"+i+".txt";
123             fr = new FileReader(path);
124             br = new BufferedReader(fr);
125             frList.add(fr);
126             brList.add(br);
127         }
128         //把每个小文件的第一个数读入队列中
129         for(int i=0;i<=20;i++){
130             BufferedReader buffR;
131             if(i==20){
132                 while(queue.size()!=0){
133                     Obj obj = queue.poll();
134                     bw.write(obj.a+"
");
135                     buffR = brList.get(obj.b);
136                     while(buffR.ready()&&queue.size()<20){
137                         n = Integer.parseInt(buffR.readLine());
138                         queue.add(new Obj(n,obj.b));
139                     }
140                 }
141                 break;
142             }
143             buffR = brList.get(i);
144             while(buffR.ready()&&queue.size()<20){
145                 n = Integer.parseInt(buffR.readLine());
146                 Obj obj = new Obj(n,i);
147                 queue.add(obj);
148                 break;
149             }
150         }
151         bw.close();
152         fw.close();
153         //遍历关闭所有子文件流   
154         for (Iterator iterator = brList.iterator(); iterator.hasNext();) {
155             BufferedReader it = (BufferedReader) iterator.next();
156             it.close();
157         }
158         
159         for (Iterator iterator = frList.iterator(); iterator.hasNext();) {
160             FileReader it = (FileReader) iterator.next();
161             it.close();
162         }
163     }
164 }
165 
/**
 * A number taken from one of the sorted small files, used as an element of the merge
 * priority queue. An instance also serves as the {@link Comparator} that orders such
 * elements by their number.
 */
class Obj implements Comparator<Obj> {
    /** {@code a}: the number itself; {@code b}: index of the small file the number was read from. */
    int a, b;

    /** Creates an instance that is only meant to be used as a comparator. */
    Obj() {
    }

    /**
     * @param a the number
     * @param b index of the small file the number was read from
     */
    Obj(int a, int b) {
        this.a = a;
        this.b = b;
    }

    /**
     * Orders two elements by their number, ascending.
     *
     * @return a negative value, zero, or a positive value when {@code o1.a} is less than,
     *         equal to, or greater than {@code o2.a}
     */
    @Override
    public int compare(Obj o1, Obj o2) {
        // Explicit comparison: "o1.a - o2.a" overflows when the operands are far apart
        // (e.g. Integer.MIN_VALUE and 1) and then reports the wrong order.
        return o1.a < o2.a ? -1 : (o1.a == o2.a ? 0 : 1);
    }
}
原文地址:https://www.cnblogs.com/yaoboyyao/p/3663121.html