一分三十一秒生成十亿行随机数的文本文件

程序短而简单,稍微可以说一下的是:如果一行一行地写,需要三四分钟;采用每次写十万行、共写一万次(合计十亿行)的方式,是提高效率的关键。

代码:

import java.io.BufferedWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;

/**
 * Generates a large text file in which every line holds one random integer
 * between {@code 0} and {@code 100} (inclusive).
 *
 * <p>Lines are assembled in memory in large batches and each batch is handed to
 * the writer in a single call; with the default settings the program writes
 * 10,000 batches of 100,000 lines, i.e. one billion lines in total.
 */
public class BigFileProducer {
    /** Number of batches written by {@link #main(String[])}. */
    private static final int BATCH_COUNT = 10_000;

    /** Number of lines contained in every batch. */
    private static final int LINES_PER_BATCH = 100_000;

    /** Smallest value that can appear on a line (inclusive). */
    private static final int MIN_VALUE = 0;

    /** Largest value that can appear on a line (inclusive). */
    private static final int MAX_VALUE = 100;

    /** Character set of the generated file. */
    private static final Charset UTF8 = Charset.forName("utf-8");

    /**
     * Writes the one-billion-line file and reports the elapsed time.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        long startMs = System.currentTimeMillis();
        // Backslashes must be escaped; "\t" would otherwise become a TAB character.
        Path filepath = Paths.get("c:\\temp\\20200229.txt");

        try {
            generate(filepath, BATCH_COUNT, LINES_PER_BATCH);
        } catch (IOException ex) {
            // Do not claim success when the file could not be written completely.
            System.err.println("Failed to write '" + filepath + "': " + ex);
            ex.printStackTrace();
            return;
        }

        long endMs = System.currentTimeMillis();
        System.out.println("File:'" + filepath + "' created,time elapsed:" + ms2DHMS(startMs, endMs));
    }

    /**
     * Writes {@code batches * linesPerBatch} random-number lines to {@code target},
     * replacing any previous content of the file. A progress marker
     * ({@code #<batch index>}) is printed to standard output before each batch.
     *
     * @param target        file to create or overwrite
     * @param batches       number of batches to write, must not be negative
     * @param linesPerBatch number of lines in each batch, must not be negative
     * @throws IOException              if the file cannot be opened or written
     * @throws IllegalArgumentException if a count is negative
     */
    static void generate(Path target, int batches, int linesPerBatch) throws IOException {
        if (batches < 0 || linesPerBatch < 0) {
            throw new IllegalArgumentException(
                    "batches and linesPerBatch must be >= 0, got " + batches + " and " + linesPerBatch);
        }

        // Each line is at most "100\n" (4 chars); cap the initial capacity for huge batch sizes.
        StringBuilder batch = new StringBuilder((int) Math.min(4L * linesPerBatch, 1L << 20));

        // TRUNCATE_EXISTING: without it a longer pre-existing file keeps its stale tail.
        // The BufferedWriter is used directly (no PrintWriter wrapper) so that write
        // failures surface as IOException instead of being silently swallowed.
        try (BufferedWriter writer = Files.newBufferedWriter(target, UTF8,
                StandardOpenOption.CREATE,
                StandardOpenOption.TRUNCATE_EXISTING,
                StandardOpenOption.WRITE)) {
            for (int i = 0; i < batches; i++) {
                System.out.println("#" + i);
                batch.setLength(0);
                fillBatch(batch, linesPerBatch);
                writer.append(batch); // one write call per batch
            }
        }
    }

    /**
     * Appends {@code lines} lines to {@code sb}, each consisting of a random
     * integer in {@code [MIN_VALUE, MAX_VALUE]} followed by a line feed.
     *
     * @param sb    buffer receiving the lines
     * @param lines number of lines to append
     */
    private static void fillBatch(StringBuilder sb, int lines) {
        for (int i = 0; i < lines; i++) {
            sb.append(rnd(MIN_VALUE, MAX_VALUE)).append('\n');
        }
    }

    /**
     * Returns a random integer between {@code min} and {@code max}, both inclusive.
     *
     * @param min lower bound (inclusive)
     * @param max upper bound (inclusive)
     * @return a value {@code v} with {@code min <= v <= max}
     * @throws IllegalArgumentException if {@code min > max}
     */
    static int rnd(int min, int max) {
        if (min > max) {
            throw new IllegalArgumentException("min (" + min + ") must not exceed max (" + max + ")");
        }
        // long arithmetic: max - min + 1 can overflow int for wide ranges.
        long span = (long) max - min + 1;
        // Truncate the non-negative offset first, then shift by min; casting the
        // shifted value instead would round toward zero and skew negative ranges.
        long offset = (long) (Math.random() * span);
        return (int) (min + offset);
    }

    /**
     * Formats the time elapsed between two millisecond timestamps as
     * days/hours/minutes/seconds/milliseconds, e.g. {@code 1m31s369ms}.
     * Leading units that are zero are omitted; the milliseconds part is always present.
     *
     * @param startMs start time in milliseconds
     * @param endMs   end time in milliseconds
     * @return the formatted duration
     */
    static String ms2DHMS(long startMs, long endMs) {
        long elapsedMs = endMs - startMs;
        long secondCount = elapsedMs / 1000;
        String ms = elapsedMs % 1000 + "ms";

        long days = secondCount / (60 * 60 * 24);
        long hours = (secondCount % (60 * 60 * 24)) / (60 * 60);
        long minutes = (secondCount % (60 * 60)) / 60;
        long seconds = secondCount % 60;

        if (days > 0) {
            return days + "d" + hours + "h" + minutes + "m" + seconds + "s" + ms;
        }
        if (hours > 0) {
            return hours + "h" + minutes + "m" + seconds + "s" + ms;
        }
        if (minutes > 0) {
            return minutes + "m" + seconds + "s" + ms;
        }
        if (seconds > 0) {
            return seconds + "s" + ms;
        }
        return ms;
    }
}

输出:

#9997
#9998
#9999
File:'c:\temp\20200229.txt' created,time elapsed:1m31s369ms

生成文本文件片段:

52
87
44
62
99
33
53
50
32
58
13
62
84
52
4
95
57
34
40
33
76
38
73
19
86
63
35
5
30
25
37
75
58
59
34
48
82
96
21
76
7
6
97
24
12
69
84
92
22
35
46
68
67
87
62
45
77
84

--2020年2月29日--

原文地址:https://www.cnblogs.com/heyang78/p/12382125.html