磁盘文件排序 编程珠玑

  开始看编程珠玑了,第一个就是进行磁盘排序的问题,想到了也只是归并排序,但题目要求1M内存,这个算法不可行。编程珠玑写到使用位图(分两次操作读写可以成功实现,小于内存1M),详情看编程珠玑第一章。

题目:给定10^7数据,对大数据进行排序。要求内存只有1M,时间可以接受,较短。

解决方法:1.多路归并。2.位图操作。

在这里看了july的文章,写的真心不错。推荐:http://blog.csdn.net/v_JULY_v/article/details/6451990

其中有个生成不同随机数的程序附下:

 1 #include <iostream>  
 2 #include <time.h>  
 3 #include <assert.h>  
 4 using namespace std;  
 5   
 6 const int size = 10000000;  
 7 int num[size];  
 8   
 9 int main()  
10 {  
11     int n;  
12     FILE *fp = fopen("data.txt", "w");  
13     assert(fp);  
14   
15     for (n = 1; n <= size; n++)    
16         //之前此处写成了n=0;n<size。导致下面有一段小程序的测试数据出现了0,特此订正。  
17         num[n] = n;  
18     srand((unsigned)time(NULL));  
19     int i, j;  
20   
21     for (n = 0; n < size; n++)  
22     {  
23         i = (rand() * RAND_MAX + rand()) % 10000000;  
24         j = (rand() * RAND_MAX + rand()) % 10000000;  
25         swap(num[i], num[j]);  
26     }  
27   
28     for (n = 0; n < size; n++)  
29         fprintf(fp, "%d ", num[n]);  
30     fclose(fp);  
31     return 0;  
32 }  

位图程序实现排序,首先排前5000000(每个数字对应一个bit)个后排剩下的5000000数据(每次都小于1M),代码如下(模拟两次):

 1 #include <iostream>  
 2 #include <bitset>  
 3 #include <assert.h>  
 4 #include <time.h>  
 5 using namespace std;  
 6   
 7 const int max_each_scan = 5000000;  
 8   
 9 int main()  
10 {  
11     clock_t begin = clock();  
12     bitset<max_each_scan> bit_map;  
13     bit_map.reset();  
14       
15     // open the file with the unsorted data  
16     FILE *fp_unsort_file = fopen("data.txt", "r");  
17     assert(fp_unsort_file);  
18     int num;  
19   
20     // the first time scan to sort the data between 0 - 4999999  
21     while (fscanf(fp_unsort_file, "%d ", &num) != EOF)  
22     {  
23         if (num < max_each_scan)  
24             bit_map.set(num, 1);  
25     }  
26       
27     FILE *fp_sort_file = fopen("sort.txt", "w");  
28     assert(fp_sort_file);  
29     int i;  
30       
31     // write the sorted data into file  
32     for (i = 0; i < max_each_scan; i++)  
33     {  
34         if (bit_map[i] == 1)  
35             fprintf(fp_sort_file, "%d ", i);  
36     }  
37       
38     // the second time scan to sort the data between 5000000 - 9999999  
39     int result = fseek(fp_unsort_file, 0, SEEK_SET);  
40     if (result)  
41         cout << "fseek failed!" << endl;  
42     else  
43     {  
44         bit_map.reset();  
45         while (fscanf(fp_unsort_file, "%d ", &num) != EOF)  
46         {  
47             if (num >= max_each_scan && num < 10000000)  
48             {  
49                 num -= max_each_scan;  
50                 bit_map.set(num, 1);  
51             }  
52         }  
53         for (i = 0; i < max_each_scan; i++)  
54         {  
55             if (bit_map[i] == 1)  
56                 fprintf(fp_sort_file, "%d ", i + max_each_scan);  
57         }  
58     }  
59       
60     clock_t end = clock();  
61     cout<<"用位图的方法,耗时:"<<endl;  
62     cout << (end - begin) / CLK_TCK << "s" << endl;  
63     fclose(fp_sort_file);  
64     fclose(fp_unsort_file);  
65     return 0;  
66 }  
原文地址:https://www.cnblogs.com/zCoderJoy/p/3863916.html