59.排序好的大数据创建索引文件,并实现大文件的二分查找,根据索引百万数据秒读数据

  • 创建索引
    1 //创建索引
    2 struct index
    3 {
    4     //保存每行偏移的位置
    5     int *pindex;
    6     //文件的总长度
    7     int length;
    8 }allindex;//索引
  • 初始化索引数组并把索引写入到文件
     1 //初始化索引数组,并把索引写入到文件
     2 void init(char *path)
     3 {
     4     printf("
    索引数组开始分配");
     5     allindex.length = N;
     6     //分配内存
     7     allindex.pindex = calloc(N, sizeof(int));
     8     printf("
    索引数组完成分配");
     9 
    10     printf("
    开始读取");
    11     //二进制读取文件 避免/r/n读取成/n
    12     FILE *pf = fopen("filesort.txt", "rb");
    13     if (pf == NULL)
    14     {
    15         return -1;
    16     }
    17     else
    18     {
    19         int alllength = 0;
    20         for (int i = 0; i < N; i++)
    21         {
    22             char str[50] = { 0 };
    23             fgets(str, 50, pf);
    24             //每一个首地址的偏移
    25             allindex.pindex[i] = alllength;
    26 
    27             int length = strlen(str);
    28             alllength += length;
    29         }
    30         fclose(pf);
    31     }
    32     printf("
    结束读取");
    33 
    34     printf("
    开始写入");
    35     //二进制方式打开文件,并写入索引
    36     FILE *pfw = fopen("index.txt", "wb");
    37     //写入
    38     fwrite(allindex.pindex, sizeof(int), allindex.length, pfw);
    39     //关闭文件
    40     fclose(pfw);
    41     printf("
    结束写入");
    42 
    43     //释放内存
    44     free(allindex.pindex);
    45 }
  • 从文件中读取索引到索引数组中
     1 //从文件中读取索引
     2 void qucik()
     3 {
     4     printf("
    索引数组开始分配");
     5     allindex.length = N;
     6     allindex.pindex = calloc(N, sizeof(int));//分配内存
     7     printf("
    索引数组完成分配");
     8 
     9 
    10     printf("
    开始读取");
    11     //以二进制读的方式读取索引
    12     FILE *pfw = fopen("index.txt", "rb");
    13     //读取
    14     fread(allindex.pindex, sizeof(int), allindex.length, pfw);
    15     //关闭文件
    16     fclose(pfw);
    17     printf("
    结束读取");
    18 }
  • 测试函数
     1   FILE *pf1 = fopen("index.txt", "rb");
     2     FILE *pf2 = fopen("filesort.txt", "rb");
     3     while (1)
     4     {
     5         printf("
    请输入要读取的行数");
     6         int num = 0;
     7         scanf("%d", &num);
     8 
     9         int indexnum = 0;
    10         fseek(pf1, num*sizeof(int), SEEK_SET);
    11         fread(&indexnum, sizeof(int), 1, pf1);//读索引到indexnum
    12 
    13         fseek(pf2, indexnum, SEEK_SET);
    14         char str[128] = { 0 };
    15         fgets(str, 128, pf2);//读取
    16         printf("
    %s", str);
    17 
    18     }
    19     fclose(pf1);
    20     fclose(pf2);
  • 根据索引文件对已经排序好的文件进行二分查找
     1 void binsearch(char *searchstr)
     2 {
     3     //头部
     4     int tou = 0;
     5     //尾部
     6     int wei = N - 1;
     7     //是否找到的标识
     8     int flag = 0;
     9     //如果头小于尾
    10     while (tou <= wei)
    11     {
    12         //获取中部
    13         int zhong = (tou + wei) / 2;
    14         //读取中部索引的内容
    15         char zhongstr[256] = { 0 };
    16         {
    17             //打开索引文件
    18             FILE *pf1 = fopen("index.txt", "rb");
    19             //打开排序好的文件
    20             FILE *pf2 = fopen("filesort.txt", "rb");
    21 
    22             //读zhong对应的地址存到indexnum中
    23             int indexnum = 0;
    24             fseek(pf1, zhong * sizeof(int), SEEK_SET);
    25             fread(&indexnum, sizeof(int), 1, pf1);
    26 
    27             //根据读取的位置读取文件到zhongstr中
    28             fseek(pf2, indexnum, SEEK_SET);
    29             fgets(zhongstr, 128, pf2);
    30 
    31             fclose(pf1);
    32             fclose(pf2);
    33         }
    34         //消除'
    或者
    '
    35         eatN(zhongstr);
    36         char pnewzhongstr[256] = { 0 };
    37         sprintf(pnewzhongstr, zhongstr);
    38         //进行处理,遇到-终止
    39         eatg(pnewzhongstr);
    40         //比较是否找到
    41         int res = strcmp(pnewzhongstr, searchstr);//1 0  -1
    42         if (res == 0)
    43         {
    44             flag = 1;
    45             printf("%s", zhongstr);
    46             break;
    47         }
    48         //如果中比searchstr要大
    49         else if (res == 1)
    50         {
    51             wei = zhong - 1;
    52         }
    53         //如果中比searchstr小
    54         else
    55         {
    56             tou = zhong + 1;
    57         }
    58 
    59 
    60     }
    61     //判断是否找到
    62     if (flag)
    63     {
    64         printf("
    find");
    65     } 
    66     else
    67     {
    68         printf("
     not find");
    69     }
    70 }
  • 遇到'-'结束
     1 //遇到'-'结束
     2 void eatg(char *str)
     3 {
     4     while (*str!='')
     5     {
     6 
     7         if (*str=='-')
     8         {
     9             *str = '';
    10         }
    11         str++;
    12     }
    13 
    14 }
  • 测试函数
     1 void main()
     2 {
     3 
     4 
     5     char str[256] = { 0 };
     6     scanf("%s", str);
     7     binsearch(str);
     8 
     9 
    10 
    11     system("pause");
    12 }
原文地址:https://www.cnblogs.com/xiaochi/p/8437140.html