程序结构清晰的大文本数据排序

1. main.c

//#include "createsort.h"
//#include "createindex.h"
#include "binsearch.h"
/*
 * Fill in the three global path buffers: the raw input file, the sorted
 * output file and the binary index file.
 *
 * Backslashes in Windows paths must be doubled inside a C string literal;
 * a single backslash starts an escape sequence (e.g. "\1" is an octal
 * escape), which silently corrupts the path.
 */
void initall()
{
    strcpy(filepath, "Z:\\I\\尹成清华终极版C语言视频源码文档20150131\\大数据相关数据\\1E~001OK.txt");
    strcpy(sortpath, "Z:\\I\\尹成清华终极版C语言视频源码文档20150131\\大数据相关数据\\1E~001sort.txt");
    strcpy(indexpath, "Z:\\I\\尹成清华终极版C语言视频源码文档20150131\\大数据相关数据\\1E~001index.txt");
}

/*
 * Entry point: set up the file paths, then look up every whitespace-separated
 * word read from standard input until input ends.
 *
 * Returns 0 on normal termination.
 */
int main(void)
{
    initall();

    /*
     * One-off preparation steps, run separately when the sorted file and the
     * index have to be rebuilt:
     *   initmem(); sort(); writetofile();   (createsort.h)
     *   init();                             (createindex.h)
     */

    char str[256] = { 0 };

    /*
     * The field width keeps scanf inside str, and checking the result ends
     * the loop on EOF or a read error instead of searching the same stale
     * buffer forever.
     */
    while (scanf("%255s", str) == 1)
    {
        binsearch(str);
    }

    system("pause");
    return 0;
}

2.index.h

#define  _CRT_SECURE_NO_WARNINGS
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define N 10000000

/*
 * Guard the declarations so the header can be included more than once in a
 * translation unit (createsort.h, createindex.h and binsearch.h all pull it
 * in) without redefining struct index.
 */
#ifndef INDEX_H_DECLARATIONS
#define INDEX_H_DECLARATIONS

/* In-memory form of the line-offset index. */
struct index
{
    int *pindex; /* array of offsets, one per line of the sorted file */
    int length;  /* number of entries in pindex */
};

/*
 * Shared state. These are declarations only; the single definition of each
 * object lives in index.c. Without "extern" every file including this header
 * would emit its own definition.
 */
extern char **g_pp;           /* array of pointers to the loaded lines */
extern char filepath[256];    /* path of the unsorted input file */
extern char sortpath[256];    /* path of the sorted text file */
extern char indexpath[256];   /* path of the binary index file */
extern struct index allindex; /* the index itself */

/* Count the lines of "file.txt"; returns -1 if it cannot be opened. */
int getN(void);
/* Replace '-' in str with a string terminator. */
void eatg(char *str);
/* Replace a line break in str with a string terminator. */
void eatN(char *str);

#endif /* INDEX_H_DECLARATIONS */

2.index.c

#include"index.h"

/* Single definitions of the globals shared by all modules; everything starts zeroed. */
char **g_pp = NULL;                  /* pointer array holding the loaded lines */
char filepath[256] = "";             /* unsorted input file */
char sortpath[256] = "";             /* sorted text file */
char indexpath[256] = "";            /* binary index file */
struct index allindex = { NULL, 0 }; /* line-offset index */

/*
 * Count the lines of "file.txt".
 *
 * Returns the number of lines, or -1 if the file cannot be opened. A final
 * line without a trailing newline is counted; an empty file has 0 lines.
 *
 * Lines are counted by their terminating '\n' rather than by the number of
 * successful fixed-size reads, so a line longer than any buffer is still one
 * line, and the end of file is not counted as an extra line.
 */
int getN()
{
    FILE *pf = fopen("file.txt", "r");
    if (pf == NULL)
    {
        return -1;
    }

    int lines = 0;
    int last = '\n'; /* treat the start of the file as "just after a line break" */
    int ch;
    while ((ch = fgetc(pf)) != EOF)
    {
        if (ch == '\n')
        {
            lines++;
        }
        last = ch;
    }
    if (last != '\n')
    {
        lines++; /* unterminated last line */
    }

    fclose(pf);
    return lines;
}
/*
 * Truncate str at its first '-' by overwriting it with '\0'.
 *
 * str: NUL-terminated, writable string; NULL is ignored.
 * A string without '-' is left unchanged.
 */
void eatg(char *str)
{
    if (str == NULL)
    {
        return;
    }

    for (; *str != '\0'; str++)
    {
        if (*str == '-')
        {
            /* The string ends here, so nothing after this point matters. */
            *str = '\0';
            return;
        }
    }
}
/*
 * Strip the line break from str: the first '\r' or '\n' is overwritten with
 * '\0', which handles both "\n" and "\r\n" endings.
 *
 * str: NUL-terminated, writable string; NULL is ignored.
 * A string without a line break is left unchanged.
 */
void eatN(char *str)
{
    if (str == NULL)
    {
        return;
    }

    for (; *str != '\0'; str++)
    {
        if (*str == '\r' || *str == '\n')
        {
            /* The string ends here, so nothing after this point matters. */
            *str = '\0';
            return;
        }
    }
}

3.binsearch.h

#include "index.h"
/*
 * Binary-search the sorted file (sortpath) through the index file (indexpath)
 * for a line whose text before the first '-' equals searchstr, and print the
 * outcome to standard output.
 */
void binsearch(char *searchstr);

3.binsearch.c

#include "binsearch.h"

/*
 * Binary-search the sorted file for searchstr.
 *
 * The index file (indexpath) holds one int per line: the byte offset of that
 * line in the sorted file (sortpath). Each probe reads offset number "zhong"
 * from the index, seeks to it in the sorted file and reads the line there.
 * The line is compared by the part before its first '-'.
 *
 * searchstr: NUL-terminated key to look for.
 *
 * Prints the matching line followed by "\nfind", or "\n not find" when there
 * is no match or the files cannot be opened.
 */
void binsearch(char *searchstr)
{
    int flag = 0;

    /* Open both files once instead of on every probe. */
    FILE *pfindex = fopen(indexpath, "rb");
    FILE *pfsort = fopen(sortpath, "rb");

    if (pfindex == NULL || pfsort == NULL)
    {
        fprintf(stderr, "binsearch: cannot open index or sorted file\n");
    }
    else
    {
        int tou = 0;
        int wei = N - 1;

        while (tou <= wei)
        {
            int zhong = tou + (wei - tou) / 2;
            int indexnum = 0;
            char zhongstr[256] = { 0 };
            char keystr[256] = { 0 };

            /*
             * A probe that cannot be read (index shorter than N entries,
             * offset at or past the end of the data) lies beyond the
             * available lines, so continue in the lower half.
             */
            if (fseek(pfindex, (long)zhong * (long)sizeof(int), SEEK_SET) != 0
                || fread(&indexnum, sizeof(int), 1, pfindex) != 1
                || fseek(pfsort, (long)indexnum, SEEK_SET) != 0
                || fgets(zhongstr, 128, pfsort) == NULL)
            {
                wei = zhong - 1;
                continue;
            }

            eatN(zhongstr);

            /* Copy as data; the line must never be used as a format string. */
            strcpy(keystr, zhongstr);
            eatg(keystr); /* keep only the part before the first '-' */

            /* strcmp only promises the sign of its result, not exactly 1 or -1. */
            int res = strcmp(keystr, searchstr);
            if (res == 0)
            {
                flag = 1;
                printf("%s", zhongstr);
                break;
            }
            else if (res > 0)
            {
                wei = zhong - 1;
            }
            else
            {
                tou = zhong + 1;
            }
        }
    }

    if (pfindex != NULL)
    {
        fclose(pfindex);
    }
    if (pfsort != NULL)
    {
        fclose(pfsort);
    }

    if (flag)
    {
        printf("\nfind");
    }
    else
    {
        printf("\n not find");
    }
}

4.createsort.h

#include "index.h"

/* Load the lines of the input file (filepath) into the global array g_pp. */
void initmem(void);
/* Comparator over two g_pp elements: p1 and p2 point to char* strings, ordered with strcmp. */
int com(void *p1, void *p2);
/* Sort g_pp with qsort using com's ordering. */
void sort(void);
/* Print every entry of g_pp to standard output. */
void show(void);
/* Write every entry of g_pp, one per line, to the sorted file (sortpath). */
void writetofile(void);

4.createsort.c

#include "createsort.h"
/*
 * Load N lines of the input file (filepath) into the global pointer array
 * g_pp, one heap-allocated string per line, with the line break removed.
 *
 * If the file has fewer than N lines, the remaining entries are empty
 * strings, so g_pp always holds N valid strings on success.
 *
 * On any failure (file cannot be opened, out of memory) an error is reported
 * on stderr, everything allocated here is released and g_pp is left
 * unchanged.
 */
void initmem()
{
    FILE *pf = fopen(filepath, "r");
    if (pf == NULL)
    {
        fprintf(stderr, "initmem: cannot open %s\n", filepath);
        return;
    }

    char **lines = calloc(N, sizeof *lines); /* the pointer array */
    if (lines == NULL)
    {
        fprintf(stderr, "initmem: out of memory\n");
        fclose(pf);
        return;
    }

    for (int i = 0; i < N; i++)
    {
        char str[50] = { 0 };
        if (fgets(str, sizeof str, pf) == NULL)
        {
            str[0] = '\0'; /* past the end of the file: store an empty line */
        }
        eatN(str);

        lines[i] = calloc(strlen(str) + 1, sizeof(char));
        if (lines[i] == NULL)
        {
            /* A NULL entry would crash the comparator later; give up cleanly. */
            fprintf(stderr, "initmem: out of memory\n");
            for (int j = 0; j < i; j++)
            {
                free(lines[j]);
            }
            free(lines);
            fclose(pf);
            return;
        }
        strcpy(lines[i], str);
    }

    fclose(pf);
    g_pp = lines;
}

/*
 * Comparator for the line pointer array: p1 and p2 each point to a char*
 * element, and the two strings are ordered with strcmp.
 *
 * Returns a negative, zero or positive value as strcmp does.
 */
int com(void *p1, void*p2)
{
    char *left = *(char **)p1;
    char *right = *(char **)p2;

    return strcmp(left, right);
}

/*
 * Adapter with the exact comparator type qsort requires
 * (const void * parameters). com only reads through its arguments, so
 * dropping the qualifier for the call is safe.
 */
static int com_for_qsort(const void *p1, const void *p2)
{
    return com((void *)p1, (void *)p2);
}

/*
 * Sort the N strings referenced by g_pp into strcmp order.
 * Does nothing when g_pp has not been allocated.
 */
void sort()
{
    if (g_pp == NULL)
    {
        return;
    }

    qsort(g_pp, N, sizeof(char*), com_for_qsort);
}
void show()
{
    printf("
此时状态
");
    for (int i = 0; i < N; i++)
    {
        printf("
%s", g_pp[i]);
    }
}
void writetofile()
{
    FILE *pf = fopen(sortpath, "w");
    for (int i = 0; i < N; i++)
    {
        char temp[100] = { 0 };
    //    printf("
%s", g_pp[i]);
        sprintf(temp, "%s
", g_pp[i]);
    //    printf("
%s", temp);
        fputs(temp, pf);
    }

    fclose(pf);
}

5.createindex.h

#include "index.h"
/* Build the line-offset index of the sorted file and write it to the index file. */
void init(void);
/* Load a previously written index file into allindex. */
void qucik(void);

5.createindex.c

#include "createindex.h"


void init()
{
    printf("
索引数组开始分配");
    allindex.length = N;
    allindex.pindex = calloc(N, sizeof(int));//分配内存
    printf("
索引数组完成分配");

    printf("
开始读取");
    FILE *pf = fopen(sortpath, "rb");//
->

    if (pf == NULL)
    {
        return -1;
    }
    else
    {
        int alllength = 0;
        for (int i = 0; i < N; i++)
        {
            char str[50] = { 0 };
            fgets(str, 50, pf);
            allindex.pindex[i] = alllength;//错位从0开始

            int length = strlen(str);
            alllength += length;

        }



        fclose(pf);
    }
    printf("
结束读取");

    printf("
开始写入");
    FILE *pfw = fopen(indexpath, "wb");//写入索引
    fwrite(allindex.pindex, sizeof(int), allindex.length, pfw);
    fclose(pfw);//关闭
    printf("
结束写入");


    free(allindex.pindex);


}
void qucik()
{
    printf("
索引数组开始分配");
    allindex.length = N;
    allindex.pindex = calloc(N, sizeof(int));//分配内存
    printf("
索引数组完成分配");

    printf("
开始读取");
    FILE *pfw = fopen("index.txt", "rb");//写入索引
    fread(allindex.pindex, sizeof(int), allindex.length, pfw);
    fclose(pfw);//关闭
    printf("
结束读取");
}
原文地址:https://www.cnblogs.com/sjxbg/p/5882060.html