大文件文本数据处理

1.切割与合并

 #define  _CRT_SECURE_NO_WARNINGS
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Source text file that space() splits into numbered pieces.
   Backslashes are doubled: a single '\' inside a C string literal starts an
   escape sequence, so "\I" or "\d" would silently drop the path separator. */
char *path = "Z:\\I\\尹成清华终极版C语言视频源码文档20150131\\大数据相关数据\\dangdangwang.txt";
/* Destination file that merge() rebuilds from the numbered pieces. */
char *newpath = "Z:\\I\\尹成清华终极版C语言视频源码文档20150131\\大数据相关数据\\dangdangwangN.txt";
/* Total number of lines that space() distributes across the pieces.
   NOTE(review): presumably the line count of `path` as reported by getN() - confirm. */
#define N 13180820

/*
 * Counts the lines of the text file at `path`.
 *
 * A final line that is not terminated by '\n' still counts as a line; an
 * empty file has 0 lines. Line length is unlimited because the file is
 * scanned in fixed-size chunks rather than line by line.
 *
 * Returns the number of lines, or -1 if `path` is NULL, the file cannot be
 * opened, or a read error occurs.
 */
int getN(char *path)
{
    if (path == NULL)
    {
        return -1;
    }

    FILE *pf = fopen(path, "r");
    if (pf == NULL)
    {
        return -1;
    }

    char chunk[4096];
    size_t got;
    int lines = 0;
    int prev = '\n'; /* pretend a newline precedes the file so byte 0 opens line 1 */

    /* Loop on the read result itself: testing feof() before reading would run
       one extra iteration and over-count by one. */
    while ((got = fread(chunk, 1, sizeof chunk, pf)) > 0)
    {
        for (size_t k = 0; k < got; k++)
        {
            /* Every byte that follows a newline (or starts the file) begins a new line. */
            if (prev == '\n')
            {
                lines++;
            }
            prev = chunk[k];
        }
    }

    if (ferror(pf))
    {
        lines = -1;
    }

    fclose(pf);
    return lines;
}

/*
 * Copies lines from `in` to `out` until `max_lines` complete lines have been
 * copied or `in` is exhausted. Returns the number of newline-terminated lines
 * copied, or -1 if writing to `out` failed.
 */
static int space_copy_lines(FILE *in, FILE *out, int max_lines)
{
    char chunk[1024];
    int copied = 0;

    while (copied < max_lines && fgets(chunk, sizeof chunk, in) != NULL)
    {
        if (fputs(chunk, out) == EOF)
        {
            return -1;
        }
        /* A line longer than the buffer arrives in several chunks; only the
           chunk that holds the newline completes the line. */
        if (strchr(chunk, '\n') != NULL)
        {
            copied++;
        }
    }
    return copied;
}

/*
 * Splits the text file at `path` into `num` piece files named
 * dangdangwang1.txt .. dangdangwang<num>.txt in the hard-coded data directory.
 *
 * N lines are distributed as follows:
 *   - if N is divisible by num, every piece gets N / num lines;
 *   - otherwise the first num-1 pieces get N / (num-1) lines each and the
 *     last piece gets the remaining N % (num-1) lines
 *     (e.g. 100 lines into 9 pieces -> 8 pieces of 12 lines + 1 piece of 4).
 *
 * If the source runs out of lines early, the remaining pieces are still
 * created but left empty. Nothing is done when `path` is NULL, `num` is not
 * positive, or the source file cannot be opened; I/O failures are reported
 * on stderr.
 */
void space(char *path, int num)
{
    if (path == NULL || num <= 0)
    {
        return;
    }

    FILE *pf = fopen(path, "r");
    if (pf == NULL)
    {
        perror(path);
        return;
    }

    /* num == 1 always takes the first arm (N % 1 == 0), so num - 1 is never
       used as a divisor when it would be zero. */
    int per_piece = (N % num == 0) ? N / num : N / (num - 1);
    int last_piece = N - per_piece * (num - 1);

    for (int i = 0; i < num; i++)
    {
        char piece[256];
        snprintf(piece, sizeof piece,
                 "Z:\\I\\尹成清华终极版C语言视频源码文档20150131\\大数据相关数据\\dangdangwang%d.txt",
                 i + 1);

        FILE *pfw = fopen(piece, "w");
        if (pfw == NULL)
        {
            perror(piece);
            break;
        }

        int quota = (i < num - 1) ? per_piece : last_piece;
        int failed = (space_copy_lines(pf, pfw, quota) < 0);

        /* fclose flushes buffered output, so its result matters for written files. */
        if (fclose(pfw) == EOF)
        {
            failed = 1;
        }
        if (failed)
        {
            fprintf(stderr, "failed to write %s\n", piece);
            break;
        }
    }

    fclose(pf);
}

/*
 * Concatenates the piece files dangdangwang1.txt .. dangdangwang<n>.txt from
 * the hard-coded data directory, in numeric order, into the file `newpath`.
 *
 * `newpath` is created or truncated. With n <= 0 no piece is read and the
 * output is left empty. Merging stops at the first piece that cannot be
 * opened or at the first write failure; both are reported on stderr.
 * Nothing is done when `newpath` is NULL.
 */
void merge(char *newpath,int n)
{
    if (newpath == NULL)
    {
        return;
    }

    FILE *pf = fopen(newpath, "w");
    if (pf == NULL)
    {
        perror(newpath);
        return;
    }

    for (int i = 0; i < n; i++)
    {
        char piece[256];
        snprintf(piece, sizeof piece,
                 "Z:\\I\\尹成清华终极版C语言视频源码文档20150131\\大数据相关数据\\dangdangwang%d.txt",
                 i + 1);

        FILE *pfr = fopen(piece, "r");
        if (pfr == NULL)
        {
            /* A missing piece would leave a hole in the merged file, so stop here. */
            perror(piece);
            break;
        }

        /* Copy in fixed-size chunks; line boundaries are irrelevant when concatenating. */
        char chunk[4096];
        size_t got;
        int write_failed = 0;
        while ((got = fread(chunk, 1, sizeof chunk, pfr)) > 0)
        {
            if (fwrite(chunk, 1, got, pf) != got)
            {
                write_failed = 1;
                break;
            }
        }
        fclose(pfr);

        if (write_failed)
        {
            fprintf(stderr, "failed to write %s\n", newpath);
            break;
        }
    }

    /* fclose flushes buffered output, so a failure here means lost data. */
    if (fclose(pf) == EOF)
    {
        perror(newpath);
    }
}

/*
 * Reads the number of pieces from standard input, splits the source file
 * into that many pieces with space(), then rebuilds a single file from the
 * pieces with merge().
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE when the input is not a positive integer.
 */
int main(void)
{
    /* The total line count N is hard-coded; getN(path) can be called here to
       obtain the line count of the source file instead. */
    int status = EXIT_SUCCESS;
    int num = 0;

    /* scanf leaves num untouched on bad input, so check its result before using num. */
    if (scanf("%d", &num) != 1 || num <= 0)
    {
        fprintf(stderr, "expected a positive number of pieces\n");
        status = EXIT_FAILURE;
    }
    else
    {
        space(path, num);
        merge(newpath, num);
    }

    system("pause");
    return status;
}
原文地址:https://www.cnblogs.com/sjxbg/p/5867073.html