哈夫曼树压缩、解压缩文件

Tips:注意二进制读写文件回车为:

代码详细分析改天再填坑。。。

还有单纯形算法-->  github:https://github.com/caitian521/algorithm

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <algorithm>
#include <iostream>
#include <list>
#include <string>
using namespace std;

template<class Type> 
struct huffnode{
    int weight;
    Type ch;
    char code[50];
    int parent,leftchild,rightchild;
public:
    huffnode():ch(Type()),weight(0),parent(-1),leftchild(-1),rightchild(-1)
    {}

};
class hufftree
{
    list<huffnode<char> > hufflist;
    huffnode<char> *nodetable;
    char file[200];
    int num;
public:
    hufftree(const char *filename);
    ~hufftree();
    void Addchar(const char s);
    void CreateTree();
    bool SelectMin(const int pos,int &minst);
    void CreateCode();
    void Compression();
    void showcode();
    void CreateConfig();
    void Decompression();
    int Match(const char prebuff[],const char buff[],int &endat,FILE *fout,int &cnt);
    void print(int p,FILE *fout,int &cnt);
};

hufftree::hufftree(const char *filename)
{
    strcpy(file,filename);
    FILE *fin1 = fopen(file,"rb");
    char s;
    s = fgetc(fin1);
    while(s !=EOF)
    {
        if(s=='
')
            s = fgetc(fin1);
        Addchar(s);
        s = fgetc(fin1);
    }
    num = hufflist.size();
    printf("%d
",num);
    nodetable = new huffnode<char>[num*2-1];
    list<huffnode<char> >::iterator iter = hufflist.begin();
    for(int i = 0;i<num && iter!=hufflist.end();i++,iter++)
    {
        nodetable[i].ch = iter->ch;
        nodetable[i].weight = iter->weight;
    }
    fclose(fin1);
}

void hufftree::Addchar(const char s)
{
    list<huffnode<char> >::iterator iter = hufflist.begin();
    for(;iter!=hufflist.end();iter++)
    {
        if(iter->ch==s)
        {
            iter->weight++;
            break;
        }
    }
    if(iter==hufflist.end())
    {
        huffnode<char> node;
        node.weight = 1;
        node.ch = s;
        hufflist.push_back(node);
    }

}

void hufftree::CreateTree()
{
    int left = 0,right=0;
    for(int i = num;i<2*num-1;i++)
    {
        SelectMin(i-1,left);
        nodetable[left].parent = i;
        SelectMin(i-1,right);
        nodetable[right].parent = i;
        nodetable[i].weight = nodetable[left].weight+nodetable[right].weight;
        nodetable[i].leftchild = left;
        nodetable[i].rightchild = right;
    }
}
bool hufftree::SelectMin(const int pos,int &minst)
{
    int i=0,minnum=0;
    while(minnum<pos)
    {
        if(nodetable[i].parent==-1)
            break;
        minnum++;
    }
    if(minnum>pos)//上一个while的目的
        return false;//ruguo false???
    while(i<=pos)
    {
        if(nodetable[i].parent == -1 && nodetable[minnum].weight>nodetable[i].weight)
            minnum = i;
        i++;
    }
    minst = minnum;
}
void hufftree::CreateCode()
{
    int i;
    int c,p;
    int start;
    char *tmpcode = new char[num];
    tmpcode[num] = '';
    for(i = 0;i<num;i++)
    {
        start = num;
        c = i;
        while((p=nodetable[c].parent)>0)
        {
            start--;
            tmpcode[start]=(nodetable[p].leftchild==c)?'0':'1';
            c = p;
        }
        strcpy(nodetable[i].code,&tmpcode[start]);//
    }
}

void hufftree::Compression()
{
    //freopen(file,"r",stdin);
    FILE *fin = fopen(file,"rb");
    FILE *fout = fopen("compression.txt","wb");
    char s;
    char *thiscode = new char[256];
    unsigned char inch = 0;
    int index = 0;
    s = fgetc(fin);
    int countch = 10;
    while(s != EOF)
    {
        if(s=='
')
            s = fgetc(fin);
        for(int i = 0;i<num;i++)
        {
            if(nodetable[i].ch==s)
            {
                thiscode = nodetable[i].code;
                break;
            }
        }
        countch--;
        if(countch>0)
            cout<<s<<"  "<<thiscode<<endl;
        for(int t = 0;t<strlen(thiscode);t++)
        {

            inch <<= 1;
            if(thiscode[t]=='1')
                inch|=1;
            index++;
            if(index==8)
            {
                fputc(inch,fout);
                index = 0;
                inch = 0;
            }
        }
        s = fgetc(fin);
    }
    if(index>0)
    {
        inch =inch<<(8-index);
        fputc(inch,fout);
    }
    fclose(fin);
    fclose(fout);
}
hufftree::~hufftree()
{

}
void hufftree::showcode()
{
    for(int i = 0;i<num;i++)
    {
        cout<<nodetable[i].ch<<"--"<<nodetable[i].code<<endl;
    }
}
void hufftree::CreateConfig()
{
    char configname[200];
    strcpy(configname,"huffman.config");
    cout<<configname<<endl;
    FILE *fout = fopen(configname,"w");
    for(int i = 0;i<num;i++)
    {
        fputs(nodetable[i].code,fout);
        fputc(',',fout);
        fputc(nodetable[i].ch,fout);
        fputc('
',fout);
    }
    fclose(fout);
}
void hufftree::print(int p,FILE *fout,int &cnt)
{
    if(cnt>0)
    {
        if(nodetable[num*2-2].weight-cnt<10)
            cout<<"yes:"<<nodetable[p].ch<<endl;
        char ch;
        ch = nodetable[p].ch;
        if(ch=='
')
            fputc('
',fout);
        fputc(nodetable[p].ch,fout);
    }
    cnt--;
}
int hufftree::Match(const char prebuff[],const char buff[],int &endat,FILE *fout,int &cnt)
{
    int p = num*2-2;
    int Child = nodetable[p].leftchild;
    int flag = 0;
    int start = 0;
    int sign = 0;
    for(int i = 0;prebuff[i]!='';i++)
    {
        if(Child == -1)
        {
            print(p,fout,cnt);
            p = num*2-2;
            start = i;
        }
        if(prebuff[i]=='0')
        {
            p = nodetable[p].leftchild;
        }
        else{
            p = nodetable[p].rightchild;
        }
        Child = nodetable[p].leftchild;
    }
    if(Child == -1)//
    {
        print(p,fout,cnt);
        p = num*2-2;
        start = 0;
    }
    else
        if(prebuff[0]!='')
            flag = 1;
    for(int i = 0;i<8;i++)
    {
        if(Child == -1)
        {
            print(p,fout,cnt);
            p = num*2-2;
            sign = i;
            flag = 0;
        }
        if(buff[i]=='0')
        {
            p = nodetable[p].leftchild;
        }
        else{
            p = nodetable[p].rightchild;
        }
        Child = nodetable[p].leftchild;
    }
    if(Child == -1)
    {
        print(p,fout,cnt);
        sign = 8;//
        endat = sign;
    }
    else{
            endat = sign;
        if(flag)
            return start;
    }
    return -1;
}

void hufftree::Decompression()
{
    FILE *fin = fopen("compression.txt","rb");
    FILE *fout = fopen("new.txt","wb");
    unsigned char readnum;
    readnum = fgetc(fin);
    int endat = 0;
    int sign = 0;
    char buff[9];
    char prebuff[33];
    prebuff[0] = '';
    buff[8] = '';
    for(int i = 0;i<8;i++)
        buff[i] = '0';
    cout<<"depression"<<endl;
    int cnt = nodetable[num*2-2].weight;

    while(!feof(fin))//readnum!=EOF
    {

        for(int i = 7;i>=0;i--)
        {
            buff[i]=(readnum&1)?'1':'0';
            readnum >>= 1;
        }
        sign = Match(prebuff,buff,endat,fout,cnt);
        if(sign>=0)
        {
            char temp[32];
            strcpy(temp,&prebuff[sign]);
            prebuff[0]='';
            strcpy(prebuff,temp);
            strcat(prebuff,buff);
            cout<<"doushengle:"<<prebuff<<endl;
        }
        else
        {
            if(endat<8)
                strcpy(prebuff,&buff[endat]);
            else
                prebuff[0] = '';
        }
        readnum = fgetc(fin);
        endat = 0;
        sign = 0;
    }
    fclose(fin);
    fclose(fout);
}
int main()
{
    hufftree hf("E:\我会编程啦\我的练习\算法设计-研究生\哈夫曼\Aesop_Fables.txt");
    hf.CreateTree();
    hf.CreateCode();
    hf.showcode();
    hf.Compression();
    hf.Decompression();
}
//这里要说一个背景,那就是在windows下,它会做一个处理,就是写文件时,换行符会被转换成回车,换行符存在磁盘文件上,而读磁盘上的文件时,它又会进行逆处理,就是把文件中连续的回车,换行符转换成换行符。
//因此,在读取一个磁盘文件时,文本方式读取到文件内容很有可能会比二进制文件短,因为文本方式读取要把回车,换行两个字符变成一个字符,相当于截短了文件。但是为什么仅仅是可能呢?因为可能文中中不存在连着的45,42这两个字节(45是CR回车的ASCII码,42是换行符CL的ASCII码),也就不存在“截短”操作了,因此读到的内容是一样的。
//具体的来说,文件文件(以文本方式写的),最好以文本方式读。二进制文件(以二进制方式写的),最好以二进制方式读。不然可能会不正确。上面的已经分析了。
原文地址:https://www.cnblogs.com/caitian/p/8001148.html