文本压缩版本三

Heap.h

#pragma once

#include <vector>
#include <algorithm>

using namespace std;

/********************************************************************************
* @File name: Heap.h
* @Author: SevenDuke
* @Version: 1.2
* @Date: 2018.12.18
* @Description: 构建最小堆
********************************************************************************/

/**
*仿函数
*如果左边的值小于右边的值返回true,否则返回false
*/
template<class T>
struct Lesser
{
bool operator()(const T& l, const T& r)
{
return l < r;
}
};

/**
*仿函数
*如果左边的值大于右边的值返回false否者返回true
*/
template<class T>
struct Greater
{
bool operator()(const T& l, const T& r)
{
return l > r;
}
};

template<class T, class Compare = Lesser<T> >
class Heap
{
public:
Heap()
{}

/**
*a 指向的是一个T型的数组
*size 数组的长度
*/
Heap(const T* a, size_t size)
{
//将数组中的元素加入到vector容器_a中
for(size_t i = 0; i < size; i++)
{
_a.push_back(a[i]);
}

for(int i = (_a.size() - 2) / 2; i >= 0; i--)
{
_AdjustDown(i);
}
}

//将元素x加入到堆中,并且进行顺序维护
void Push(const T& x)
{
_a.push_back(x);
_AdjustUp(_a.size() - 1);
}

//删除堆顶元素,并且进行顺序维护
void Pop()
{
swap(_a[0], _a[_a.size() - 1]);
_a.pop_back();
_AdjustDown(0);
}

//返回堆顶元素,要进行非空判断
T& Top()
{
if(!_a.empty())
return _a[0];
}

//判空堆
bool Empty()
{
if(_a.empty())
return false;
else
return true;
}

//返回堆的大小
size_t Size()
{
if(!_a.empty())
return _a.size();
}

protected:
//向上排序,从孩子到父母
void _AdjustUp(int child)
{

/**
* 0
* 1 2
* 3 4 5 6
* lchild = parent * 2 + 1;
* rchile = parent * 2 + 2;
* 随意知道一个孩子的序号 child
* parent = (child - 1) / 2;
*/

int parent = (child - 1) / 2;
Compare cmp;
while(child > 0)
{
if(cmp(_a[child], _a[parent]))
{
swap(_a[child], _a[parent]);
child = parent;
parent = (child - 1) / 2;
}
else
break;
}
}

//向下排序,从父母到孩子
void _AdjustDown(int parent)
{
int child = parent * 2 + 1;
Compare cmp;
while(child < _a.size())
{
if(child + 1 < _a.size() && cmp(_a[child+1], _a[child]))
{
++child;
}

if(cmp(_a[child],_a[parent]))
{
Swap(_a[child], _a[parent]);
parent = child;
child = parent * 2 + 1;
}
else
break;
}
}

//交换函数
void Swap(T& l, T& r)
{
T temp = l;
l = r;
r = temp;
}
protected:
vector<T> _a;
};

HuffmanTree.h

#pragma once
#include "Heap.h"
#include <stdio.h>

/********************************************************************************
* @File name: HuffmanTree.h
* @Author: SevenDuke
* @Version: 1.2
* @Date: 2018.12.18
* @Description: 构建HuffmanTree
********************************************************************************/

template<class T>
struct HuffmanTreeNode
{
HuffmanTreeNode<T> *lchild; //左节点
HuffmanTreeNode<T> *rchild; //右节点
HuffmanTreeNode<T> *parent; //父节点
T _weight; //附带权值的结构体,其中包含了哈夫曼编码等

HuffmanTreeNode(const T& weight)
:lchild(NULL)
,rchild(NULL)
,parent(NULL)
,_weight(weight)
{}
};

template<class T>
class HuffmanTree
{
typedef HuffmanTreeNode<T> Node; //重定义HuffmanTreeNode<T>为Node
public:
/** HuffmanTrr的默认构造器*/
HuffmanTree()
:_root(NULL)
{}


/** 销毁HuffmanTree*/
~HuffmanTree()
{
_Destory(_root);
}

/** 带参构造器*/
HuffmanTree(const T* a, size_t size, const T& invalid)
{
_root = _CreateTree(a, size, invalid);
}

Node* GetRoot()
{
return _root;
}
protected:

/********************************************************
* HuffmanTree的带参构造器
* 功能:通过给定的数据,构造一棵HuffmanTree
* 参数:
* @a: 指向T型数组的首地址
* @size: 传入的T型数组的长度
* @invalid: 判断非法数据的数值
* return: 一个构建成功的树的节点
*******************************************************/
Node* _CreateTree(const T* a, size_t size, const T& invalid)
{
/**
* 将传入的数据构建成最小堆
* 1.构建仿函数
* 2.声明并且构建最小堆
*/

Heap<Node*, compare> minHeap;
for(int i = 0; i < size; i++)
{
if(a[i] != invalid)
{
minHeap.Push(new Node(a[i]));
}
}

/** 按照Huffman树的构建方法构建HuffmanTree*/
while(minHeap.Size() > 1)
{
Node* lchild = minHeap.Top();
minHeap.Pop();
Node* rchild = minHeap.Top();
minHeap.Pop();

Node* parent = new Node(lchild->_weight + rchild->_weight);
parent->lchild = lchild;
parent->rchild = rchild;
lchild->parent = parent;
rchild->parent = parent;
minHeap.Push(parent);
}
return minHeap.Top();
}

/** 传入输得根节点,销毁一棵树*/
void _Destory(Node* root)
{
if(root == NULL)
{
return;
}
_Destory(root->lchild);
_Destory(root->rchild);
delete root;
}

struct compare
{
bool operator()(const Node* l, const Node* r)
{
return l->_weight < r->_weight;
}
};

protected:
Node* _root;
};

FileCompress.h

#pragma once
#include "HuffmanTree.h"
#include "MyStack.h"
#include <Windows.h>
#include <string>
#include <stdio.h>
#include <algorithm>

using namespace std;
/********************************************************************************
* @File name: FileComprees.h
* @Author: SevenDuke
* @Version: 1.2
* @Date: 2018.12.18
* @Description: 文件的压缩和解压
********************************************************************************/

/******************************************
* @_ch: 字符
* @_count: _ch字符出现的个数
* @_code: 该字符的Huffman编码
* 扩展:对相关的运算符进行了重载
*******************************************/
struct CharInfo
{
char _ch;
int _count;
string _code;

CharInfo(unsigned char ch = 0)
:_ch(ch)
,_count(0)
{}

/** 重载了+, !=, <*/
CharInfo operator+(const CharInfo &x)
{
CharInfo temp;
temp._count = _count + x._count;
return temp;
}

bool operator!=(const CharInfo &x) const
{
return _count != x._count;
}

bool operator<(const CharInfo &x) const
{
return _count < x._count;
}
};

template<class T>
class FileCompress
{
public:
FileCompress()
{
for(size_t i = 0; i < 256; i++)
{
_infos[i] = i;
}
}
/** 对filename参数指向的文件进行压缩*/
void Compress(const char* filename)
{
/**
* 第一步: 将文件中的数据统计到 _infos数组里面去,用charCount来记录字符的总个数
* 第二步: 用_infos数组里面的数据来构造HuffmanTree,并且得到Huffman编码
* 第三部: 对得到的文件中的数据进行压缩
*
*
*/

/** 第一步*/
FILE* fOutFile = fopen(filename, "rb"); //以二进制的形式读取
// assert(fOutFile); //检查是否创建文件指针成功,不成功就报错
int charCount = 0; //统计字符的总个数
char ch = fgetc(fOutFile);

while(ch != EOF)
{
charCount++;
_infos[(size_t)ch]._count++;
ch = fgetc(fOutFile);

}

/** 第二步*/
CharInfo invalid(0); //声明违法数据
HuffmanTree<CharInfo> tree(_infos, 256, invalid); //通过HuffmanTree类的带参构造构造HuffmanTree
_NorOfGenerateHuffmanCode(tree.GetRoot()); //生成Huffman编码


/** 第三步
* 将压缩之后的数据写入另外一个名叫一个文件中去
*/

string compressFilename = filename;
compressFilename += "Compress";
FILE* fInCompress = fopen(compressFilename.c_str(), "wb"); //以二进制的形式写入文件

/**
*关于fseek()函数3个参数的说明
* 1 文件指针
* 2 偏移量(为负向左便宜,为正向右偏移)
* 3 SEEK_SET 从头开始 SEEK_CUR 从当前位置开始 SEEK_END 从尾部开始
*/

fseek(fOutFile, 0L, SEEK_SET); //重新对fOutFile指向的文件中进行遍历,重置文件指针设置文件指针从头开始
ch = fgetc(fOutFile);
// unsigned char value = 0; //用来记录转换成的二进制编码,通过位运算去记录
int size = 0; //已经转化的位数
char value = 0;
while(ch != EOF)
{
/**
* 步骤:
* 1.得到该字符的哈夫曼编码
* 2.将哈夫曼编码转换为二进制数
*/
string &code = _infos[(size_t)ch]._code;

for(size_t i = 0; i < code.size(); i++)
{
value <<= 1;

if(code[i] == '1')
{
value |= 1;
}
++size;
if(size == 8)
{
fputc(value, fInCompress);
value = 0;
size = 0;
}
}
ch = fgetc(fOutFile);
}
if(size > 0)
{
value <<= (8-size);
fputc(value, fInCompress);
}

/**
* 写_infos数组的配置文件,方便解压时候使用
*/

string configFilename = filename;
configFilename += "Config";

/**
* int fputs(const char *s, FILE *stream);
* char * fgets(char * s, int n,FILE *stream);
* 与c语言兼容,在c语言中没有string类型,故必须通过string类对象的成员函数c_str()把string 对象转换成c中的字符串样式。
*/
FILE* fInConfig = fopen(configFilename.c_str(), "wb");

string line;
char buffer[128];

//将字符的总个数写入配置文件的第一行

line += itoa(charCount, buffer, 10);
line += " ";
fputs(line.c_str(), fInConfig);
line.clear(); //清空line字符串里面的数据

for(size_t i = 0; i < 256; i++)
{
if(_infos[i]._count)
{
line += _infos[i]._ch;
line += ',';
line += itoa(_infos[i]._count, buffer, 10);
line += ' ';

fputs(line.c_str(), fInConfig);
line.clear();
}
}

//关闭文件指针
fclose(fOutFile);
fclose(fInCompress);
fclose(fInConfig);
}

/** 反编译函数,将Huffman编码转化为正常的字符数据*/
void UnCompress(const char* filename)
{
/**
* 步骤:
* 第一步: 将配置文件中的数据读取到infos数组里面去
* 第二步: 重新构建HuffmanTree
* 第三步: 对转录而成的huffman编码进行反转录并写入到文件中去
*/

/** 第一步*/

string ConfigFilename = filename;
ConfigFilename += "Config";
FILE* fOutConfig = fopen(ConfigFilename.c_str(), "rb");

string line;

_ReadLine(fOutConfig, line);
int charCount = atoi(line.c_str()); //int atoi ( const char * str );

line.clear();

while( _ReadLine(fOutConfig, line))
{
if(!line.empty())
{
unsigned char ch = line[0];
_infos[ch]._count = atoi(line.substr(2).c_str());
}
else
{
line.clear();
_ReadLine(fOutConfig, line);

unsigned char ch = ' ';
_infos[ch]._count = atoi(line.substr(1).c_str());
}
}

/** 第二步:重建HuffmanTree*/

CharInfo invalid(0);
HuffmanTree<CharInfo> tree(_infos, 256, invalid);

/** 第三步:读.compress文件,写入到.uncompress文件*/

string compressFilename = filename;
compressFilename += "Compress";
FILE* fOutCompress = fopen(compressFilename.c_str(), "rb");

string uncompressFilename = filename;
uncompressFilename += "Uncompress";
FILE* fInUncompress = fopen(uncompressFilename.c_str(), "wb");

HuffmanTreeNode<CharInfo>* root = tree.GetRoot();
HuffmanTreeNode<CharInfo>* cur = root;

int pos = 7;
char ch = fgetc(fOutCompress);

while(true)
{

if(ch & (1<<pos))
cur = cur->rchild;
else
cur = cur->lchild;

//已经找到字符,进行提取
if(cur->lchild == NULL && cur->rchild == NULL)
{
fputc(cur->_weight._ch, fInUncompress);

cur = root;

if(--charCount == 0) //huffman编码字符已经用完
{
break;
}
}
--pos;
if(pos == -1)
{
ch = fgetc(fOutCompress);
pos = 7;
}
}
fclose(fOutCompress);
fclose(fInUncompress);
}

protected:
/**
* 传入一棵构建好的哈夫曼树,生成HuffmanCode,保存在CharInfo._code
* 递归实现
*/

void _GenerateHuffmanCode(HuffmanTreeNode<CharInfo>* root)
{
if(root == NULL)
{
return;
}
_GenerateHuffmanCode(root->lchild);
_GenerateHuffmanCode(root->rchild);

Translate(root);
}

/**
* 传入一棵构建好的哈夫曼树,生成HuffmanCode,保存在CharInfo._code
* 非递归实现
*/

bool _NorOfGenerateHuffmanCode(HuffmanTreeNode<CharInfo>* root)
{
typedef HuffmanTreeNode<CharInfo>* t;
MyStack<t > s;
while(!s._isEmpty())
{
s._Pop();
}

t cur = NULL; //当前结点
t pre = NULL; //前一次访问的结点

if(root == NULL)
{
fprintf(stderr, "the HuffmanTree is null. ");
return false;
}

s._Push(root);

int i = 0;
while(!s._isEmpty())
{
cur = s._Top()->date;
if(cur->lchild == NULL && cur->rchild == NULL)
{
s._Pop();
Translate(cur);
pre = cur;
}
else if(pre != NULL && (cur->lchild == pre || cur->rchild == pre))
{
s._Pop();
pre = cur;
}
else
{
if(cur->rchild != NULL)
{
s._Push(cur->rchild);
}
if(cur->lchild != NULL)
{
s._Push(cur->lchild);
}
}
}
return true;
}


/** 生成转译编码*/
void Translate(HuffmanTreeNode<CharInfo>* root)
{
if(root->lchild == NULL && root->rchild == NULL)
{

HuffmanTreeNode<CharInfo>* cur = root;
HuffmanTreeNode<CharInfo>* parent = cur->parent;

string& code = _infos[(size_t)(cur->_weight._ch)]._code;

while(parent)
{
if(parent->lchild == cur)
{
code += '0';
}
if(parent->rchild == cur)
{
code += '1';
}

cur = parent;
parent = cur->parent;
}

//反转译一下得到的字符编码
reverse(code.begin(), code.end());
}
}

/** 读取文件的其中一行数据,并写入line字符串*/
bool _ReadLine(FILE* filename, string& line)
{
char ch = fgetc(filename);

if(ch == EOF)
{
return false;
}

line.clear();
while(ch != EOF && ch != ' ')
{
line += ch;
ch = fgetc(filename);
}
return true;
}

protected:
CharInfo _infos[256];
};

 MyStack.h

#pragma once
#include<cstdio>

template<class T>
struct Node
{
T date;
struct Node<T> * next;

Node():next(NULL)
{}

Node(const T& date)
:date(date)
,next(NULL)
{}
};

template<class T>
class MyStack
{
typedef Node<T> Node;

public:
MyStack()
{
_Create();
}

~MyStack()
{
_Clear();
}

public:
bool _Create()
{
Nodecount = 0;
head = new Node;
}

bool _Clear()
{
if(head->next != NULL)
{
Node * tmp = head->next;
head->next = head->next->next;
delete tmp;
}
}

bool _Push(const T& x)
{
Node* pNode = new Node(x);
pNode->next = head->next;
head->next = pNode;

Nodecount++;
}

T& _Pop()
{
Node* tmp = head->next;
head->next = head->next->next;
T t = tmp->date;
delete tmp;
Nodecount--;
return t;
}

size_t _Size()
{
return Nodecount;
}

Node* & _Top()
{
if(!_isEmpty())
return head->next;
}

bool _isEmpty()
{
if(Nodecount)
return false;
else
return true;
}
private:
int Nodecount;
Node *head;
};

FileUtil.h

#include <cstdio>

#include "FileCompress.h"

class FileUtil{
public:
FileUtil()
{
m_NoCopyTime = 0;
m_CopyTime = 0;
}
public:
void Time();

void ViewFile(const string & m_Filename);

bool FileSize(const string & m_Filename);

void FileCopy(const string & m_Filename);

void CompressTest(const string & m_Filename);

void UncompressTest(const string & m_Filename);
private:
int m_NoCopyTime;
int m_CopyTime;
};

void FileUtil::Time(){
printf("压缩和解压缩总共用时:%d ", m_NoCopyTime);
printf("复制文件总共用时:%d ", m_CopyTime);
}

void FileUtil::ViewFile(const string & Filename)
{
FILE * fOutput = fopen(Filename.c_str(), "r");
char ch = fgetc(fOutput);
while(ch != EOF)
{
cout << ch;
ch = fgetc(fOutput);
}
fclose((fOutput));
}


bool FileUtil::FileSize(const string & Filename)
{
FILE *fp = fopen(Filename.c_str(), "r");
if( fp == NULL )
{
return false;
}
fseek(fp,0L,SEEK_END);

int Filesize = ftell(fp);
int divisor = 1;
while(Filesize / divisor > 1000)
{
divisor *= 10;
}
cout << "文件的大小:" << Filesize / divisor * divisor * 1.0/1000000 << "M" << endl;

fclose(fp);
return true;
}

void FileUtil::FileCopy(const string & Filename){
FILE * fOutFile = fopen(Filename.c_str(), "rb");

string FileCopyname = Filename;
FileCopyname += "OfCopyFile";
FILE * fInFile = fopen(FileCopyname.c_str(), "wb");

int CopyBegin = GetTickCount();

char ch = fgetc(fOutFile);
while(ch != EOF){
fputc(ch, fInFile);
ch = fgetc(fOutFile);
}

int CopyEnd = GetTickCount();
m_CopyTime += CopyEnd - CopyBegin;
printf("复制所用时间:%d ", m_CopyTime);

printf("复制完成!!!");
fclose(fOutFile);
fclose(fInFile);
}

void FileUtil::CompressTest(const string & Filename)
{
//压缩
FileCompress<CharInfo> compress;

int CompressBegin = GetTickCount();

compress.Compress(Filename.c_str()); //对文件Input里面的内容进行压缩

int CompressEnd = GetTickCount();

cout << "文件:" + Filename << "压缩成功!!!" << endl;

cout<<"压缩用时:"<<CompressEnd-CompressBegin<<" ms"<<endl;
m_NoCopyTime += CompressEnd-CompressBegin;
}

void FileUtil::UncompressTest(const string &Filename)
{
//解压缩
FileCompress<CharInfo> uncompress;

int UncompressBegin = GetTickCount();

uncompress.UnCompress(Filename.c_str()); //传入文件名称,对文件的编码进行解码

int UncompressEnd = GetTickCount();

cout << "文件:" + Filename << "解压成功!!!" << endl;

cout<<"解压缩用时:"<<UncompressEnd-UncompressBegin<<" ms"<<endl;
m_NoCopyTime += UncompressEnd-UncompressBegin;
}

View.h

#include <cstdio>
#include <iostream>
#include <string>
#include "FileCompress.h"
#include "FileUtil.h"

using namespace std;

FileUtil Util;

void MainView()
{
void ViewFile(const string & Filename); //读取文件件的内容
bool FileSize(const string & Filename); //获取文件的大小

printf("******************************************************* ");
printf("********** 1.压缩文件 2.解压文件 ************ ");
printf("********** 3.查阅源文件 4.查阅压缩文件 ************ ");
printf("********** 5.查阅解压缩文件 6.复制源文件 ************ ");
printf("********** 7.查阅时间 8.退出程序 ************ ");


int choose = -1;
cin >> choose;

switch(choose)
{
case 1:
printf("请输入需要压缩的文件名称:");
{
string Filename;
cin >> Filename;
Util.CompressTest(Filename);
}
break;
case 2:
printf("请输入需要压缩的文件名称: ");
{
string Filename;
cin >> Filename;
Util.UncompressTest(Filename);
}
break;
case 3:
{
Util.FileSize("Input");
Util.ViewFile("Input");
}

break;
case 4:
{
Util.FileSize("InputCompress");
Util.ViewFile("InputCompress");
}

break;
case 5:
{
Util.FileSize("InputUncompress");
Util.ViewFile("InputUncompress");
}

break;
case 6:
{
Util.FileCopy("Input");
}
break;
case 7:
{
Util.Time();
}
break;
case 8:
exit(1);
default:
printf("输入有错,请重新输入!!!");
}


for(int i = 0; i < 1000000000; i++);
system("cls");
MainView();

}

ViewText.cpp

#include <iostream>
#include <Heap.h>

using namespace std;

int main(){
MyStack<int> s;

for(int i = 0; i < 10; i++){
s._Push(i+ i%2);
}

while(!s._isEmpty()){
cout << s._Pop() <<endl;
}

return;
}

原文地址:https://www.cnblogs.com/854594834-YT/p/10166877.html