哈夫曼(huffman )的编码

#pragma once
#include "stdafx.h"
#include "BinTree.h"
#include "BitsOper.h"
namespace Huffman
{
    using namespace std;
    std::map<std::string, int> mp;
    using line = std::vector<string>;
    using vctLine = std::vector<line>;
    using char_int = std::pair<char, int>;
    std::map<char, string> ms;
    std::map<char, string> codeMap;

    template<typename T>
    struct HuffTree
    {
        enum ChildState {
            LeftChild = 0,
            RightChild = 1,
            Root = 2    //特点 父节点为空
        };
        T data{ T() };    // 0 或者1
        HuffTree* Left{ nullptr };
        HuffTree* Right{ nullptr };
        HuffTree* Pairent{ nullptr };

        ChildState localState;
        void InsertLeft(HuffTree* ht)
        {
            Left = ht;
        }
        void InsertRight(HuffTree* ht)
        {
            Right = ht;
        }
        void Free()
        {
            //if (Left != nullptr) FreeHuffChild(Left);
            //if (Right != nullptr) FreeHuffChild(Right);
            FreeHuffTree(Left);
            FreeHuffTree(Right);
        }
        void FreeHuffTree(HuffTree* tree)
        {
            if (tree->Left)
            {
                FreeHuffTree(tree->Left);
                //delete tree->Left;
                tree->Left = nullptr;
            }
            if (tree->Right)
            {
                FreeHuffTree(tree->Right);
                //delete tree->Right;
                tree->Right = nullptr;
            }
            delete tree;
        }

        std::vector<T> ToList()
        {
            std::vector<T> vct;
            PushNodeToVct(vct, this);
            return vct;
        }
        //Breadth-First Search


        int ndepth = 0;
        string ss = "";
        void PushNodeToVct(std::vector<T>& vct, HuffTree* hu)
        {
            vct.push_back(hu->data);
            cout << "(" << hu->data.first << "," << hu->data.second << ")";
            //cout << (int)hu->data.second;
            ss += std::to_string(hu->data.second);
            if (hu->Left != nullptr)
            {
                ss += "->";
                cout << "->";
                PushNodeToVct(vct, hu->Left);
                ndepth++;
            }
            else {
                ndepth--;
                //ss.substr(0, ss.size() - 3);
                cout << endl;
                //return;
            }
            if (hu->Right != nullptr)
            {
                //cout << hu->data.first << endl;
                ss += "->";
                cout << "->";
                PushNodeToVct(vct, hu->Right);
                ndepth++;
            }
            else {
                //ss.substr(0, ss.size());
                cout << "
";
                //ss += "
";
                //cout << ss;
            }
            ndepth++;
        }
    };
    using HInfoT = HuffTree<char_int>;
    HInfoT* huffTree = nullptr;    //构造结果

    int BinaryStringToInt(string s)
    {
        uint64_t val = 0;
        for (int i = 0; i < s.size(); i++)
        {
            int asd = s[i] == '1' ? 1 : 0;
            val += (asd << s.size() - 1 - i);
        }
        return val;
    }
    std::string ToBinaryString(uint8_t byte)
    {
        string s = "";
        //
        for (int i = 7; i >= 0; i--)
        {
            uint8_t b = 1 << i;
            uint8_t bb = byte & b;
            if (bb == 0)
            {
                s += "0";
            }
            else {
                s += "1";
            }
        }
        return s;
    }
    std::string DecodeHuffman(std::vector<unsigned char> buff)
    {
        string bits = "";
        for (int i = 0; i < buff.size(); i++)
        {
            bits += ToBinaryString(buff[i]);
        }
        std::string rs = "";
        HInfoT* pCur = huffTree;
        for (int i = 0; i < bits.length(); i++)
        {
            if (bits[i] == '0')
            {
                if (pCur->Left != nullptr) {
                    pCur = pCur->Left;
                }
                else {
                    rs.push_back(pCur->data.first);
                    pCur = huffTree->Left;
                }
            }
            if (bits[i] == '1')
            {
                if (pCur->Right != nullptr)
                {
                    pCur = pCur->Right;
                }
                else {
                    rs.push_back(pCur->data.first);
                    pCur = huffTree->Right;
                }
            }
        }
        return rs;
    }

    void GenerateHuffCode(line& sr, HInfoT* hit, string& str)
    {
        if (hit->Left != nullptr)
        {
            str += "1";
            ms[hit->Left->data.first] = str;
            GenerateHuffCode(sr, hit->Left, str);
        }
        else {
            string c = str;
            c.erase(c.end() - 1);
            sr.push_back(c);
            return;
        }

        if (hit->Right != nullptr)
        {
            str += "0";
            ms[hit->Right->data.first] = str;
            GenerateHuffCode(sr, hit->Right, str);
        }
        //左子节点一定会有 right node 不一定有
        else {
            //由于先遍历左子节点
            string c = str;
            c.erase(c.end() - 1);
            sr.push_back(c);
        }
        //FindTree(sr, tree);
    }
    void GetCode(HInfoT* node, string& s)
    {
        if (node->localState == HInfoT::ChildState::LeftChild)
        {
            s += "0";
            if (node->Pairent != NULL)
            {
                GetCode(node->Pairent, s);
            }
        }
        if (node->localState == HInfoT::ChildState::RightChild)
        {
            s += "1";
            if (node->Pairent != NULL)
            {
                GetCode(node->Pairent, s);
            }
        }
        if (node->localState == HInfoT::ChildState::Root)
        {
            //s += "r";
        }
    }
    
    vector<unsigned char>  EncodeHuffman(std::string str)
    {
        using HInfoT = HuffTree<char_int>;
        std::map<char, int> mp;
        for (int i = 0; i < str.length(); i++)
        {
            if (mp.find(str[i]) == mp.end())
            {
                mp.insert(std::pair<char, int>(str[i], 1));
            }
            else {
                mp[str[i]] ++;
            }
        }
        std::deque<char_int> dqLst;
        for (auto it : mp)
        {
            dqLst.push_back(it);
            cout << it.first << endl;
        }

        std::function<bool(char_int&, char_int&)> asc =
            [](char_int& lhs, char_int& rhs) {
            return lhs.second < rhs.second;
        };
        std::sort(dqLst.begin(), dqLst.end(), asc);

        //当前概率 + 当前所指向的元素
        HInfoT* root = nullptr;// 左节点为 0 右节点1
        HInfoT* hinf = nullptr;
        std::deque<HInfoT*> dqNodes;
        std::function<bool(HInfoT*, HInfoT*)> ascNode =
            [](HInfoT* l, HInfoT* r) {
            return l->data.second < r->data.second;
        };

        for (int i = 0; i < dqLst.size(); i++)
        {
            HInfoT* h = new HInfoT;
            //char
            h->data = dqLst[i];
            h->data.first = dqLst[i].first;
            //概率
            h->data.second = dqLst[i].second;
            dqNodes.push_back(h);
            cout << "(" << h->data.first << "," << h->data.second << ")" << endl;
        }
        int pos;
        char ch;
        while (dqNodes.size() > 2)
        {
            hinf = new HInfoT();
            root = hinf;

            hinf->data.second = dqNodes[0]->data.second + dqNodes[1]->data.second;
            hinf->data.first = dqNodes[0]->data.first;
            int nLeft = dqNodes[0]->data.second < dqNodes[1]->data.second ? 0 : 1;
            int nRight = nLeft == 0 ? 1 : 0;
            hinf->Left = dqNodes[nLeft];
            dqNodes[nLeft]->Pairent = hinf;
            dqNodes[nLeft]->localState = HInfoT::LeftChild;

            dqNodes[nRight]->Pairent = hinf;
            dqNodes[nRight]->localState = HInfoT::RightChild;
            ch = hinf->Left->data.first;

            hinf->Right = dqNodes[nRight];
            //dqNodes.erase(dqNodes.begin());
            //dqNodes.erase(dqNodes.begin() + 1);
            dqNodes.pop_front();
            dqNodes.pop_front();
            dqNodes.push_back(hinf);
            std::sort(dqNodes.begin(), dqNodes.end(), ascNode);

        }
        int nLeft = dqNodes[0]->data.second < dqNodes[1]->data.second ? 0 : 1;
        int nRight = nLeft == 0 ? 1 : 0;
        root = new HInfoT();
        root->data.second = hinf->data.second + hinf->data.second;
        root->data.first = hinf->data.first;
        root->Left = dqNodes[nLeft];
        root->Right = dqNodes[nRight];
        root->localState = HInfoT::Root;
        dqNodes[nLeft]->Pairent = root;
        dqNodes[nLeft]->localState = HInfoT::ChildState::LeftChild;
        dqNodes[nRight]->Pairent = root;
        dqNodes[nRight]->localState = HInfoT::ChildState::RightChild;
        std::vector<HInfoT*> vn;
        std::function<void(std::vector<HInfoT*>*)> bfsFunc = [root](std::vector<HInfoT*>* vhint) {

            deque<HInfoT*> tmpStack;
            HInfoT* r = root;
            tmpStack.push_back(root);
            vhint->push_back(root);
            while (tmpStack.size() > 0)
            {
                r = tmpStack.front();
                tmpStack.pop_front();
                if (r->Left != nullptr)
                {
                    tmpStack.push_back(r->Left);
                    vhint->push_back(r->Left);
                }
                if (r->Right != nullptr)
                {
                    tmpStack.push_back(r->Right);
                    vhint->push_back(r->Right);
                }
            }

        };
        bfsFunc(&vn);
        string codeNodeStr;
        for (auto v : vn)
        {
            codeNodeStr.clear();
            //只记录底层节点的码,也就是真实的码
            if (v->Left == nullptr && v->Right == nullptr)
            {
                GetCode(v, codeNodeStr);
                std::reverse(codeNodeStr.begin(), codeNodeStr.end());
                codeMap[v->data.first] = codeNodeStr;
            }
        }
        cout << endl;
        for (auto vn : codeMap)
        {
            cout << "[" << vn.first << ":" << vn.second << "]";
        }
        cout << "codeMap" << endl;
        //获取编码表
        auto vct = root->ToList();
        for (auto v : vct)
        {
            std::cout << v.first << ":" << v.second << endl;
        }
        //生成Huffman表
        //小的为0 大的为1
        const int leftVal = 0;
        const int rightVal = 1;
        line vl;
        vl.push_back(string());
        string s = "";
        //GenerateHuffCode(vl, root, s);
        //for (int i = 0; i < vl.size(); i++)
        //{
        //    //cout << vl[i] << endl;
        //    string stt = "";
        //    for (int j = 0; j < vl[i].size(); j++)
        //    {
        //        if (j % 3 == 0)
        //        {
        //            stt += ",";
        //        }
        //        stt += vl[i][j];
        //    }
        //    cout << stt << endl;
        //}
        //cout << "---------------------" << endl;/* << pr.second */


        //std::string encoderStr;
        //char buff[2048];
        std::string sres;
        deque<char> sdq;    //用来存储字符串
        deque<string> dq;    //编码替换  需要换成每8个Bit为1字节
        deque<string> stringRes;    //最终结果 8 bit 为一个string
        vector<unsigned char> ucRes;    //字节表
        //std::copy(sdq.begin(), sdq.end(), str.begin());
        for (int i = 0; i < str.size(); i++)
        {
            sdq.push_back(str[i]);
        }
        unsigned char uch;
        while (sdq.size() > 0)
        {
            //存储010字符串
            dq.push_back(codeMap[sdq.front()]);
            sdq.pop_front();
            //buff = ms[str[i]];
        }
        int i = 0;
        string sChar;    //到8位就下一个
        s = "";
        for (int i = 0; i < dq.size(); i++)
        {
            s += dq[i];
        }
        int offset = 0;
        for (int i = 0; i < s.size(); i++)
        {
            //这里可以优化
            if (i % 8 == 0 && i != 0)
            {
                string tmp;
                tmp.resize(8);
                std::copy(s.begin() + i - 8, s.begin() + i, tmp.begin());
                stringRes.push_back(tmp);
                offset += 8;
            }
        }
        if (offset < s.size())
        {
            string tmp;
            tmp.resize(s.size() - offset);
            std::copy(s.begin() + offset, s.end(), tmp.begin());
            while (tmp.size() < 8) {
                tmp.push_back('0');
            }
            stringRes.push_back(tmp);
        }

        for (int i = 0; i < stringRes.size(); i++)
        {
            ucRes.push_back(BinaryStringToInt(stringRes[i]));
            //cout << ucRes[i] << endl;
        }
        int m = ucRes.size();
        cout << "uc res" << m << endl;
        huffTree = root;
        return ucRes;
    }
    
}

 构造过程:     1.先统计各个字符的出现概率

         2.找出最小的两个作为一个节点,该节点的概率为两个子节点的概率之和,继续排序,

         3.重复上一步骤,知道剩下的节点为1

         4.约定左子节点和右子节点 为0 或者1

其中还有一个类似的编码类似,算术编码,算术编码的过程是要把概率用整数表示。构造完成之后然后用对应的节点替换,节点表示(从根节点到节点)

解码过程,使用遍历,遇到0然后指向左子节点(按照约定),遇到1(指向右子节点),然后指向下一节点。如果为空,则把该子节点所表示的字符打印。

然后从根节点开始。

  中间huffman树是可以按位操作的,每个节点表示0或者1 占据大小为1bit,所以在替换环节,用bit oper 或者bit vector来处理。读取的时候也一样

原文地址:https://www.cnblogs.com/yang131/p/13857953.html