数据结构 最优字符串编码 (哈夫曼编码)

Description

任给一串大写英文字母序列(例如 MNOPPPOPMMPOPOPPOPNP),编程求解一套二进制编码,使得该序列编码后的总长度最短。

Input

有多组输入数据,每组一串字符串,每个字符串长度不超过1000且只包含大写英文字母。

Output

每组数据先输出一行组数(格式为 Case #k:),接下来每行输出一个字母的编码,满足字典序小的字母的编码字典序也尽量小,最后一行输出编码后串的长度,若长度小于50,则在同一行接着输出编码后的字符串,格式见样例。

Sample Input

ABC

Sample Output

Case #1:
A: 0
B: 10
C: 11
5 01011

HINT

考察知识点:哈夫曼树, 时间复杂度O(nlogn),空间复杂度O(n),好多人都过不了,数据已经减少了,大家可以试试。


Append Code

析:就是先建树,再遍历树,然后去求每个叶子结点的哈夫曼编码。建树可以用优先队列,每次优先取出频率低的结点,频率相同时先取出字典序大的;在遍历时,向左就加0,向右就加1,

到叶子结点时就把当前得到的编码存储起来。

代码如下:

#pragma comment(linker, "/STACK:1024000000,1024000000")
#include <cstdio>
#include <string>
#include <cstdlib>
#include <cmath>
#include <iostream>
#include <cstring>
#include <set>
#include <queue>
#include <algorithm>
#include <vector>
#include <map>
#include <cctype>
#include <cmath>
#include <stack>
//#include <tr1/unordered_map>
#define freopenr freopen("in.txt", "r", stdin)
#define freopenw freopen("out.txt", "w", stdout)
using namespace std;
//using namespace std :: tr1;
 
// Shorthand for long long.
typedef long long LL;
// Pair of ints; not referenced anywhere in the visible code.
typedef pair<int, int> P;
// Large sentinel values (int, double and LL flavours); not referenced in the visible code.
const int INF = 0x3f3f3f3f;
const double inf = 0x3f3f3f3f3f3f;
const LL LNF = 0x3f3f3f3f3f3f;
// pi, computed as acos(-1).
const double PI = acos(-1.0);
// Small tolerance constant; not referenced in the visible code.
const double eps = 1e-8;
// Size of the input buffer `s` (problem statement: at most 1000 characters per string).
const int maxn = 1000 + 5;
// Template constants; not referenced in the visible code.
const LL mod = 10000000000007;
const int N = 1e6 + 5;
// Paired offset tables: the first four (dr[i], dc[i]) entries are the axis-aligned unit
// steps, the last four are the diagonals. Presumably row/column deltas for grid walks.
const int dr[] = {-1, 0, 1, 0, 1, 1, -1, -1};
const int dc[] = {0, 1, 0, -1, 1, -1, 1, -1};
// Paired offset tables holding the eight (+-1, +-2) / (+-2, +-1) combinations
// (presumably knight moves); not referenced in the visible code.
const int hr[]= {-2, -2, -1, -1, 1, 1, 2, 2};
const int hc[]= {-1, 1, -2, 2, -2, 2, -1, 1};
// Hex[v] is the 4-character binary representation of v for v in 0..15.
const char *Hex[] = {"0000", "0001", "0010", "0011", "0100", "0101", "0110", "0111", "1000", "1001", "1010", "1011", "1100", "1101", "1110", "1111"};
// Greatest common divisor via the Euclidean algorithm; gcd(a, 0) yields a.
inline LL gcd(LL a, LL b){
    while(b != 0){
        const LL rem = a % b;
        a = b;
        b = rem;
    }
    return a;
}
// n: length of the current input string (assigned in main) and the row bound read by is_in.
// m: column bound read by is_in; never assigned in the visible code, so it keeps its
//    zero initial value.
int n, m;
// 1-based tables (index 0 is a placeholder) whose values match the days per month of a
// common year (mon) and of a leap year (monn); not referenced in the visible code.
const int mon[] = {0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
const int monn[] = {0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
// Returns the smaller of two ints (b when they compare equal).
inline int Min(int a, int b){
    if(a < b)  return a;
    return b;
}
// Returns the larger of two ints (b when they compare equal).
inline int Max(int a, int b){
    if(a > b)  return a;
    return b;
}
// Returns the smaller of two LL values (b when they compare equal).
inline LL Min(LL a, LL b){
    if(a < b)  return a;
    return b;
}
// Returns the larger of two LL values (b when they compare equal).
inline LL Max(LL a, LL b){
    if(a > b)  return a;
    return b;
}
// True when (r, c) lies inside [0, n) x [0, m), using the file-level n and m as bounds.
inline bool is_in(int r, int c){
    if(r < 0 || r >= n)  return false;
    if(c < 0 || c >= m)  return false;
    return true;
}
// One node of the Huffman tree. Nodes live in the global pool `a`; `id` is the node's
// index in that pool so that copies stored in the priority queue can be mapped back.
struct Node{
    int id;          // index of this node in the pool
    int num;         // frequency (sum of the children's frequencies for internal nodes)
    int ch;          // letter index 0..25; internal nodes inherit it from their left child
    Node *lchild;    // child reached by appending '0' (null for a leaf)
    Node *rchild;    // child reached by appending '1' (null for a leaf)

    // Ordering for std::priority_queue (a max-heap): a node is "less" when its frequency
    // is higher, or when frequencies tie and its letter index is smaller. The queue's top
    // is therefore the lowest frequency, with ties going to the larger letter index.
    bool operator < (const Node &p) const{
        if(num != p.num)  return num > p.num;
        return ch < p.ch;
    }
};
// Node pool: solve() stores the leaves first and appends each merged node after them;
// Node::id is an index into this array.
Node a[120];
// Current input string, as read by main.
char s[maxn];
// num[c] = number of occurrences of letter 'A' + c in the current string.
int num[30];
// Number of pool entries of `a` currently in use; after solve() the root is a[cnt-1].
int cnt;
// Set by solve() when the string contains exactly one distinct letter.
bool ok;
// str[c] = code assigned to letter 'A' + c; left empty for letters that do not occur.
string str[30];
 
void solve(){
    for(int i = 0; i < 26; ++i)  if(num[i]){
        a[cnt].ch = i;
        a[cnt].id = cnt;
        a[cnt++].num = num[i];
    }
    priority_queue<Node> pq;
    for(int i = 0; i < cnt; ++i)  pq.push(a[i]);
    if(cnt == 1) ok = true;
    while(pq.size() > 1){
        int id1 = pq.top().id, num1 = pq.top().num, ch1 = pq.top().ch; pq.pop();
        int id2 = pq.top().id, num2 = pq.top().num, ch2 = pq.top().ch; pq.pop();
        if(num1 == num2 && ch1 > ch2){
            a[cnt].ch = ch2;
            a[cnt].num = num1 + num2;
            a[cnt].id = cnt;
            a[cnt].lchild = &a[id2];
            a[cnt].rchild = &a[id1];
        }
        else{
            a[cnt].ch = ch1;
            a[cnt].num = num1 + num2;
            a[cnt].id = cnt;
            a[cnt].lchild = &a[id1];
            a[cnt].rchild = &a[id2];
        }
        pq.push(a[cnt]);
        ++cnt;
    }
}
 
// Walks the subtree rooted at p, where s is the code accumulated on the path so far.
// A left edge appends '0', a right edge appends '1'; at a leaf the code is stored
// in str[] under the leaf's letter index.
void dfs(Node *p, string s){
    Node *zeroChild = p->lchild;
    Node *oneChild = p->rchild;
    if(!zeroChild && !oneChild){
        str[p->ch] = s;
        return;
    }
    if(zeroChild)  dfs(zeroChild, s + '0');
    if(oneChild)  dfs(oneChild, s + '1');
}
 
// Entry point. Reads whitespace-separated strings until EOF. For each string it prints
//   "Case #k:"                       the 1-based case number,
//   "<letter>: <code>"               one line per letter that occurs, in A..Z order,
//   "<length>[ <encoded string>]"    the encoded length, followed by the encoded string
//                                    only when that length is below 50.
// Input is assumed to consist of uppercase letters 'A'..'Z' only, as the problem statement
// guarantees; other characters would index num[] and str[] out of range.
int main(){
    int kase = 0;
    // The field width keeps the read (plus the terminating NUL) inside s[maxn], maxn = 1005.
    while(scanf("%1004s", s) == 1){
        printf("Case #%d:\n", ++kase);

        // Reset the node pool and the per-letter codes left over from the previous case.
        for(int i = 0; i < 120; ++i){
            a[i].id = a[i].num = 0;
            a[i].lchild = a[i].rchild = 0;
            if(i < 30) str[i].clear();
        }

        // Count letter frequencies.
        n = static_cast<int>(strlen(s));
        memset(num, 0, sizeof num);
        for(int i = 0; i < n; ++i){
            ++num[s[i]-'A'];
        }

        cnt = 0;
        ok = false;
        solve();

        // A single distinct letter yields a one-node tree with no edges, so it is given
        // the code "0" directly; otherwise codes are read off the tree from the root.
        if(ok)  str[s[0]-'A'] = "0";
        else  dfs(&a[cnt-1], "");

        for(int i = 0; i < 26; ++i)
            if(str[i] != "")  printf("%c: %s\n", i+'A', str[i].c_str());

        string ans;
        for(int i = 0; i < n; ++i)
            ans += str[s[i]-'A'];

        // size() returns size_t; convert explicitly so the argument matches %d.
        printf("%d", static_cast<int>(ans.size()));
        if(ans.size() < 50)  printf(" %s", ans.c_str());
        printf("\n");
    }
    return 0;
}
原文地址:https://www.cnblogs.com/dwtfukgv/p/5990690.html