哈夫曼编码的实现

　　c语言可以说是一门让人既爱又恨的语言，其效率是众所周知的，但其出现的bug也是大名鼎鼎。对于高手来说，它是多么优雅，可是对门外汉而言，可以掐着指头一年一年数着啥时候能入门。或许很多人不同意这句话，可在愚钝的我身上是适得其所。最近学习c语言，通过实现一些数据结构练手，为一些bug恼火过，也为解决一些bug而拍案而起。没怎么写过博客，写得不好敬请大家谅解。下面分享一下哈夫曼编码过程。最后附上代码。

　　首先声明代码有bug，当输入的字符串只含一种字符时发生异常。本来想修正这个问题的，但这两天写累了，想歇歇。如果有高手其中不足，小的不胜感激。废话不多说，步入正题吧。

　　哈夫曼编码思路比较明了，就是每次根据最小的两个节点（每次找到最小权值节点时都要马上删掉该节点）生成新的根节点，根节点的权重是这两个节点权重之和，然后把新生成的根节点放进节点堆里，待下一轮筛选选择最小值，直到节点堆里只剩下一个节点，也就是哈夫曼树的树根啦。我的实现思路是，为输入的每一种字符统计其出现的次数作为权重做成链表，然后根据链表生成哈夫曼树，接着根据构造好的哈夫曼树进行编码。到这里就可以设计数据结构了，数据结构设计如下：

View Code

 1 typedef struct tNode {
 2     char ch;
 3     int weight;
 4     struct tNode *left;//左孩子
 5     struct tNode *right;//右孩子
 6 } tnode;
 7 
 8 typedef struct cNode {
 9     char ch;
10     int count;
11     struct cNode *next;
12     tnode *tree;//指向当前节点的左子树的跟节点
13 } clist;
14 
15 typedef struct endeNode {
16     char ch;
17     char code[20];
18     struct endeNode *next;
19 } codelist;

　　接下来一步就是对输入字符进行统计，构建单链表：

View Code

 1 /**************************************************
 2 Description:将字符添加进链表里（出现相同字符则不用新建节点，仅仅更改该字符出现的次数即可）
 3 **************************************************/
 4 void
 5 pull_in_list(char in_string[])
 6 {
 7     int i;
 8     clist **link;
 9     clist *current;
10     clist *new_node;
11     char ch;
12     int exist;//判断字符是否已经存在，0表示不存在，1表示存在
13 
14     char_count = strlen(in_string);
15 
16     for(i = 0; (ch = in_string[i]) != '\0'; i += 1)
17     {    
18         char_mount += 1;
19         exist = 0;
20         link = &clistp;
21         while((current = *link) != NULL)
22         {
23             link = &current->next;
24             if(current->ch == ch)
25             {
26                 current->count += 1;
27                 exist = 1;
28                 continue;
29             }
30         }
31         
32         //in_string += 1;
33         if(exist == 1)
34         {
35             continue;
36         }
37 
38         new_node = (clist *)malloc(sizeof(clist));
39         assert(new_node != NULL);
40         new_node->ch = ch;
41         new_node->count = 1;
42         new_node->next = NULL;
43         new_node->tree = NULL;
44 
45         *link = new_node;
46     }
47 
48     current = clistp;
49     char_num = count_rest_list();
50     printf("各个字符出现的次数为：\n");
51     while(current != NULL)
52     {
53         printf("%c: %d 次\t", current->ch, current->count);
54         current = current->next;
55     }
56     printf("\n");
57 }

　　好了，既然链表已经创建完成，是时候构建哈夫曼树了：

View Code

/**************************************************
Description:构建哈夫曼树
**************************************************/
void
create_huffman_tree()
{
    int min1;//链表最小值
    int min2;//链表次小值
    clist *current;//指向链表节点的指针
    clist *pre;//最小节点的前一节点
    clist *min1_l;
    clist *min2_l;
    clist **link;
    clist *temp_pre;

    clist *new_lnode;
    tnode *new_tnode;
    tnode *left;
    tnode *right;

    while(count_rest_list() >= 2)
    {        
        current = clistp;
        min1 = clistp->count;
        min1_l = clistp;
        while(current != NULL)
        {
            if(min1 > current->count)
            {
                min1 = current->count;
                temp_pre = pre;
                min1_l = pre->next;
            }
            pre = current;
            current = current->next;
        }
        //删除最小值节点
        if(min1_l == clistp)
        {
            clistp = min1_l->next;
        }
        else
        {
            temp_pre->next = min1_l->next;
        }

        current = clistp;
        min2 = clistp->count;
        min2_l = clistp;
        while(current != NULL)
        {
            if(min2 > current->count)
            {
                min2 = current->count;
                temp_pre = pre;
                min2_l = pre->next;
            }
            pre = current;
            current = current->next;
        }
        //删除次小值节点
        if(min2_l == clistp)
        {
            clistp = min2_l->next;    
        }
        else
        {
            temp_pre->next = min2_l->next;
        }
        
        //左子树
        if(min1_l->tree == NULL)
        {
            left = (tnode *)malloc(sizeof(tnode));
            assert(left != NULL);
            left->ch = min1_l->ch;
            left->left = NULL;
            left->right = NULL;
            left->weight = min1_l->count;
        }
        else
        {
            left = min1_l->tree;
        }

        //右子树
        if(min2_l->tree == NULL)
        {
            right = (tnode *)malloc(sizeof(tnode));
            assert(right != NULL);
            right->ch = min2_l->ch;
            right->left = NULL;
            right->right = NULL;
            right->weight = min2_l->count;
        }
        else
        {
            right = min2_l->tree;
        }

        //生成新的树根节点
        new_tnode = (tnode *)malloc(sizeof(tnode));
        assert(new_tnode != NULL);
        new_tnode->ch = ' ';
        new_tnode->left = left;
        new_tnode->right = right;
        new_tnode->weight = left->weight + right->weight;

        //生成新的链表节点
        new_lnode = (clist *)malloc(sizeof(clist));
        assert(new_lnode != NULL);
        new_lnode->ch = new_tnode->ch;
        new_lnode->count = new_tnode->weight;
        new_lnode->next = NULL;
        new_lnode->tree = new_tnode;

        //将新生成的链表节点添加到链表中
        link = &clistp;
        while((current = *link) != NULL)
        {
            link = &current->next;
        }
        *link = new_lnode;
        free(min1_l);
        free(min2_l);
    }
    huff_tree = new_tnode;
}

　　万事俱备，只欠东风啦，最后一部当然是根据哈夫曼树进行哈夫曼编码了：

View Code

/**************************************************
Description:实现哈夫曼编码(递归遍历树)
**************************************************/
void
encoding(tnode *bt)
{
    codelist *new_node;
    codelist *current;
    codelist **link;

    if(bt->left == NULL && bt->right == NULL)
    {
        int i;
        new_node = (codelist *)malloc(sizeof(codelist));
        assert(new_node != NULL);
        new_node->ch = bt->ch;
        for(i = 0; i < strlen(stack); i += 1)
        {
            new_node->code[i] = stack[i];
        }
        new_node->code[i] = '\0';
        new_node ->next = NULL;

        //将新生成的编码节点添加进链表中
        link = &head;
        while((current = *link) != NULL)
        {
            link = &current->next;
        }
        *link = new_node;
        //init_stack();
    }
    else
    {
        if(bt->left != NULL)
        {
            stack[++top] = '0';
            encoding(bt->left);
            stack[top] = '\0';
            top -= 1;
        }
        if(bt->right != NULL)
        {
            stack[++top] = '1';
            encoding(bt->right);
            stack[top] = '\0';
            top -= 1;
        }
    }
}

　　刚接触时间不长，很多问题都欠缺考虑，欢迎各位大神指正，谢谢。下面附上源码

　　https://files.cnblogs.com/hwzhong20/HuffmanCoding.rar