LZW压缩算法

LZW编码

LZW算法和LZ78算法在编码方式上的不同:

  • 输出只包含码字,其目的是克服发送每段最后一个未压缩字符造成的低效率。
  • LZW只输出代表词典中的缀-符串的码字。这就意味着开始时词典不能为空。开始状态,词典中必须包含可能在字符流中出现的所有的单个字符,把这些单个字符称为前缀根。

LZW编码算法的步骤:

步骤1: 开始时的词典包含所有可能的根(Root),当前前缀P为空;

步骤2: 当前字符(Char) :=字符流中的下一个字符;

步骤3: 判断缀-符串P+ Char是否在词典中

(1) 如果"是":P := P+ Char // (用Char扩展P) ;

(2) 如果"否":① 把代表当前前缀P的码字输出到码字流;

② 把缀-符串P+ Char添加到词典;

③ 令P := Char //(现在P仅包含一个字符Char);

步骤4:判断字符流中是否还有字符需要编码

(1) 如果"是",就返回到步骤2;

(2) 如果"否"

① 把代表当前前缀P的码字输出到码字流;

② 结束。

LZW译码过程:

开始时的词典包含所有可能的根(Root);

步骤1:当前码字(cW) :=码字流中的待解码的下一个码字;

查词典,输出Dictionary [cW];

步骤2:先前码字(pW):=当前码字(cW);

步骤3:判断码字流中是否还有需要解码的码字,

(1) 如果"是":当前码字(cW) :=码字流中的待解码的下一个码字;

判断Dictionary [cW]是否不为空(即词典中是否已经有序号为cW的项)

(a)如果"是":

①查词典,输出Dictionary [cW];

②prefix:=Dictionary [pW] ;

③Char:=first Character of Dictionary[cW] ;

④添加词典项Prefix+char;

⑤pW:=cW;

(b)如果"否":

①prefix:=Dictionary [pW] ;

②Char:=first Character of prefix;

③添加词典项Prefix+char;

④查词典,输出Dictionary [cW];

⑤pW:=cW;

重复步骤3;

算法实现(C#)

  /// <summary>
  /// LZW encoder. Builds a code dictionary from the input text and writes the
  /// resulting code stream to the console as comma-separated entry ids.
  /// </summary>
  /// <remarks>
  /// Entries are instances of the <c>Dictionary</c> type declared elsewhere in the
  /// project; this class only relies on its <c>(id, content)</c> constructor and its
  /// <c>id</c> / <c>content</c> members.
  /// </remarks>
  public static class Encoder
  {
      /// <summary>
      /// The code dictionary. It is static and nothing in this class clears it,
      /// so entries accumulate across successive calls.
      /// </summary>
      static List<Dictionary> D = new List<Dictionary>();

      /// <summary>
      /// Tells whether some dictionary entry has exactly the given content.
      /// </summary>
      /// <param name="item">The string to look for.</param>
      /// <param name="D">The dictionary to search.</param>
      /// <returns><c>true</c> when an entry with that content exists; otherwise <c>false</c>.</returns>
      static bool Find(string item, List<Dictionary> D)
      {
          return D.Exists(entry => entry.content == item);
      }

      /// <summary>
      /// Appends a new entry whose id is one greater than the id of the entry
      /// currently at the end of the list (or 1 when the list is empty).
      /// </summary>
      /// <param name="item">The content of the new entry.</param>
      /// <param name="D">The dictionary to extend.</param>
      static void AddToDic(string item, List<Dictionary> D)
      {
          int nextID = D.Count == 0 ? 1 : D[D.Count - 1].id + 1;
          D.Add(new Dictionary(nextID, item));
      }

      /// <summary>
      /// Seeds the dictionary with the roots: every distinct character of
      /// <paramref name="str"/>, in order of first appearance. LZW cannot start
      /// from an empty dictionary, so this must run before encoding.
      /// </summary>
      /// <param name="str">The text whose characters become the initial entries.</param>
      public static void InitializeDictionary(string str)
      {
          foreach (char c in str)
          {
              string root = c.ToString();
              if (Find(root, D))
              {
                  continue;
              }

              AddToDic(root, D);
          }
      }

      /// <summary>
      /// Looks up the id of the entry with the given content.
      /// </summary>
      /// <param name="item">The content to look for.</param>
      /// <param name="D">The dictionary to search.</param>
      /// <returns>The id of the first matching entry, or 0 when there is none.</returns>
      static int GetDicID(string item, List<Dictionary> D)
      {
          int position = D.FindIndex(entry => entry.content == item);
          return position < 0 ? 0 : D[position].id;
      }

      /// <summary>
      /// Runs the LZW encoding algorithm on <paramref name="str"/> and prints the
      /// code stream on one console line, codes separated by commas.
      /// </summary>
      /// <param name="str">The text to encode.</param>
      public static void Execute(string str)
      {
          InitializeDictionary(str);

          // The longest string matched so far that is known to be in the dictionary.
          string prefix = string.Empty;

          foreach (char symbol in str)
          {
              string candidate = prefix + symbol;
              if (Find(candidate, D))
              {
                  // Still inside a known string: keep extending the match.
                  prefix = candidate;
                  continue;
              }

              // The match ended: emit its code, learn the extended string,
              // and restart matching from the current character.
              Console.Write("{0},", GetDicID(prefix, D));
              AddToDic(candidate, D);
              prefix = symbol.ToString();
          }

          // Flush the code of the final pending match and end the line.
          Console.WriteLine("{0}", GetDicID(prefix, D));
      }
  }
  /// <summary>
  /// LZW decoder. Reads an initial dictionary typed in by the user, then expands a
  /// comma-separated code stream back into text on the console.
  /// </summary>
  /// <remarks>
  /// Entries are instances of the <c>Dictionary</c> type declared elsewhere in the
  /// project; this class only relies on its <c>(id, content)</c> constructor and its
  /// <c>id</c> / <c>content</c> members.
  /// </remarks>
  public static class Decoder
  {
      /// <summary>
      /// Characters accepted between the id and the string of a typed-in entry.
      /// The on-screen help asks for a comma; a space is the historical form.
      /// </summary>
      static readonly char[] EntrySeparators = { ' ', ',' };

      /// <summary>
      /// The code dictionary. It is static and nothing in this class clears it,
      /// so entries accumulate across successive calls.
      /// </summary>
      static List<Dictionary> D = new List<Dictionary>();

      /// <summary>
      /// Reads the initial dictionary from the console, one entry per line, until
      /// a line whose id is 0 (or the end of input) is reached.
      /// </summary>
      /// <remarks>
      /// Each line is <c>id,string</c> or <c>id string</c>. With the space form the
      /// entry is the token up to the next space, exactly as before; with the comma
      /// form the entry is the rest of the line.
      /// </remarks>
      /// <exception cref="FormatException">
      /// The id is not a valid integer, or a non-zero id is not followed by a string.
      /// </exception>
      public static void GetDictionary()
      {
          ShowInputHelp();
          while (true)
          {
              string content = Console.ReadLine();
              if (content == null)
              {
                  // End of input (e.g. redirected stdin ran out): treat as the terminator.
                  break;
              }

              int separatorIndex = content.IndexOfAny(EntrySeparators);
              string idText = separatorIndex < 0 ? content : content.Substring(0, separatorIndex);
              int id = Convert.ToInt32(idText);
              if (id == 0)
              {
                  break;
              }

              if (separatorIndex < 0)
              {
                  throw new FormatException(
                      "Dictionary entry '" + content + "' has an id but no string.");
              }

              string entry = content.Substring(separatorIndex + 1);
              if (content[separatorIndex] == ' ')
              {
                  // Space form keeps its original meaning: only the second
                  // space-delimited token is the entry, anything after it is ignored.
                  int nextSpace = entry.IndexOf(' ');
                  if (nextSpace >= 0)
                  {
                      entry = entry.Substring(0, nextSpace);
                  }
              }

              D.Add(new Dictionary(id, entry));
          }
      }

      /// <summary>
      /// Prints the instructions for typing in the initial dictionary.
      /// </summary>
      static void ShowInputHelp()
      {
          Console.WriteLine("请输入初始词典:");
          Console.WriteLine("格式:");
          Console.WriteLine("每行输入一个条目,编号与字符串之间以逗号分隔,以0结束");
      }

      /// <summary>
      /// Looks up the content of the entry with the given id.
      /// </summary>
      /// <param name="id">The entry id (code word) to look for.</param>
      /// <param name="D">The dictionary to search.</param>
      /// <returns>The content of the first matching entry, or an empty string when there is none.</returns>
      static string GetContext(int id, List<Dictionary> D)
      {
          foreach (Dictionary d in D)
          {
              if (d.id == id)
              {
                  return d.content;
              }
          }
          return string.Empty;
      }

      /// <summary>
      /// Appends a new entry whose id is one greater than the largest id present
      /// (or 1 when the dictionary is empty).
      /// </summary>
      /// <param name="item">The content of the new entry.</param>
      /// <param name="D">The dictionary to extend.</param>
      static void AddToDic(string item, List<Dictionary> D)
      {
          // The initial entries are typed in by hand and may arrive in any order,
          // so scan for the real maximum instead of trusting the last element.
          int maxID = 0;
          foreach (Dictionary d in D)
          {
              if (d.id > maxID)
              {
                  maxID = d.id;
              }
          }

          D.Add(new Dictionary(maxID + 1, item));
      }

      /// <summary>
      /// Runs the LZW decoding algorithm on a comma-separated code stream and
      /// prints the decoded text on one console line.
      /// </summary>
      /// <param name="str">The code stream, e.g. <c>1,2,4</c>. Empty items are ignored.</param>
      /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
      /// <exception cref="FormatException">An item of the stream is not a valid integer.</exception>
      /// <exception cref="InvalidOperationException">
      /// A code is not in the dictionary and cannot be reconstructed because the
      /// previous code has no entry either (corrupt stream or wrong initial dictionary).
      /// </exception>
      public static void Execute(string str)
      {
          if (str == null)
          {
              throw new ArgumentNullException("str");
          }

          // Dropping empty items makes an empty stream or a trailing comma harmless.
          string[] codeStream = str.Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries);

          int pW = 0;              // previous code word
          bool isFirstCode = true;

          foreach (string code in codeStream)
          {
              int cW = Convert.ToInt32(code);
              string current = GetContext(cW, D);

              if (isFirstCode)
              {
                  // The first code is always a plain lookup; nothing is learned yet.
                  Console.Write(current);
                  isFirstCode = false;
              }
              else if (current != string.Empty)
              {
                  // Known code: output it, then learn previous string + its first character.
                  Console.Write(current);
                  AddToDic(GetContext(pW, D) + current[0], D);
              }
              else
              {
                  // Code not in the dictionary yet: it must be the entry about to be
                  // created, i.e. the previous string followed by its own first character.
                  string prefix = GetContext(pW, D);
                  if (prefix.Length == 0)
                  {
                      throw new InvalidOperationException(
                          "Invalid LZW code stream: code " + cW +
                          " is not in the dictionary and the previous code " + pW +
                          " has no entry.");
                  }

                  AddToDic(prefix + prefix[0], D);
                  Console.Write(GetContext(cW, D));
              }

              pW = cW;
          }

          Console.WriteLine();
      }
  }

主函数代码省略,执行效果如下:

本例中要编码的字符串有14个字符,压缩后的编码有10个数字,按实际字节算,压缩率为65%。

源代码下载:http://files.cnblogs.com/ryuasuka/LZW.rar

原文地址:https://www.cnblogs.com/ryuasuka/p/3149403.html