经典的LZW算法C语言实现

/* Basic LZW Data Compression program published in DDJ October 1989 issue.
*
*
* Added: - Method to clear table when compression ratio degrades
*     - Self adjusting code size capability (up to 14 bits)
* Updated functions are marked with "MODIFIED". main() has been updated also
* Compile with -ml (large model) for MAX_BITS == 14 only
*
*
*/

#include <stdio.h>
#include <alloc.h>

#define INIT_BITS 9
#define MAX_BITS 14       /* Do not exceed 14 with this program */
#define HASHING_SHIFT MAX_BITS - 8

#if MAX_BITS == 14         /* Set the table size. Must be a prime   */
#define TABLE_SIZE 18041     /* number somewhat larger than 2^MAX_BITS.*/
#elif MAX_BITS == 13
#define TABLE_SIZE 9029
#else
#define TABLE_SIZE 5021
#endif

#define CLEAR_TABLE 256   /* Code to flush the string table */
#define TERMINATOR 257   /* To mark EOF Condition, instead of MAX_VALUE */
#define FIRST_CODE 258   /* First available code for code_value table */
#define CHECK_TIME 100   /* Check comp ratio every CHECK_TIME chars input */

#define MAXVAL(n) (( 1 <<( n )) -1)   /* max_value formula macro */

unsigned input_code();
void *malloc();

int *code_value;               /* This is the code value array */
unsigned int *prefix_code;         /* This array holds the prefix codes */
unsigned char *append_character;     /* This array holds the appended chars */
unsigned char decode_stack[4000];   /* This array holds the decoded string */

int num_bits=INIT_BITS;           /* Starting with 9 bit codes */
unsigned long bytes_in=0,bytes_out=0; /* Used to monitor compression ratio */
int max_code;                 /* old MAX_CODE */
unsigned long checkpoint=CHECK_TIME; /* For compression ratio monitoring */

main(int argc, char *argv[])
{
  FILE *input_file, *output_file, *lzw_file;
  char input_file_name[81];
/* The three buffers for the compression phase. */
  code_value=malloc(TABLE_SIZE*sizeof(unsigned int));
  prefix_code=malloc(TABLE_SIZE*sizeof(unsigned int));
  append_character=malloc(TABLE_SIZE*sizeof(unsigned char));

  if (code_value==NULL || prefix_code==NULL || append_character==NULL) {
    printf("Error allocating table space!\n");
    exit(1);
  }
/* Get the file name, open it, and open the LZW output file. */
  if (argc>1)
    strcpy(input_file_name,argv[1]);
  else {
    printf("Input file name: ");
    scanf("%s",input_file_name);
  }
  input_file=fopen(input_file_name,"rb");
  lzw_file=fopen("test.lzw","wb");
  if (input_file == NULL || lzw_file == NULL) {
    printf("Error opening files\n");
    exit(1);
  }
  max_code = MAXVAL(num_bits);   /* Initialize max_value & max_code */
  compress(input_file,lzw_file);     /* Call compression routine */

  fclose(input_file);
  fclose(lzw_file);
  free(code_value);             /* Needed only for compression */

  lzw_file=fopen("test.lzw","rb");
  output_file=fopen("test.out","wb");
  if (lzw_file == NULL || output_file == NULL) {
    printf("Error opening files\n");
    exit(1);
  }
  num_bits=INIT_BITS;             /* Re-initialize for expansion */
  max_code = MAXVAL(num_bits);
  expand(lzw_file,output_file);     /* Call expansion routine */

  fclose(lzw_file);             /* Clean it all up */
  fclose(output_file);
  free(prefix_code);
  free(append_character);
}
/* MODIFIED This is the new compression routine. The first two 9-bit codes
* have been reserved for communication between the compressor and expander.
*/
compress(FILE *input, FILE *output)
{
  unsigned int next_code=FIRST_CODE;
  unsigned int character;
  unsigned int string_code;
  unsigned int index;
  int i,         /* All purpose integer */
  ratio_new,       /* New compression ratio as a percentage */
  ratio_old=100;   /* Original ratio at 100% */

  for (i=0;i<TABLE_SIZE;i++)   /* Initialize the string table first */
    code_value
=-1;
  printf("Compressing\n");
  string_code=getc(input);   /* Get the first code */

/* This is the main compression loop. Notice when the table is full we try
* to increment the code size. Only when num_bits == MAX_BITS and the code
* value table is full do we start to monitor the compression ratio.
*/
  while((character=getc(input)) != (unsigned)EOF) {
    if (!(++bytes_in % 1000)) {   /* Count input bytes and pacifier */
      putchar('.');
    }
    index=find_match(string_code,character);
    if (code_value[index] != -1)
      string_code=code_value[index];
    else {
      if (next_code <= max_code ) {
        code_value[index]=next_code++;
        prefix_code[index]=string_code;
        append_character[index]=character;
      }
      output_code(output,string_code);   /* Send out current code */
      string_code=character;
      if (next_code > max_code) {     /* Is table Full? */
        if ( num_bits < MAX_BITS) {   /* Any more bits? */
          putchar('+');
          max_code = MAXVAL(++num_bits); /* Increment code size then */
        }
        else if (bytes_in > checkpoint) {       /* At checkpoint? */
          if (num_bits == MAX_BITS ) {
          ratio_new = bytes_out*100/bytes_in; /* New compression ratio */
          if (ratio_new > ratio_old) {     /* Has ratio degraded? */
            output_code(output,CLEAR_TABLE); /* YES,flush string table */
            putchar('C');
            num_bits=INIT_BITS;
            next_code=FIRST_CODE;     /* Reset to FIRST_CODE */
            max_code = MAXVAL(num_bits); /* Re-Initialize this stuff */
            bytes_in = bytes_out = 0;
            ratio_old=100;           /* Reset compression ratio */
            for (i=0;i<TABLE_SIZE;i++)   /* Reset code value array */
                code_value
=-1;
          }
          else                     /* NO, then save new */
          ratio_old = ratio_new;         /* compression ratio */
        }
        checkpoint = bytes_in + CHECK_TIME;   /* Set new checkpoint */
        }
      }
    }
  }
  output_code(output,string_code);   /* Output the last code */
  if (next_code == max_code) {     /* Handles special case for bit */
    ++num_bits;               /* increment on EOF */
    putchar('+');
  }
  output_code(output,TERMINATOR);   /* Output the end of buffer code */
  output_code(output,0);         /* Flush the output buffer */
  output_code(output,0);
  output_code(output,0);
  putchar('\n');
}
/* UNCHANGED from original
* This is the hashing routine.
*/
find_match(int hash_prefix, unsigned int hash_character)
{
  int index, offset;

  index = (hash_character << HASHING_SHIFT ) ^ hash_prefix;
  if (index == 0 )
    offset=1;
  else
    offset = TABLE_SIZE - index;
  while(1) {
    if (code_value[index] == -1 )
      return(index);
    if (prefix_code[index] == hash_prefix &&
                        append_character[index] == hash_character)
      return(index);
    index -= offset;
    if (index < 0)
      index += TABLE_SIZE;
  }
}
/* MODIFIED This is the modified expansion routine. It must now check for the
* CLEAR_TABLE code and know when to increment the code size.
*/
expand(FILE *input, FILE *output)
{
  unsigned int next_code=FIRST_CODE;
  unsigned int new_code;
  unsigned int old_code;
  int character,
  counter=0,
  clear_flag=1;       /* Need to clear the code value array */
  unsigned char *string;
  char *decode_string(unsigned char *buffer, unsigned int code);

  printf("Expanding\n");

  while((new_code=input_code(input)) != TERMINATOR) {
    if (clear_flag) {     /* Initialize or Re-Initialize */
      clear_flag=0;
      old_code=new_code;   /* The next three lines have been moved */
      character=old_code; /* from the original */
      putc(old_code,output);
      continue;
    }
    if (new_code == CLEAR_TABLE) {   /* Clear string table */
      clear_flag=1;
      num_bits=INIT_BITS;
      next_code=FIRST_CODE;
      putchar('C');
      max_code = MAXVAL(num_bits);
      continue;
    }
    if (++counter == 1000) {       /* Pacifier */
      counter=0;
      putchar('.');
    }
    if (new_code >= next_code) {     /* Check for string+char+string */
      *decode_stack=character;
      string=decode_string(decode_stack+1,old_code);
    }
    else
      string=decode_string(decode_stack,new_code);

    character = *string;         /* Output decoded string in reverse */
    while (string >= decode_stack)
      putc(*string--,output);

    if (next_code <= max_code) {     /* Add to string table if not full */
      prefix_code[next_code]=old_code;
      append_character[next_code++]=character;
      if (next_code == max_code && num_bits < MAX_BITS) {
        putchar('+');
        max_code = MAXVAL(++num_bits);
      }
    }
    old_code=new_code;
  }
  putchar('\n');
}
/* UNCHANGED from original
* Decode a string from the string table, storing it in a buffer.
* The buffer can then be output in reverse order by the expansion
* program.
*/
char *decode_string(unsigned char *buffer, unsigned int code)
{
  int i=0;

  while(code > 255 ) {
    *buffer++ = append_character[code];
    code=prefix_code[code];
    if (i++ >= 4000 ) {
      printf("Error during code expansion\n");
      exit(1);
    }
  }
  *buffer=code;
  return(buffer);
}

/* UNCHANGED from original
* Input a variable length code.
*/
unsigned input_code(FILE *input)
{
  unsigned int return_value;
  static int input_bit_count=0;
  static unsigned long input_bit_buffer=0L;

  while (input_bit_count <= 24 ) {
  input_bit_buffer |= (unsigned long) getc(input) << (24 - input_bit_count);
  input_bit_count += 8;
  }
  return_value=input_bit_buffer >> (32-num_bits);
  input_bit_buffer <<= num_bits;
  input_bit_count -= num_bits;
  return(return_value);
}
/* MODIFIED Output a variable length code.
*/
output_code(FILE *output, unsigned int code)
{
  static int output_bit_count=0;
  static unsigned long output_bit_buffer=0L;

  output_bit_buffer |= (unsigned long) code << (32 - num_bits -
                                        output_bit_count);
  output_bit_count += num_bits;
  while (output_bit_count >= 8) {
    putc(output_bit_buffer >> 24, output);
    output_bit_buffer <<= 8;
    output_bit_count -= 8;
    bytes_out++;             /* ADDED for compression monitoring */
  }
}

原文地址:https://www.cnblogs.com/think/p/372947.html