程序由五个模块组成。

(1)  lzw.h      定义了一些基本的数据结构,常量,还有变量的初始化等。

#ifndef __LZW_H__
#define __LZW_H__
//------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <windows.h>
#include <memory.h>
//------------------------------------------------------------------------------
#define LZW_BASE    0x102//  The code base
#define CODE_LEN       12   //  Max code length
#define TABLE_LEN      4099 // It must be prime number and bigger than 2^CODE_LEN=4096.
       // Such as 5051 is also ok.
#define BUFFERSIZE     1024
//------------------------------------------------------------------------------
typedef struct
{
    HANDLE      h_sour;  // Source file handle.
    HANDLE      h_dest;  // Destination file handle.
    
    HANDLE      h_suffix; // Suffix table handle.
    HANDLE      h_prefix; // Prefix table handle.
    HANDLE      h_code;  // Code table handle.
    
    LPWORD      lp_prefix; // Prefix table head pointer.
    LPBYTE      lp_suffix; // Suffix table head pointer.
    LPWORD      lp_code; // Code table head pointer.

    WORD        code;
    WORD        prefix;
    BYTE        suffix;

    BYTE        cur_code_len; // Current code length.[ used in Dynamic-Code-Length mode ]

}LZW_DATA,*PLZW_DATA;


typedef struct
{
    WORD        top;
    WORD        index;

    LPBYTE      lp_buffer;
    HANDLE      h_buffer;
    
    BYTE        by_left;
    DWORD       dw_buffer;

    BOOL        end_flag;

}BUFFER_DATA,*PBUFFER_DATA;


typedef struct                  //Stack used in decode
{
WORD        index;
HANDLE      h_stack;
LPBYTE      lp_stack;

}STACK_DATA,*PSTACK_DATA;
//------------------------------------------------------------------------------
VOID stack_create( PSTACK_DATA stack )
{
stack->h_stack  = GlobalAlloc( GHND , TABLE_LEN*sizeof(BYTE) );
stack->lp_stack = GlobalLock( stack->h_stack );
stack->index = 0;
}
//------------------------------------------------------------------------------
VOID stack_destory( PSTACK_DATA stack )
{
GlobalUnlock( stack->h_stack );
    GlobalFree  ( stack->h_stack );
}
//------------------------------------------------------------------------------
VOID buffer_create( PBUFFER_DATA    buffer )
{
    buffer->h_buffer   = GlobalAlloc(  GHND,  BUFFERSIZE*sizeof(BYTE)  );
    buffer->lp_buffer  = GlobalLock( buffer->h_buffer );
    buffer->top        = 0;
    buffer->index      = 0;
    buffer->by_left    = 0;
    buffer->dw_buffer  = 0;
    buffer->end_flag   = FALSE;
}
//------------------------------------------------------------------------------
VOID buffer_destory( PBUFFER_DATA   buffer )
{
    GlobalUnlock( buffer->h_buffer );
    GlobalFree  ( buffer->h_buffer );
}
//------------------------------------------------------------------------------
VOID re_init_lzw( PLZW_DATA lzw )    //When code table reached its top it should
{                                    //be reinitialized.      
    memset( lzw->lp_code, 0xFFFF, TABLE_LEN*sizeof(WORD) );
    lzw->code          = LZW_BASE;
    lzw->cur_code_len  = 9;
}
//------------------------------------------------------------------------------
VOID lzw_create(PLZW_DATA    lzw,    HANDLE h_sour,    HANDLE h_dest)
{
WORD i;
    lzw->h_code        = GlobalAlloc( GHND, TABLE_LEN*sizeof(WORD) );
    lzw->h_prefix      = GlobalAlloc( GHND, TABLE_LEN*sizeof(WORD) );
    lzw->h_suffix      = GlobalAlloc( GHND, TABLE_LEN*sizeof(BYTE) );
    lzw->lp_code       = GlobalLock( lzw->h_code   );
    lzw->lp_prefix     = GlobalLock( lzw->h_prefix );
    lzw->lp_suffix     = GlobalLock( lzw->h_suffix );
    lzw->code          = LZW_BASE;
    lzw->cur_code_len  = 9;
    lzw->h_sour        = h_sour;
    lzw->h_dest        = h_dest;
    memset( lzw->lp_code, 0xFFFF, TABLE_LEN*sizeof(WORD) );

}
//------------------------------------------------------------------------------
VOID lzw_destory(PLZW_DATA    lzw)
{  
    GlobalUnlock( lzw->h_code   );
    GlobalUnlock( lzw->h_prefix );
    GlobalUnlock( lzw->h_suffix );

GlobalFree( lzw->h_code  );
    GlobalFree( lzw->h_prefix );
    GlobalFree( lzw->h_suffix );    
}
//------------------------------------------------------------------------------
#endif

(2) fileio.h   定义了一些文件操作

#ifndef __FILEIO_H__
#define __FILEIO_H__
//------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <windows.h>
//------------------------------------------------------------------------------
HANDLE  file_handle(CHAR* file_name)
{
    HANDLE h_file;
    h_file = CreateFile(file_name,
                       GENERIC_READ|GENERIC_WRITE,
                       FILE_SHARE_READ|FILE_SHARE_WRITE,
                       NULL,
                       OPEN_ALWAYS,
                       0,
                       NULL
                       );
    return h_file;
}
//------------------------------------------------------------------------------
WORD load_buffer(HANDLE h_sour, PBUFFER_DATA buffer)  // Load file to buffer
{
    DWORD ret;
    ReadFile(h_sour,buffer->lp_buffer,BUFFERSIZE,&ret,NULL);
    buffer->index = 0;
    buffer->top = (WORD)ret;
    return (WORD)ret;
}
//------------------------------------------------------------------------------
WORD empty_buffer( PLZW_DATA lzw, PBUFFER_DATA buffer)// Output buffer to file
{
   
    DWORD ret;
    if(buffer->end_flag) // The flag mark the end of decode
{
  if( buffer->by_left )
  {
   buffer->lp_buffer[ buffer->index++ ] = (BYTE)( buffer->dw_buffer >> 32-buffer->by_left )<<(8-buffer->by_left);
  }
}
WriteFile(lzw->h_dest, buffer->lp_buffer,buffer->index,&ret,NULL);
    buffer->index = 0;
    buffer->top = ret;
    return (WORD)ret;
}
//------------------------------------------------------------------------------
#endif

(3) hash.h  定义了压缩时所用的码表操作函数,为了快速查找使用了hash算法,还有处理hash冲突的函数

#ifndef __HASH_H__
#define __HASH_H__
//------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <windows.h>
//------------------------------------------------------------------------------
#define   DIV       TABLE_LEN
#define   HASHSTEP  13         // It should bigger than 0.
//------------------------------------------------------------------------------
WORD get_hash_index( PLZW_DATA lzw )
{
    DWORD tmp;
    WORD result;
    DWORD prefix;
    DWORD suffix;
    prefix = lzw->prefix;
    suffix = lzw->suffix;
    tmp = prefix<<8 | suffix;
    result = tmp % DIV;
    return result;
}
//------------------------------------------------------------------------------
WORD re_hash_index( WORD hash ) // If hash conflict occured we must recalculate
{                               // hash index .
    WORD result;
    result = hash + HASHSTEP;
    result = result % DIV;
    return result;
}
//------------------------------------------------------------------------------
BOOL in_table( PLZW_DATA lzw ) // To find whether current code is already in table.
{
    BOOL result;
    WORD hash;

    hash = get_hash_index( lzw );
    if( lzw->lp_code[ hash ] == 0xFFFF )
    {
        result = FALSE;    
    }
    else
    {
        if( lzw->lp_prefix[ hash ] == lzw->prefix &&
            lzw->lp_suffix[ hash ] == lzw->suffix )
        {
            result = TRUE;
        }
        else
        {
            result = FALSE;
            while( lzw->lp_code[ hash ] != 0xFFFF )
            {
                if( lzw->lp_prefix[ hash ] == lzw->prefix &&
                    lzw->lp_suffix[ hash ] == lzw->suffix )
                {
                        result = TRUE;
                        break;    
                }
                hash = re_hash_index( hash );
            }
        }
    }
    return result;
}
//------------------------------------------------------------------------------
WORD get_code( PLZW_DATA lzw )
{
    WORD hash;
    WORD code;
    hash = get_hash_index( lzw );
    if( lzw->lp_prefix[ hash ] == lzw->prefix &&
        lzw->lp_suffix[ hash ] == lzw->suffix )
    {
        code = lzw->lp_code[ hash ];
    }
    else
    {
        while( lzw->lp_prefix[ hash ] != lzw->prefix ||
               lzw->lp_suffix[ hash ] != lzw->suffix )
        {
                hash = re_hash_index( hash );    
        }
        code = lzw->lp_code[ hash ];
    }
    return code;
}
//------------------------------------------------------------------------------
VOID insert_table( PLZW_DATA lzw )
{

    WORD hash;
    hash = get_hash_index( lzw );
    if( lzw->lp_code[ hash ] == 0xFFFF )
    {
        lzw->lp_prefix[ hash ] = lzw->prefix;
        lzw->lp_suffix[ hash ] = lzw->suffix;
        lzw->lp_code[ hash ]   = lzw->code;
    }
    else
    {
        while( lzw->lp_code[ hash ] != 0xFFFF )
        {
                hash = re_hash_index( hash );    
        }
        lzw->lp_prefix[ hash ] = lzw->prefix;
        lzw->lp_suffix[ hash ] = lzw->suffix;
        lzw->lp_code[ hash ]   = lzw->code;
    }

}
//------------------------------------------------------------------------------


#endif

(4) encode.h  压缩程序主函数

#ifndef __ENCODE_H__
#define __ENCODE_H__
//------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <windows.h>

//------------------------------------------------------------------------------
VOID output_code( DWORD code ,PBUFFER_DATA out, PLZW_DATA lzw)
{
    out->dw_buffer |= code << ( 32 - out->by_left - lzw->cur_code_len );
    out->by_left += lzw->cur_code_len;

    while( out->by_left >= 8 )
    {
        if( out->index == BUFFERSIZE )
        {
            empty_buffer( lzw,out);
        }

        out->lp_buffer[ out->index++ ] = (BYTE)( out->dw_buffer >> 24 );
        out->dw_buffer <<= 8;
        out->by_left -= 8;
    }
}
//------------------------------------------------------------------------------
VOID do_encode( PBUFFER_DATA in, PBUFFER_DATA out, PLZW_DATA lzw)
{
    WORD prefix;
    while( in->index != in->top )
    {
        if( !in_table(lzw) )
        {
                // current code not in code table
                // then add it to table and output prefix


                insert_table(lzw);
                prefix = lzw->suffix;
                output_code( lzw->prefix ,out ,lzw );
                lzw->code++;

                if( lzw->code == (WORD)1<< lzw->cur_code_len )
                {
                    // code reached current code top(1<<cur_code_len)
                    // then current code length add one
     lzw->cur_code_len++;
     if( lzw->cur_code_len == CODE_LEN + 1 )
     {
      re_init_lzw( lzw );
     }

                }
        }
        else
        {
                // current code already in code table
                // then output nothing
                prefix = get_code(lzw);

        }
        lzw->prefix = prefix;
        lzw->suffix = in->lp_buffer[ in->index++ ];
    }
}

//------------------------------------------------------------------------------
VOID encode(HANDLE h_sour,HANDLE h_dest)
{
    LZW_DATA        lzw;
    BUFFER_DATA     in ;
    BUFFER_DATA     out;
    
    BOOL first_run = TRUE;

    lzw_create( &lzw ,h_sour,h_dest );
    buffer_create( &in );
    buffer_create( &out );


    while( load_buffer( h_sour, &in ) )
    {
        if( first_run )
        {// File length should be considered  but here we simply
         // believe file length bigger than 2 bytes.
                lzw.prefix = in.lp_buffer[ in.index++ ];
                lzw.suffix = in.lp_buffer[ in.index++ ];
                first_run = FALSE;
        }
        do_encode(&in , &out, &lzw);
    }
    
output_code(lzw.prefix, &out , &lzw);
output_code(lzw.suffix, &out , &lzw);
out.end_flag = TRUE;
    empty_buffer( &lzw,&out);

    lzw_destory( &lzw );
    buffer_destory( &in );
    buffer_destory( &out );
}

//------------------------------------------------------------------------------

#endif

(5) decode.h  解压函数主函数

#ifndef __DECODE_H__
#define __DECODE_H__
//------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <windows.h>
//------------------------------------------------------------------------------
VOID out_code( WORD code ,PBUFFER_DATA buffer,PLZW_DATA lzw,PSTACK_DATA stack)
{
WORD tmp;
if( code < 0x100 )
{
  stack->lp_stack[ stack->index++ ] = code;
}
else
{
   stack->lp_stack[ stack->index++ ] = lzw->lp_suffix[ code ];
   tmp = lzw->lp_prefix[ code ];
   while( tmp > 0x100 )
   {
    stack->lp_stack[ stack->index++ ] = lzw->lp_suffix[ tmp ];
    tmp = lzw->lp_prefix[ tmp ];
   }
   stack->lp_stack[ stack->index++ ] = (BYTE)tmp;

}


while( stack->index )
{
  if( buffer->index == BUFFERSIZE )
  {
   empty_buffer(lzw,buffer);
  }
  buffer->lp_buffer[ buffer->index++ ] = stack->lp_stack[ --stack->index ] ;
}
}
//------------------------------------------------------------------------------
VOID insert_2_table(PLZW_DATA lzw )
{

lzw->lp_code[ lzw->code ]   = lzw->code;
lzw->lp_prefix[ lzw->code ] = lzw->prefix;
lzw->lp_suffix[ lzw->code ] = lzw->suffix;
lzw->code++;

if( lzw->code == ((WORD)1<<lzw->cur_code_len)-1 )
{
  lzw->cur_code_len++;
  if( lzw->cur_code_len == CODE_LEN+1 )
      lzw->cur_code_len = 9;
}
if(lzw->code >= 1<<CODE_LEN )
{
  re_init_lzw(lzw);
}

}
//------------------------------------------------------------------------------
WORD get_next_code( PBUFFER_DATA buffer , PLZW_DATA lzw )
{

BYTE next;
WORD code;
while( buffer->by_left < lzw->cur_code_len )
{
  if( buffer->index == BUFFERSIZE )
  {
   load_buffer( lzw->h_sour, buffer );
  }
  next = buffer->lp_buffer[ buffer->index++ ];
  buffer->dw_buffer |= (DWORD)next << (24-buffer->by_left);
  buffer->by_left   += 8;
}
code = buffer->dw_buffer >> ( 32 - lzw->cur_code_len );
buffer->dw_buffer <<= lzw->cur_code_len;
buffer->by_left    -= lzw->cur_code_len;

return code;
}
//------------------------------------------------------------------------------
VOID do_decode( PBUFFER_DATA in, PBUFFER_DATA out, PLZW_DATA lzw, PSTACK_DATA stack)
{
WORD code;
WORD tmp;
while( in->index != in->top  )
{
  code = get_next_code( in ,lzw );

  if( code < 0x100 )
  {
   // code already in table
   // then simply output the code
   lzw->suffix = (BYTE)code;
  }
  else
  {
   if( code < lzw->code  )
   {
    // code also in table
    // then output code chain
    
    tmp = lzw->lp_prefix[ code ];
    while( tmp > 0x100 )
    {
     tmp = lzw->lp_prefix[ tmp ];
    }
    lzw->suffix = (BYTE)tmp;
   }
   else
   {
    // code == lzw->code
    // code not in table
    // add code into table
    // and out put code
    tmp = lzw->prefix;
    while( tmp > 0x100 )
    {
     tmp = lzw->lp_prefix[ tmp ];
    }
    lzw->suffix = (BYTE)tmp;
   }
  }
  insert_2_table( lzw );
  out_code(code,out,lzw,stack);

  lzw->prefix = code;

}

}
//------------------------------------------------------------------------------
VOID decode( HANDLE h_sour, HANDLE h_dest )
{
    LZW_DATA        lzw;
    BUFFER_DATA     in ;
    BUFFER_DATA     out;
STACK_DATA      stack;
BOOL   first_run;

first_run = TRUE;


    lzw_create( &lzw ,h_sour,h_dest );
    buffer_create( &in );
    buffer_create( &out );
stack_create(&stack );

    while( load_buffer( h_sour, &in ) )
    {
  if( first_run )
  {
   lzw.prefix = get_next_code( &in, &lzw );
   lzw.suffix = lzw.prefix;
   out_code(lzw.prefix, &out, &lzw , &stack);
   first_run = FALSE;
  }
        do_decode(&in , &out, &lzw, &stack);
    }

    empty_buffer( &lzw,&out);

    lzw_destory( &lzw );
    buffer_destory( &in );
    buffer_destory( &out );
stack_destory( &stack);
}

#endif

2  下面给出一个应用上面模块的简单例子

#include <stdio.h>
#include <stdlib.h>
//------------------------------------------------------------------------------

#include "lzw.h"
#include "hash.h"
#include "fileio.h"
#include "encode.h"
#include "decode.h"

//------------------------------------------------------------------------------
HANDLE h_file_sour;  
HANDLE h_file_dest;
HANDLE h_file;
CHAR*  file_name_in = "d:\\code.c";
CHAR*  file_name_out= "d:\\encode.e";
CHAR*  file_name    = "d:\\decode.d";


//------------------------------------------------------------------------------
int main(int argc, char *argv[])
{
    h_file_sour = file_handle(file_name_in);
    h_file_dest = file_handle(file_name_out);
    h_file     = file_handle(file_name);


  encode(h_file_sour, h_file_dest);  
// decode(h_file_dest,h_file);


    CloseHandle(h_file_sour);
    CloseHandle(h_file_dest);  
    CloseHandle(h_file);

  return 0;      
}    

3  后语

  之前研究gif文件格式时偶然接触了lzw压缩算法,于是就想自己动手实现。从一开始看人家的原码,然后跟着模仿,到现在用自己的语言表达出来,从理解原理到代码的实现花费了不少时间与精力,但是真正的快乐也就在这里,现在把她拿出来跟大家分享也就是分享快乐。