从 libtiff 4.0.2 中提取出来并稍加修改的 lzw 的代码,符合 TIFF6 标准中的 LZW 部分。
本人目前对开源协议还不太清楚,不知是否存在侵权问题,如果有,请告知。
代码:
1 /*
2 codec_lzw.h
3
4 LZW 编码解码。
5 基于 TIFF6 标准,不考虑与之前 TIFF 版本兼容。
6 以字节为字符单位进行编码。
7 */
8
9
10 #ifndef __CODEC_LZW_H_INCLUDED__
11 #define __CODEC_LZW_H_INCLUDED__
12
13
14 #define MAXCODE(n) ((1L<<(n))-1)
15
16 #define BITS_MIN 9
17 #define BITS_MAX 12
18
19 #define CODE_CLEAR 256
20 #define CODE_EOI 257
21 #define CODE_FIRST 258
22 #define CODE_MAX MAXCODE(BITS_MAX)
23
24
25 /*
26 编码函数
27
28 输入参数 input :输入数据,即待编码数据
29 输入参数 inputSize :输入数据量,单位 字节
30 输出参数 output :输出缓冲,编码所得数据置于其中
31 输出数据以 CODE_CLEAR 开始
32 输出数据以 CODE_EOI 结束
33 若输出缓冲太小,则编码所得数据将其填满
34 输入参数 outputLimit : 输出缓冲大小,单位 字节
35 输出参数 *outputSize :编码输出数据量,单位 字节
36 若输出缓冲太小,则返回实际完成编码可得到的数据量
37
38 返回值:
39 非负整数,编码成功,其值表示编码所得数据量,与 (*outputSize) 相同
40 -1 表示出错失败
41 -2 表示输出缓冲太小
42 */
43 int LZWencode( const unsigned char *input, int inputSize,
44 unsigned char *output, int outputLimit,
45 int *outputSize );
46
47 /*
48 解码函数
49
50 输入参数 input :输入数据,即待解码数据
51 认为输入数据以 CODE_CLEAR 开始
52 认为输入数据以 CODE_EOI 结束
53 输入参数 inputSize :输入数据量,单位 字节
54 输出参数 output :输出缓冲,解码所得数据置于其中
55 若输出缓冲太小,则解码所得数据将其填满
56 输入参数 outputLimit : 输出缓冲大小,单位 字节
57 输出参数 *outputSize :解码输出数据量,单位 字节
58 若输出缓冲太小,则返回实际完成解码可得到的数据量
59
60 返回值:
61 非负整数,解码成功,其值表示解码所得数据量,与 (*outputSize) 相同
62 -1 表示出错失败
63 -2 表示输出缓冲太小
64 */
65 int LZWdecode( const unsigned char *input, int inputSize,
66 unsigned char *output, int outputLimit,
67 int *outputSize );
68
69
70 #endif /* __CODEC_LZW_H_INCLUDED__ */
71
1 /*
2 codec_lzw.c
3
4 LZW 编码解码。
5 */
6
7
8 #include "codec_lzw.h"
9
10 #include <stdlib.h>
11 #include <string.h>
12
13
14 #define HSIZE 9001L /* 91% occupancy */
15 #define HSHIFT (13-8)
16 #define CSIZE (MAXCODE(BITS_MAX)+1L)
17 #define CHECK_GAP 10000
18
19 typedef unsigned short hcode_t; /* codes fit in 16 bits */
20 typedef struct {
21 int hash;
22 hcode_t code;
23 } hash_t;
24
25 typedef struct code_ent {
26 struct code_ent *next;
27 unsigned short length; /* string len, including this token */
28 unsigned char value; /* data value */
29 unsigned char firstchar; /* first token of string */
30 } code_t;
31
32 /*
33 * Reset encoding hash table.
34 */
35 static void cl_hash( hash_t *hashtab ) {
36 hash_t *hp = &hashtab[HSIZE-1];
37 int i = HSIZE-8;
38 do {
39 i -= 8;
40 hp[-7].hash = -1;
41 hp[-6].hash = -1;
42 hp[-5].hash = -1;
43 hp[-4].hash = -1;
44 hp[-3].hash = -1;
45 hp[-2].hash = -1;
46 hp[-1].hash = -1;
47 hp[ 0].hash = -1;
48 hp -= 8;
49 } while (i >= 0);
50 for (i += 8; i > 0; i--, hp--)
51 hp->hash = -1;
52 }
53
54
55 int LZWencode( const unsigned char *input, int inputSize,
56 unsigned char *output, int outputLimit,
57 int *outputSize ) {
58 int fcode;
59 hash_t *hp;
60 int h, c;
61 hcode_t ent = (hcode_t)(-1);
62 int disp;
63 int incount = 0;
64 int outcount = 0;
65 int checkpoint = CHECK_GAP;
66 int nextdata = 0;
67 int nextbits = 0;
68 int free_ent = CODE_FIRST;
69 int maxcode = MAXCODE(BITS_MIN);
70 int nbits = BITS_MIN;
71 int enc_ratio = 0;
72 const unsigned char *bp= input;
73 int cc = inputSize;
74 unsigned char *op = output;
75 unsigned char *limit = output + outputLimit;
76 hash_t *hashtab;
77
78 if ( (NULL == input) || (0 > inputSize) ||
79 (NULL == output) || (1 > outputLimit) ) {
80 return (-1);
81 }
82
83 hashtab = (hash_t*)malloc(HSIZE * sizeof(hash_t));
84 if ( hashtab == NULL ) {
85 return (-1);
86 }
87 cl_hash( hashtab );
88
89 #define CALCRATIO(rat) { \
90 if (incount > 0x007fffff) { /* NB: shift will overflow */ \
91 rat = outcount >> 8; \
92 rat = (rat == 0 ? 0x7fffffff : incount/rat); \
93 } else \
94 rat = (incount<<8) / outcount; \
95 }
96
97 #define PutNextCode(op, c) { \
98 nextdata = (nextdata << nbits) | c; \
99 nextbits += nbits; \
100 if (op < limit) \
101 *op = (unsigned char)(nextdata >> (nextbits-8)); \
102 op = op + 1; \
103 nextbits -= 8; \
104 if (nextbits >= 8) { \
105 if (op < limit) \
106 *op = (unsigned char)(nextdata >> (nextbits-8));\
107 op = op + 1; \
108 nextbits -= 8; \
109 } \
110 outcount += nbits; \
111 }
112
113 PutNextCode(op, CODE_CLEAR);
114 if (cc > 0) {
115 ent = *bp++; cc--; incount++;
116 }
117 while (cc > 0) {
118 c = *bp++; cc--; incount++;
119 fcode = ((int)c << BITS_MAX) + ent;
120 h = (c << HSHIFT) ^ ent; /* xor hashing */
121 /*
122 * Check hash index for an overflow.
123 */
124 if (h >= HSIZE)
125 h -= HSIZE;
126
127 hp = &hashtab[h];
128 if (hp->hash == fcode) {
129 ent = hp->code;
130 continue;
131 }
132 if (hp->hash >= 0) {
133 /*
134 * Primary hash failed, check secondary hash.
135 */
136 disp = HSIZE - h;
137 if (h == 0)
138 disp = 1;
139 do {
140 /*
141 * Avoid pointer arithmetic 'cuz of
142 * wraparound problems with segments.
143 */
144 if ((h -= disp) < 0)
145 h += HSIZE;
146 hp = &hashtab[h];
147 if (hp->hash == fcode) {
148 ent = hp->code;
149 goto hit;
150 }
151 } while (hp->hash >= 0);
152 }
153 /*
154 * New entry, emit code and add to table.
155 */
156 PutNextCode(op, ent);
157 ent = c;
158 hp->code = free_ent++;
159 hp->hash = fcode;
160 if (free_ent == CODE_MAX-1) {
161 /* table is full, emit clear code and reset */
162 cl_hash(hashtab);
163 enc_ratio = 0;
164 incount = 0;
165 outcount = 0;
166 free_ent = CODE_FIRST;
167 PutNextCode(op, CODE_CLEAR);
168 nbits = BITS_MIN;
169 maxcode = MAXCODE(BITS_MIN);
170 } else {
171 /*
172 * If the next entry is going to be too big for
173 * the code size, then increase it, if possible.
174 */
175 if (free_ent > maxcode) {
176 nbits++;
177 maxcode = (int) MAXCODE(nbits);
178 } else if (incount >= checkpoint) {
179 int rat;
180 /*
181 * Check compression ratio and, if things seem
182 * to be slipping, clear the hash table and
183 * reset state. The compression ratio is a
184 * 24+8-bit fractional number.
185 */
186 checkpoint = incount + CHECK_GAP;
187 CALCRATIO(rat);
188 if (rat <= enc_ratio) {
189 cl_hash(hashtab);
190 enc_ratio = 0;
191 incount = 0;
192 outcount = 0;
193 free_ent = CODE_FIRST;
194 PutNextCode(op, CODE_CLEAR);
195 nbits = BITS_MIN;
196 maxcode = MAXCODE(BITS_MIN);
197 } else
198 enc_ratio = rat;
199 }
200 }
201 hit:
202 ;
203 }
204 if (ent != (hcode_t) -1) {
205 PutNextCode(op, ent);
206 }
207 PutNextCode(op, CODE_EOI);
208 if (nextbits > 0) {
209 if (op < limit)
210 *op = (unsigned char)(nextdata << (8-nextbits));
211 op = op + 1;
212 }
213
214 if ( outputSize != NULL ) {
215 *outputSize = op - output;
216 }
217
218 return ((op <= limit) ? (op - output) : (-2));
219 }
220
221
222
223
224 int LZWdecode( const unsigned char *input, int inputSize,
225 unsigned char *output, int outputLimit,
226 int *outputSize ) {
227 const unsigned char *bp = input;
228 unsigned char *op = output;
229 int occ = outputLimit;
230 int cc = inputSize;
231
232 unsigned char *tp;
233 hcode_t code;
234 int len;
235 int i;
236
237 int nbits = BITS_MIN;
238 int nextbits = 0;
239 int nextdata = 0;
240 int nbitsmask = MAXCODE(BITS_MIN);
241
242 code_t *codep;
243 code_t *free_entp;
244 code_t *maxcodep;
245 code_t *oldcodep;
246 code_t *codetab;
247
248 if ( (NULL == input) || (1 > inputSize) ||
249 (NULL == output) || (1 > outputLimit) ) {
250 return (-1);
251 }
252
253 codetab = (code_t*)malloc(CSIZE * sizeof (code_t));
254 if ( NULL == codetab ) {
255 return (-1);
256 }
257
258 i = 255;
259 do {
260 codetab[i].value = i;
261 codetab[i].firstchar = i;
262 codetab[i].length = 1;
263 codetab[i].next = NULL;
264 } while (i--);
265 memset(&codetab[CODE_CLEAR], 0,
266 (CODE_FIRST - CODE_CLEAR) * sizeof (code_t));
267
268 #define DecFail do { free( codetab ); return (-1); } while (0)
269
270 #define GetNextCode(bp, code) { \
271 if (cc <= 0) DecFail; \
272 nextdata = (nextdata<<8) | *(bp)++; \
273 nextbits += 8; \
274 --cc; \
275 if (nextbits < nbits) { \
276 if (cc <= 0) DecFail; \
277 nextdata = (nextdata<<8) | *(bp)++; \
278 nextbits += 8; \
279 --cc; \
280 } \
281 code = (hcode_t)((nextdata >> (nextbits-nbits)) & nbitsmask); \
282 nextbits -= nbits; \
283 }
284
285 GetNextCode(bp, code);
286 if (code != CODE_CLEAR) {
287 DecFail;
288 }
289 while (code != CODE_EOI) {
290 if (code == CODE_CLEAR) {
291 free_entp = codetab + CODE_FIRST;
292 memset(free_entp, 0,
293 (CSIZE - CODE_FIRST) * sizeof (code_t));
294 nbits = BITS_MIN;
295 nbitsmask = MAXCODE(BITS_MIN);
296 maxcodep = codetab + nbitsmask - 1;
297 GetNextCode(bp, code);
298 if (code == CODE_EOI)
299 break;
300 if (code >= CODE_CLEAR) {
301 DecFail;
302 }
303 if ( occ-- > 0 ) {
304 *op++ = (unsigned char)code;
305 }
306 oldcodep = codetab + code;
307 GetNextCode(bp, code);
308 continue;
309 }
310
311 codep = codetab + code;
312
313 if (free_entp < &codetab[0] ||
314 free_entp >= &codetab[CSIZE]) {
315 DecFail;
316 }
317
318 free_entp->next = oldcodep;
319 if (free_entp->next < &codetab[0] ||
320 free_entp->next >= &codetab[CSIZE]) {
321 DecFail;
322 }
323
324 free_entp->firstchar = free_entp->next->firstchar;
325 free_entp->length = free_entp->next->length+1;
326 free_entp->value = (codep < free_entp) ?
327 codep->firstchar :
328 free_entp->firstchar;
329 if (++free_entp > maxcodep) {
330 if (++nbits > BITS_MAX) /* should not happen */
331 DecFail;
332 nbitsmask = MAXCODE(nbits);
333 maxcodep = codetab + nbitsmask - 1;
334 }
335 oldcodep = codep;
336
337 if (code >= 256) {
338 if(codep->length == 0) {
339 DecFail;
340 }
341 i = len = codep->length;
342 while ( (len > 0) && (len > occ) && codep ) {
343 codep = codep->next;
344 --len;
345 }
346 tp = op + len;
347 while ( codep && (tp > op) ) {
348 *(--tp) = codep->value;
349 codep = codep->next;
350 }
351 op += len;
352 occ -= i;
353 }
354 else if ( occ-- > 0 ) {
355 *op++ = (unsigned char)code;
356 }
357
358 GetNextCode(bp, code);
359 }
360
361 free( codetab );
362 if ( outputSize != NULL ) {
363 *outputSize = outputLimit - occ;
364 }
365 return ((occ >= 0) ? (outputLimit - occ) : (-2));
366 }
367
测试程序:
1 /*
2 测试 LZW 编码解码,
3 并未做到 100% 测试。
4 */
5
6 #include <stdio.h>
7 #include <string.h>
8 #include <time.h>
9 #include <stdlib.h>
10
11 #include "codec_lzw.h"
12
13 /* 测试字符串最大长度,确保使字典超出大小 */
14 #define L (4096+4096)
15
16 char a[ L + L ];
17 int na;
18
19 char b[ L + L ];
20 int nb;
21
22 int res;
23
24 /* 小数据 */
25 char *data[] =
26 {
27 "", /* 空串 */
28 "a",
29 "aaa",
30 "abcdefgabcdefgabcdefg",
31 "wabba_wabba_wabba_wabba_woo_woo_woo",
32 "abababababababab", /* 解码时使用未构造好的字典条目 */
33 };
34
35 /* 使字典超出大小的数据 */
36 #define DAL 5
37 char da[ L + 3 ];
38 void initDa( int i ) {
39 int j;
40 switch ( i ) {
41 case 0 :
42 for ( j = 0; j < L; ++j ) {
43 da[ j ] = 'a';
44 }
45 break;
46 case 1 :
47 for ( j = 0; j < L; ++j ) {
48 da[ j ] = ((j & 1) ? 'b' : 'a' );
49 }
50 break;
51 case 2 :
52 for ( j = 0; j < L; ++j ) {
53 da[ j ] = (rand() % 26) + 'a';
54 }
55 break;
56 case 3 :
57 for ( j = 0; j < L; ++j ) {
58 da[ j ] = (j % 26) + 'a';
59 }
60 break;
61 case 4 :
62 for ( j = 0; j < L; ++j ) {
63 da[ j ] = rand() & 0xFF;
64 }
65 break;
66 }
67 }
68
69 char* tobin( char d ) {
70 static char tmp[ 512 ];
71 int i;
72 for ( i = 7; i >= 0; --i ) {
73 tmp[ 7 - i ] = '0' + ((d >> i) & 0x1);
74 }
75 tmp[ 8 ] = '\0';
76 return tmp;
77 }
78
79
80 int main() {
81 printf( "\n ---- test ---- \n" );
82 int tc = sizeof(data) / sizeof(data[0]);
83 for ( tc = 0; tc < sizeof(data) / sizeof(data[0]); ++tc ) {
84 int i;
85 printf( "\n*********************\ntest case %d:\n", tc );
86 printf( "text : \n\"%s\"\n", data[tc] );
87 res = LZWencode( data[tc], strlen(data[tc]), a, L, &na );
88 printf( "encode : \nres = %d, len = %d\n", res, na );
89 for ( i = 0; i < na; ++i ) {
90 printf( "%s ", tobin(a[i]) );
91 }
92 res = LZWdecode( a, na, b, L, &nb );
93 printf( "\ndecode : \nres = %d, len = %d\n\"", res, nb );
94 for ( i = 0; i < nb; ++i ) {
95 printf( "%c", b[i] );
96 }
97 printf( "\"\n" );
98 }
99
100 printf( "\n\n ---- huge ---- \n" );
101 srand( time(NULL) );
102 for ( tc = 0; tc < DAL; ++tc ) {
103 printf( "\n*********************\nhuge case %d:\n", tc );
104 initDa( tc );
105 res = LZWencode( da, L, a, L+L, &na );
106 printf( "encode : \nres = %d, len = %d\n", res, na );
107 res = LZWdecode( a, na, b, L+L, &nb );
108 printf( "decode : \nres = %d, len = %d\n", res, nb );
109 printf( (0 == memcmp(da, b, nb)) ? "yes\n" : " no !!!!\n" );
110 }
111
112
113 printf( "\n\n ---- error ---- \n" );
114 for ( tc = 0; tc < DAL; ++tc ) {
115 printf( "\n*********************\nerror case %d:\n", tc );
116 initDa( tc );
117 res = LZWencode( da, L, a, L/3, &na );
118 printf( "encode : \nres = %d, len = %d\n", res, na );
119 if ( -2 == res ) {
120 res = LZWencode( da, L, a, na, &na );
121 printf( "encode : \nres = %d, len = %d\n", res, na );
122 }
123 res = LZWdecode( a, na, b, L/3, &nb );
124 printf( "decode : \nres = %d, len = %d\n", res, nb );
125 if ( -2 == res ) {
126 printf( (0 == memcmp(da, b, L/3)) ? "partly yes\n" : "partly no\n" );
127 res = LZWdecode( a, na, b, nb, &nb );
128 printf( "decode : \nres = %d, len = %d\n", res, nb );
129 }
130 printf( (0 == memcmp(da, b, nb)) ? "yes\n" : " no !!!!\n" );
131 }
132 return 0;
133 }
134
测试结果:
---- test ----
*********************
test case 0:
text :
""
encode :
res = 3, len = 3
10000000 01000000 01000000
decode :
res = 0, len = 0
""
*********************
test case 1:
text :
"a"
encode :
res = 4, len = 4
10000000 00011000 01100000 00100000
decode :
res = 1, len = 1
"a"
*********************
test case 2:
text :
"aaa"
encode :
res = 5, len = 5
10000000 00011000 01100000 01010000 00010000
decode :
res = 3, len = 3
"aaa"
*********************
test case 3:
text :
"abcdefgabcdefgabcdefg"
encode :
res = 18, len = 18
10000000 00011000 01001100 01000110 00110011 00100001 10010100 11001100 01100111 10000001 01000001 00100000 11010000 10001000 00011100 00010110 00001111 00000001
decode :
res = 21, len = 21
"abcdefgabcdefgabcdefg"
*********************
test case 4:
text :
"wabba_wabba_wabba_wabba_woo_woo_woo"
encode :
res = 26, len = 26
10000000 00011101 11001100 00100110 00100011 00010001 10000100 10111111 00000010 10000010 01000001 10100001 00010000 01011000 00111100 00001110 00011000 01110111 00110111 10011011 11100000 11110001 00011000 10011001 10111110 00000010
decode :
res = 35, len = 35
"wabba_wabba_wabba_wabba_woo_woo_woo"
*********************
test case 5:
text :
"abababababababab"
encode :
res = 11, len = 11
10000000 00011000 01001100 01010000 00101000 00100100 00001110 00001101 00000101 10000000 10000000
decode :
res = 16, len = 16
"abababababababab"
---- huge ----
*********************
huge case 0:
encode :
res = 147, len = 147
decode :
res = 8192, len = 8192
yes
*********************
huge case 1:
encode :
res = 206, len = 206
decode :
res = 8192, len = 8192
yes
*********************
huge case 2:
encode :
res = 6219, len = 6219
decode :
res = 8192, len = 8192
yes
*********************
huge case 3:
encode :
res = 771, len = 771
decode :
res = 8192, len = 8192
yes
*********************
huge case 4:
encode :
res = 11176, len = 11176
decode :
res = 8192, len = 8192
yes
---- error ----
*********************
error case 0:
encode :
res = 147, len = 147
decode :
res = -2, len = 8192
partly yes
decode :
res = 8192, len = 8192
yes
*********************
error case 1:
encode :
res = 206, len = 206
decode :
res = -2, len = 8192
partly yes
decode :
res = 8192, len = 8192
yes
*********************
error case 2:
encode :
res = -2, len = 6204
encode :
res = 6204, len = 6204
decode :
res = -2, len = 8192
partly yes
decode :
res = 8192, len = 8192
yes
*********************
error case 3:
encode :
res = 771, len = 771
decode :
res = -2, len = 8192
partly yes
decode :
res = 8192, len = 8192
yes
*********************
error case 4:
encode :
res = -2, len = 11161
encode :
res = 11161, len = 11161
decode :
res = -2, len = 8192
partly yes
decode :
res = 8192, len = 8192
yes