Annotation of /trunk/mkinitrd-magellan/klibc/usr/gzip/inflate.c
Parent Directory | Revision Log
Revision 815 -
(hide annotations)
(download)
Fri Apr 24 18:32:46 2009 UTC (15 years ago) by niro
File MIME type: text/plain
File size: 31539 byte(s)
Fri Apr 24 18:32:46 2009 UTC (15 years ago) by niro
File MIME type: text/plain
File size: 31539 byte(s)
-updated to klibc-1.5.15
1 | niro | 532 | /* inflate.c -- Not copyrighted 1992 by Mark Adler |
2 | version c10p1, 10 January 1993 */ | ||
3 | |||
4 | /* You can do whatever you like with this source file, though I would | ||
5 | prefer that if you modify it and redistribute it that you include | ||
6 | comments to that effect with your name and the date. Thank you. | ||
7 | [The history has been moved to the file ChangeLog.] | ||
8 | */ | ||
9 | |||
10 | /* | ||
11 | Inflate deflated (PKZIP's method 8 compressed) data. The compression | ||
12 | method searches for as much of the current string of bytes (up to a | ||
13 | length of 258) in the previous 32K bytes. If it doesn't find any | ||
14 | matches (of at least length 3), it codes the next byte. Otherwise, it | ||
15 | codes the length of the matched string and its distance backwards from | ||
16 | the current position. There is a single Huffman code that codes both | ||
17 | single bytes (called "literals") and match lengths. A second Huffman | ||
18 | code codes the distance information, which follows a length code. Each | ||
19 | length or distance code actually represents a base value and a number | ||
20 | of "extra" (sometimes zero) bits to get to add to the base value. At | ||
21 | the end of each deflated block is a special end-of-block (EOB) literal/ | ||
22 | length code. The decoding process is basically: get a literal/length | ||
23 | code; if EOB then done; if a literal, emit the decoded byte; if a | ||
24 | length then get the distance and emit the referred-to bytes from the | ||
25 | sliding window of previously emitted data. | ||
26 | |||
27 | There are (currently) three kinds of inflate blocks: stored, fixed, and | ||
28 | dynamic. The compressor deals with some chunk of data at a time, and | ||
29 | decides which method to use on a chunk-by-chunk basis. A chunk might | ||
30 | typically be 32K or 64K. If the chunk is uncompressible, then the | ||
31 | "stored" method is used. In this case, the bytes are simply stored as | ||
32 | is, eight bits per byte, with none of the above coding. The bytes are | ||
33 | preceded by a count, since there is no longer an EOB code. | ||
34 | |||
35 | If the data is compressible, then either the fixed or dynamic methods | ||
36 | are used. In the dynamic method, the compressed data is preceded by | ||
37 | an encoding of the literal/length and distance Huffman codes that are | ||
38 | to be used to decode this block. The representation is itself Huffman | ||
39 | coded, and so is preceded by a description of that code. These code | ||
40 | descriptions take up a little space, and so for small blocks, there is | ||
41 | a predefined set of codes, called the fixed codes. The fixed method is | ||
42 | used if the block codes up smaller that way (usually for quite small | ||
43 | chunks), otherwise the dynamic method is used. In the latter case, the | ||
44 | codes are customized to the probabilities in the current block, and so | ||
45 | can code it much better than the pre-determined fixed codes. | ||
46 | |||
47 | The Huffman codes themselves are decoded using a mutli-level table | ||
48 | lookup, in order to maximize the speed of decoding plus the speed of | ||
49 | building the decoding tables. See the comments below that precede the | ||
50 | lbits and dbits tuning parameters. | ||
51 | */ | ||
52 | |||
53 | |||
54 | /* | ||
55 | Notes beyond the 1.93a appnote.txt: | ||
56 | |||
57 | 1. Distance pointers never point before the beginning of the output | ||
58 | stream. | ||
59 | 2. Distance pointers can point back across blocks, up to 32k away. | ||
60 | 3. There is an implied maximum of 7 bits for the bit length table and | ||
61 | 15 bits for the actual data. | ||
62 | 4. If only one code exists, then it is encoded using one bit. (Zero | ||
63 | would be more efficient, but perhaps a little confusing.) If two | ||
64 | codes exist, they are coded using one bit each (0 and 1). | ||
65 | 5. There is no way of sending zero distance codes--a dummy must be | ||
66 | sent if there are none. (History: a pre 2.0 version of PKZIP would | ||
67 | store blocks with no distance codes, but this was discovered to be | ||
68 | too harsh a criterion.) Valid only for 1.93a. 2.04c does allow | ||
69 | zero distance codes, which is sent as one code of zero bits in | ||
70 | length. | ||
71 | 6. There are up to 286 literal/length codes. Code 256 represents the | ||
72 | end-of-block. Note however that the static length tree defines | ||
73 | 288 codes just to fill out the Huffman codes. Codes 286 and 287 | ||
74 | cannot be used though, since there is no length base or extra bits | ||
75 | defined for them. Similarly, there are up to 30 distance codes. | ||
76 | However, static trees define 32 codes (all 5 bits) to fill out the | ||
77 | Huffman codes, but the last two had better not show up in the data. | ||
78 | 7. Unzip can check dynamic Huffman blocks for complete code sets. | ||
79 | The exception is that a single code would not be complete (see #4). | ||
80 | 8. The five bits following the block type is really the number of | ||
81 | literal codes sent minus 257. | ||
82 | 9. Length codes 8,16,16 are interpreted as 13 length codes of 8 bits | ||
83 | (1+6+6). Therefore, to output three times the length, you output | ||
84 | three codes (1+1+1), whereas to output four times the same length, | ||
85 | you only need two codes (1+3). Hmm. | ||
86 | 10. In the tree reconstruction algorithm, Code = Code + Increment | ||
87 | only if BitLength(i) is not zero. (Pretty obvious.) | ||
88 | 11. Correction: 4 Bits: # of Bit Length codes - 4 (4 - 19) | ||
89 | 12. Note: length code 284 can represent 227-258, but length code 285 | ||
90 | really is 258. The last length deserves its own, short code | ||
91 | since it gets used a lot in very redundant files. The length | ||
92 | 258 is special since 258 - 3 (the min match length) is 255. | ||
93 | 13. The literal/length and distance code bit lengths are read as a | ||
94 | single stream of lengths. It is possible (and advantageous) for | ||
95 | a repeat code (16, 17, or 18) to go across the boundary between | ||
96 | the two sets of lengths. | ||
97 | */ | ||
98 | |||
99 | #ifdef RCSID | ||
100 | niro | 815 | static char rcsid[] = "$Id: inflate.c,v 1.1 2002/08/18 00:59:21 hpa Exp $"; |
101 | niro | 532 | #endif |
102 | |||
103 | #include <sys/types.h> | ||
104 | #include <stdlib.h> | ||
105 | |||
106 | #include "tailor.h" | ||
107 | #include "gzip.h" | ||
108 | #define slide window | ||
109 | |||
110 | /* Huffman code lookup table entry--this entry is four bytes for machines | ||
111 | that have 16-bit pointers (e.g. PC's in the small or medium model). | ||
112 | Valid extra bits are 0..13. e == 15 is EOB (end of block), e == 16 | ||
113 | means that v is a literal, 16 < e < 32 means that v is a pointer to | ||
114 | the next table, which codes e - 16 bits, and lastly e == 99 indicates | ||
115 | an unused code. If a code with e == 99 is looked up, this implies an | ||
116 | error in the data. */ | ||
117 | struct huft { | ||
118 | uch e; /* number of extra bits or operation */ | ||
119 | uch b; /* number of bits in this code or subcode */ | ||
120 | union { | ||
121 | ush n; /* literal, length base, or distance base */ | ||
122 | struct huft *t; /* pointer to next level of table */ | ||
123 | } v; | ||
124 | }; | ||
125 | |||
126 | |||
127 | /* Function prototypes */ | ||
128 | int huft_build OF((unsigned *, unsigned, unsigned, ush *, ush *, | ||
129 | struct huft **, int *)); | ||
130 | int huft_free OF((struct huft *)); | ||
131 | int inflate_codes OF((struct huft *, struct huft *, int, int)); | ||
132 | int inflate_stored OF((void)); | ||
133 | int inflate_fixed OF((void)); | ||
134 | int inflate_dynamic OF((void)); | ||
135 | int inflate_block OF((int *)); | ||
136 | int inflate OF((void)); | ||
137 | |||
138 | |||
139 | /* The inflate algorithm uses a sliding 32K byte window on the uncompressed | ||
140 | stream to find repeated byte strings. This is implemented here as a | ||
141 | circular buffer. The index is updated simply by incrementing and then | ||
142 | and'ing with 0x7fff (32K-1). */ | ||
143 | /* It is left to other modules to supply the 32K area. It is assumed | ||
144 | to be usable as if it were declared "uch slide[32768];" or as just | ||
145 | "uch *slide;" and then malloc'ed in the latter case. The definition | ||
146 | must be in unzip.h, included above. */ | ||
147 | /* unsigned wp; current position in slide */ | ||
148 | #define wp outcnt | ||
149 | #define flush_output(w) (wp=(w),flush_window()) | ||
150 | |||
151 | /* Tables for deflate from PKZIP's appnote.txt. */ | ||
152 | static unsigned border[] = { /* Order of the bit length code lengths */ | ||
153 | 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; | ||
154 | static ush cplens[] = { /* Copy lengths for literal codes 257..285 */ | ||
155 | 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, | ||
156 | 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; | ||
157 | /* note: see note #13 above about the 258 in this list. */ | ||
158 | static ush cplext[] = { /* Extra bits for literal codes 257..285 */ | ||
159 | 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, | ||
160 | 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 99, 99}; /* 99==invalid */ | ||
161 | static ush cpdist[] = { /* Copy offsets for distance codes 0..29 */ | ||
162 | 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, | ||
163 | 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, | ||
164 | 8193, 12289, 16385, 24577}; | ||
165 | static ush cpdext[] = { /* Extra bits for distance codes */ | ||
166 | 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, | ||
167 | 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, | ||
168 | 12, 12, 13, 13}; | ||
169 | |||
170 | |||
171 | |||
172 | /* Macros for inflate() bit peeking and grabbing. | ||
173 | The usage is: | ||
174 | |||
175 | NEEDBITS(j) | ||
176 | x = b & mask_bits[j]; | ||
177 | DUMPBITS(j) | ||
178 | |||
179 | where NEEDBITS makes sure that b has at least j bits in it, and | ||
180 | DUMPBITS removes the bits from b. The macros use the variable k | ||
181 | for the number of bits in b. Normally, b and k are register | ||
182 | variables for speed, and are initialized at the beginning of a | ||
183 | routine that uses these macros from a global bit buffer and count. | ||
184 | |||
185 | If we assume that EOB will be the longest code, then we will never | ||
186 | ask for bits with NEEDBITS that are beyond the end of the stream. | ||
187 | So, NEEDBITS should not read any more bytes than are needed to | ||
188 | meet the request. Then no bytes need to be "returned" to the buffer | ||
189 | at the end of the last block. | ||
190 | |||
191 | However, this assumption is not true for fixed blocks--the EOB code | ||
192 | is 7 bits, but the other literal/length codes can be 8 or 9 bits. | ||
193 | (The EOB code is shorter than other codes because fixed blocks are | ||
194 | generally short. So, while a block always has an EOB, many other | ||
195 | literal/length codes have a significantly lower probability of | ||
196 | showing up at all.) However, by making the first table have a | ||
197 | lookup of seven bits, the EOB code will be found in that first | ||
198 | lookup, and so will not require that too many bits be pulled from | ||
199 | the stream. | ||
200 | */ | ||
201 | |||
202 | ulg bb; /* bit buffer */ | ||
203 | unsigned bk; /* bits in bit buffer */ | ||
204 | |||
205 | ush mask_bits[] = { | ||
206 | 0x0000, | ||
207 | 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff, | ||
208 | 0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0xffff | ||
209 | }; | ||
210 | |||
211 | #ifdef CRYPT | ||
212 | uch cc; | ||
213 | # define NEXTBYTE() \ | ||
214 | (decrypt ? (cc = get_byte(), cc) : get_byte()) | ||
215 | #else | ||
216 | # define NEXTBYTE() (uch)get_byte() | ||
217 | #endif | ||
218 | #define NEEDBITS(n) {while(k<(n)){b|=((ulg)NEXTBYTE())<<k;k+=8;}} | ||
219 | #define DUMPBITS(n) {b>>=(n);k-=(n);} | ||
220 | |||
221 | |||
222 | /* | ||
223 | Huffman code decoding is performed using a multi-level table lookup. | ||
224 | The fastest way to decode is to simply build a lookup table whose | ||
225 | size is determined by the longest code. However, the time it takes | ||
226 | to build this table can also be a factor if the data being decoded | ||
227 | is not very long. The most common codes are necessarily the | ||
228 | shortest codes, so those codes dominate the decoding time, and hence | ||
229 | the speed. The idea is you can have a shorter table that decodes the | ||
230 | shorter, more probable codes, and then point to subsidiary tables for | ||
231 | the longer codes. The time it costs to decode the longer codes is | ||
232 | then traded against the time it takes to make longer tables. | ||
233 | |||
234 | This results of this trade are in the variables lbits and dbits | ||
235 | below. lbits is the number of bits the first level table for literal/ | ||
236 | length codes can decode in one step, and dbits is the same thing for | ||
237 | the distance codes. Subsequent tables are also less than or equal to | ||
238 | those sizes. These values may be adjusted either when all of the | ||
239 | codes are shorter than that, in which case the longest code length in | ||
240 | bits is used, or when the shortest code is *longer* than the requested | ||
241 | table size, in which case the length of the shortest code in bits is | ||
242 | used. | ||
243 | |||
244 | There are two different values for the two tables, since they code a | ||
245 | different number of possibilities each. The literal/length table | ||
246 | codes 286 possible values, or in a flat code, a little over eight | ||
247 | bits. The distance table codes 30 possible values, or a little less | ||
248 | than five bits, flat. The optimum values for speed end up being | ||
249 | about one bit more than those, so lbits is 8+1 and dbits is 5+1. | ||
250 | The optimum values may differ though from machine to machine, and | ||
251 | possibly even between compilers. Your mileage may vary. | ||
252 | */ | ||
253 | |||
254 | |||
255 | int lbits = 9; /* bits in base literal/length lookup table */ | ||
256 | int dbits = 6; /* bits in base distance lookup table */ | ||
257 | |||
258 | |||
259 | /* If BMAX needs to be larger than 16, then h and x[] should be ulg. */ | ||
260 | #define BMAX 16 /* maximum bit length of any code (16 for explode) */ | ||
261 | #define N_MAX 288 /* maximum number of codes in any set */ | ||
262 | |||
263 | |||
264 | unsigned hufts; /* track memory usage */ | ||
265 | |||
266 | |||
267 | int huft_build(b, n, s, d, e, t, m) | ||
268 | unsigned *b; /* code lengths in bits (all assumed <= BMAX) */ | ||
269 | unsigned n; /* number of codes (assumed <= N_MAX) */ | ||
270 | unsigned s; /* number of simple-valued codes (0..s-1) */ | ||
271 | ush *d; /* list of base values for non-simple codes */ | ||
272 | ush *e; /* list of extra bits for non-simple codes */ | ||
273 | struct huft **t; /* result: starting table */ | ||
274 | int *m; /* maximum lookup bits, returns actual */ | ||
275 | /* Given a list of code lengths and a maximum table size, make a set of | ||
276 | tables to decode that set of codes. Return zero on success, one if | ||
277 | the given code set is incomplete (the tables are still built in this | ||
278 | case), two if the input is invalid (all zero length codes or an | ||
279 | oversubscribed set of lengths), and three if not enough memory. */ | ||
280 | { | ||
281 | unsigned a; /* counter for codes of length k */ | ||
282 | unsigned c[BMAX+1]; /* bit length count table */ | ||
283 | unsigned f; /* i repeats in table every f entries */ | ||
284 | int g; /* maximum code length */ | ||
285 | int h; /* table level */ | ||
286 | register unsigned i; /* counter, current code */ | ||
287 | register unsigned j; /* counter */ | ||
288 | register int k; /* number of bits in current code */ | ||
289 | int l; /* bits per table (returned in m) */ | ||
290 | register unsigned *p; /* pointer into c[], b[], or v[] */ | ||
291 | register struct huft *q; /* points to current table */ | ||
292 | struct huft r; /* table entry for structure assignment */ | ||
293 | struct huft *u[BMAX]; /* table stack */ | ||
294 | unsigned v[N_MAX]; /* values in order of bit length */ | ||
295 | register int w; /* bits before this table == (l * h) */ | ||
296 | unsigned x[BMAX+1]; /* bit offsets, then code stack */ | ||
297 | unsigned *xp; /* pointer into x */ | ||
298 | int y; /* number of dummy codes added */ | ||
299 | unsigned z; /* number of entries in current table */ | ||
300 | |||
301 | |||
302 | /* Generate counts for each bit length */ | ||
303 | memzero(c, sizeof(c)); | ||
304 | p = b; i = n; | ||
305 | do { | ||
306 | Tracecv(*p, (stderr, (n-i >= ' ' && n-i <= '~' ? "%c %d\n" : "0x%x %d\n"), | ||
307 | n-i, *p)); | ||
308 | c[*p]++; /* assume all entries <= BMAX */ | ||
309 | p++; /* Can't combine with above line (Solaris bug) */ | ||
310 | } while (--i); | ||
311 | if (c[0] == n) /* null input--all zero length codes */ | ||
312 | { | ||
313 | *t = (struct huft *)NULL; | ||
314 | *m = 0; | ||
315 | return 0; | ||
316 | } | ||
317 | |||
318 | |||
319 | /* Find minimum and maximum length, bound *m by those */ | ||
320 | l = *m; | ||
321 | for (j = 1; j <= BMAX; j++) | ||
322 | if (c[j]) | ||
323 | break; | ||
324 | k = j; /* minimum code length */ | ||
325 | if ((unsigned)l < j) | ||
326 | l = j; | ||
327 | for (i = BMAX; i; i--) | ||
328 | if (c[i]) | ||
329 | break; | ||
330 | g = i; /* maximum code length */ | ||
331 | if ((unsigned)l > i) | ||
332 | l = i; | ||
333 | *m = l; | ||
334 | |||
335 | |||
336 | /* Adjust last length count to fill out codes, if needed */ | ||
337 | for (y = 1 << j; j < i; j++, y <<= 1) | ||
338 | if ((y -= c[j]) < 0) | ||
339 | return 2; /* bad input: more codes than bits */ | ||
340 | if ((y -= c[i]) < 0) | ||
341 | return 2; | ||
342 | c[i] += y; | ||
343 | |||
344 | |||
345 | /* Generate starting offsets into the value table for each length */ | ||
346 | x[1] = j = 0; | ||
347 | p = c + 1; xp = x + 2; | ||
348 | while (--i) { /* note that i == g from above */ | ||
349 | *xp++ = (j += *p++); | ||
350 | } | ||
351 | |||
352 | |||
353 | /* Make a table of values in order of bit lengths */ | ||
354 | p = b; i = 0; | ||
355 | do { | ||
356 | if ((j = *p++) != 0) | ||
357 | v[x[j]++] = i; | ||
358 | } while (++i < n); | ||
359 | |||
360 | |||
361 | /* Generate the Huffman codes and for each, make the table entries */ | ||
362 | x[0] = i = 0; /* first Huffman code is zero */ | ||
363 | p = v; /* grab values in bit order */ | ||
364 | h = -1; /* no tables yet--level -1 */ | ||
365 | w = -l; /* bits decoded == (l * h) */ | ||
366 | u[0] = (struct huft *)NULL; /* just to keep compilers happy */ | ||
367 | q = (struct huft *)NULL; /* ditto */ | ||
368 | z = 0; /* ditto */ | ||
369 | |||
370 | /* go through the bit lengths (k already is bits in shortest code) */ | ||
371 | for (; k <= g; k++) | ||
372 | { | ||
373 | a = c[k]; | ||
374 | while (a--) | ||
375 | { | ||
376 | /* here i is the Huffman code of length k bits for value *p */ | ||
377 | /* make tables up to required level */ | ||
378 | while (k > w + l) | ||
379 | { | ||
380 | h++; | ||
381 | w += l; /* previous table always l bits */ | ||
382 | |||
383 | /* compute minimum size table less than or equal to l bits */ | ||
384 | z = (z = g - w) > (unsigned)l ? (unsigned)l : z; /* upper limit on table size */ | ||
385 | if ((f = 1 << (j = k - w)) > a + 1) /* try a k-w bit table */ | ||
386 | { /* too few codes for k-w bit table */ | ||
387 | f -= a + 1; /* deduct codes from patterns left */ | ||
388 | xp = c + k; | ||
389 | while (++j < z) /* try smaller tables up to z bits */ | ||
390 | { | ||
391 | if ((f <<= 1) <= *++xp) | ||
392 | break; /* enough codes to use up j bits */ | ||
393 | f -= *xp; /* else deduct codes from patterns */ | ||
394 | } | ||
395 | } | ||
396 | z = 1 << j; /* table entries for j-bit table */ | ||
397 | |||
398 | /* allocate and link in new table */ | ||
399 | if ((q = (struct huft *)malloc((z + 1)*sizeof(struct huft))) == | ||
400 | (struct huft *)NULL) | ||
401 | { | ||
402 | if (h) | ||
403 | huft_free(u[0]); | ||
404 | return 3; /* not enough memory */ | ||
405 | } | ||
406 | hufts += z + 1; /* track memory usage */ | ||
407 | *t = q + 1; /* link to list for huft_free() */ | ||
408 | *(t = &(q->v.t)) = (struct huft *)NULL; | ||
409 | u[h] = ++q; /* table starts after link */ | ||
410 | |||
411 | /* connect to last table, if there is one */ | ||
412 | if (h) | ||
413 | { | ||
414 | x[h] = i; /* save pattern for backing up */ | ||
415 | r.b = (uch)l; /* bits to dump before this table */ | ||
416 | r.e = (uch)(16 + j); /* bits in this table */ | ||
417 | r.v.t = q; /* pointer to this table */ | ||
418 | j = i >> (w - l); /* (get around Turbo C bug) */ | ||
419 | u[h-1][j] = r; /* connect to last table */ | ||
420 | } | ||
421 | } | ||
422 | |||
423 | /* set up table entry in r */ | ||
424 | r.b = (uch)(k - w); | ||
425 | if (p >= v + n) | ||
426 | r.e = 99; /* out of values--invalid code */ | ||
427 | else if (*p < s) | ||
428 | { | ||
429 | r.e = (uch)(*p < 256 ? 16 : 15); /* 256 is end-of-block code */ | ||
430 | r.v.n = (ush)(*p); /* simple code is just the value */ | ||
431 | p++; /* one compiler does not like *p++ */ | ||
432 | } | ||
433 | else | ||
434 | { | ||
435 | r.e = (uch)e[*p - s]; /* non-simple--look up in lists */ | ||
436 | r.v.n = d[*p++ - s]; | ||
437 | } | ||
438 | |||
439 | /* fill code-like entries with r */ | ||
440 | f = 1 << (k - w); | ||
441 | for (j = i >> w; j < z; j += f) | ||
442 | q[j] = r; | ||
443 | |||
444 | /* backwards increment the k-bit code i */ | ||
445 | for (j = 1 << (k - 1); i & j; j >>= 1) | ||
446 | i ^= j; | ||
447 | i ^= j; | ||
448 | |||
449 | /* backup over finished tables */ | ||
450 | while ((i & ((1 << w) - 1)) != x[h]) | ||
451 | { | ||
452 | h--; /* don't need to update q */ | ||
453 | w -= l; | ||
454 | } | ||
455 | } | ||
456 | } | ||
457 | |||
458 | |||
459 | /* Return true (1) if we were given an incomplete table */ | ||
460 | return y != 0 && g != 1; | ||
461 | } | ||
462 | |||
463 | |||
464 | |||
465 | int huft_free(t) | ||
466 | struct huft *t; /* table to free */ | ||
467 | /* Free the malloc'ed tables built by huft_build(), which makes a linked | ||
468 | list of the tables it made, with the links in a dummy first entry of | ||
469 | each table. */ | ||
470 | { | ||
471 | register struct huft *p, *q; | ||
472 | |||
473 | |||
474 | /* Go through linked list, freeing from the malloced (t[-1]) address. */ | ||
475 | p = t; | ||
476 | while (p != (struct huft *)NULL) | ||
477 | { | ||
478 | q = (--p)->v.t; | ||
479 | free((char*)p); | ||
480 | p = q; | ||
481 | } | ||
482 | return 0; | ||
483 | } | ||
484 | |||
485 | |||
486 | int inflate_codes(tl, td, bl, bd) | ||
487 | struct huft *tl, *td; /* literal/length and distance decoder tables */ | ||
488 | int bl, bd; /* number of bits decoded by tl[] and td[] */ | ||
489 | /* inflate (decompress) the codes in a deflated (compressed) block. | ||
490 | Return an error code or zero if it all goes ok. */ | ||
491 | { | ||
492 | register unsigned e; /* table entry flag/number of extra bits */ | ||
493 | unsigned n, d; /* length and index for copy */ | ||
494 | unsigned w; /* current window position */ | ||
495 | struct huft *t; /* pointer to table entry */ | ||
496 | unsigned ml, md; /* masks for bl and bd bits */ | ||
497 | register ulg b; /* bit buffer */ | ||
498 | register unsigned k; /* number of bits in bit buffer */ | ||
499 | |||
500 | |||
501 | /* make local copies of globals */ | ||
502 | b = bb; /* initialize bit buffer */ | ||
503 | k = bk; | ||
504 | w = wp; /* initialize window position */ | ||
505 | |||
506 | /* inflate the coded data */ | ||
507 | ml = mask_bits[bl]; /* precompute masks for speed */ | ||
508 | md = mask_bits[bd]; | ||
509 | for (;;) /* do until end of block */ | ||
510 | { | ||
511 | NEEDBITS((unsigned)bl) | ||
512 | if ((e = (t = tl + ((unsigned)b & ml))->e) > 16) | ||
513 | do { | ||
514 | if (e == 99) | ||
515 | return 1; | ||
516 | DUMPBITS(t->b) | ||
517 | e -= 16; | ||
518 | NEEDBITS(e) | ||
519 | } while ((e = (t = t->v.t + ((unsigned)b & mask_bits[e]))->e) > 16); | ||
520 | DUMPBITS(t->b) | ||
521 | if (e == 16) /* then it's a literal */ | ||
522 | { | ||
523 | slide[w++] = (uch)t->v.n; | ||
524 | Tracevv((stderr, "%c", slide[w-1])); | ||
525 | if (w == WSIZE) | ||
526 | { | ||
527 | flush_output(w); | ||
528 | w = 0; | ||
529 | } | ||
530 | } | ||
531 | else /* it's an EOB or a length */ | ||
532 | { | ||
533 | /* exit if end of block */ | ||
534 | if (e == 15) | ||
535 | break; | ||
536 | |||
537 | /* get length of block to copy */ | ||
538 | NEEDBITS(e) | ||
539 | n = t->v.n + ((unsigned)b & mask_bits[e]); | ||
540 | DUMPBITS(e); | ||
541 | |||
542 | /* decode distance of block to copy */ | ||
543 | NEEDBITS((unsigned)bd) | ||
544 | if ((e = (t = td + ((unsigned)b & md))->e) > 16) | ||
545 | do { | ||
546 | if (e == 99) | ||
547 | return 1; | ||
548 | DUMPBITS(t->b) | ||
549 | e -= 16; | ||
550 | NEEDBITS(e) | ||
551 | } while ((e = (t = t->v.t + ((unsigned)b & mask_bits[e]))->e) > 16); | ||
552 | DUMPBITS(t->b) | ||
553 | NEEDBITS(e) | ||
554 | d = w - t->v.n - ((unsigned)b & mask_bits[e]); | ||
555 | DUMPBITS(e) | ||
556 | Tracevv((stderr,"\\[%d,%d]", w-d, n)); | ||
557 | |||
558 | /* do the copy */ | ||
559 | do { | ||
560 | n -= (e = (e = WSIZE - ((d &= WSIZE-1) > w ? d : w)) > n ? n : e); | ||
561 | #if !defined(NOMEMCPY) && !defined(DEBUG) | ||
562 | if (w - d >= e) /* (this test assumes unsigned comparison) */ | ||
563 | { | ||
564 | memcpy(slide + w, slide + d, e); | ||
565 | w += e; | ||
566 | d += e; | ||
567 | } | ||
568 | else /* do it slow to avoid memcpy() overlap */ | ||
569 | #endif /* !NOMEMCPY */ | ||
570 | do { | ||
571 | slide[w++] = slide[d++]; | ||
572 | Tracevv((stderr, "%c", slide[w-1])); | ||
573 | } while (--e); | ||
574 | if (w == WSIZE) | ||
575 | { | ||
576 | flush_output(w); | ||
577 | w = 0; | ||
578 | } | ||
579 | } while (n); | ||
580 | } | ||
581 | } | ||
582 | |||
583 | |||
584 | /* restore the globals from the locals */ | ||
585 | wp = w; /* restore global window pointer */ | ||
586 | bb = b; /* restore global bit buffer */ | ||
587 | bk = k; | ||
588 | |||
589 | /* done */ | ||
590 | return 0; | ||
591 | } | ||
592 | |||
593 | |||
594 | |||
595 | int inflate_stored() | ||
596 | /* "decompress" an inflated type 0 (stored) block. */ | ||
597 | { | ||
598 | unsigned n; /* number of bytes in block */ | ||
599 | unsigned w; /* current window position */ | ||
600 | register ulg b; /* bit buffer */ | ||
601 | register unsigned k; /* number of bits in bit buffer */ | ||
602 | |||
603 | |||
604 | /* make local copies of globals */ | ||
605 | b = bb; /* initialize bit buffer */ | ||
606 | k = bk; | ||
607 | w = wp; /* initialize window position */ | ||
608 | |||
609 | |||
610 | /* go to byte boundary */ | ||
611 | n = k & 7; | ||
612 | DUMPBITS(n); | ||
613 | |||
614 | |||
615 | /* get the length and its complement */ | ||
616 | NEEDBITS(16) | ||
617 | n = ((unsigned)b & 0xffff); | ||
618 | DUMPBITS(16) | ||
619 | NEEDBITS(16) | ||
620 | if (n != (unsigned)((~b) & 0xffff)) | ||
621 | return 1; /* error in compressed data */ | ||
622 | DUMPBITS(16) | ||
623 | |||
624 | |||
625 | /* read and output the compressed data */ | ||
626 | while (n--) | ||
627 | { | ||
628 | NEEDBITS(8) | ||
629 | slide[w++] = (uch)b; | ||
630 | if (w == WSIZE) | ||
631 | { | ||
632 | flush_output(w); | ||
633 | w = 0; | ||
634 | } | ||
635 | DUMPBITS(8) | ||
636 | } | ||
637 | |||
638 | |||
639 | /* restore the globals from the locals */ | ||
640 | wp = w; /* restore global window pointer */ | ||
641 | bb = b; /* restore global bit buffer */ | ||
642 | bk = k; | ||
643 | return 0; | ||
644 | } | ||
645 | |||
646 | |||
647 | |||
648 | int inflate_fixed() | ||
649 | /* decompress an inflated type 1 (fixed Huffman codes) block. We should | ||
650 | either replace this with a custom decoder, or at least precompute the | ||
651 | Huffman tables. */ | ||
652 | { | ||
653 | int i; /* temporary variable */ | ||
654 | struct huft *tl; /* literal/length code table */ | ||
655 | struct huft *td; /* distance code table */ | ||
656 | int bl; /* lookup bits for tl */ | ||
657 | int bd; /* lookup bits for td */ | ||
658 | unsigned l[288]; /* length list for huft_build */ | ||
659 | |||
660 | |||
661 | /* set up literal table */ | ||
662 | for (i = 0; i < 144; i++) | ||
663 | l[i] = 8; | ||
664 | for (; i < 256; i++) | ||
665 | l[i] = 9; | ||
666 | for (; i < 280; i++) | ||
667 | l[i] = 7; | ||
668 | for (; i < 288; i++) /* make a complete, but wrong code set */ | ||
669 | l[i] = 8; | ||
670 | bl = 7; | ||
671 | if ((i = huft_build(l, 288, 257, cplens, cplext, &tl, &bl)) != 0) | ||
672 | return i; | ||
673 | |||
674 | |||
675 | /* set up distance table */ | ||
676 | for (i = 0; i < 30; i++) /* make an incomplete code set */ | ||
677 | l[i] = 5; | ||
678 | bd = 5; | ||
679 | if ((i = huft_build(l, 30, 0, cpdist, cpdext, &td, &bd)) > 1) | ||
680 | { | ||
681 | huft_free(tl); | ||
682 | return i; | ||
683 | } | ||
684 | |||
685 | |||
686 | /* decompress until an end-of-block code */ | ||
687 | if (inflate_codes(tl, td, bl, bd)) | ||
688 | return 1; | ||
689 | |||
690 | |||
691 | /* free the decoding tables, return */ | ||
692 | huft_free(tl); | ||
693 | huft_free(td); | ||
694 | return 0; | ||
695 | } | ||
696 | |||
697 | |||
698 | |||
699 | int inflate_dynamic() | ||
700 | /* decompress an inflated type 2 (dynamic Huffman codes) block. */ | ||
701 | { | ||
702 | int i; /* temporary variables */ | ||
703 | unsigned j; | ||
704 | unsigned l; /* last length */ | ||
705 | unsigned m; /* mask for bit lengths table */ | ||
706 | unsigned n; /* number of lengths to get */ | ||
707 | struct huft *tl; /* literal/length code table */ | ||
708 | struct huft *td; /* distance code table */ | ||
709 | int bl; /* lookup bits for tl */ | ||
710 | int bd; /* lookup bits for td */ | ||
711 | unsigned nb; /* number of bit length codes */ | ||
712 | unsigned nl; /* number of literal/length codes */ | ||
713 | unsigned nd; /* number of distance codes */ | ||
714 | #ifdef PKZIP_BUG_WORKAROUND | ||
715 | unsigned ll[288+32]; /* literal/length and distance code lengths */ | ||
716 | #else | ||
717 | unsigned ll[286+30]; /* literal/length and distance code lengths */ | ||
718 | #endif | ||
719 | register ulg b; /* bit buffer */ | ||
720 | register unsigned k; /* number of bits in bit buffer */ | ||
721 | |||
722 | |||
723 | /* make local bit buffer */ | ||
724 | b = bb; | ||
725 | k = bk; | ||
726 | |||
727 | |||
728 | /* read in table lengths */ | ||
729 | NEEDBITS(5) | ||
730 | nl = 257 + ((unsigned)b & 0x1f); /* number of literal/length codes */ | ||
731 | DUMPBITS(5) | ||
732 | NEEDBITS(5) | ||
733 | nd = 1 + ((unsigned)b & 0x1f); /* number of distance codes */ | ||
734 | DUMPBITS(5) | ||
735 | NEEDBITS(4) | ||
736 | nb = 4 + ((unsigned)b & 0xf); /* number of bit length codes */ | ||
737 | DUMPBITS(4) | ||
738 | #ifdef PKZIP_BUG_WORKAROUND | ||
739 | if (nl > 288 || nd > 32) | ||
740 | #else | ||
741 | if (nl > 286 || nd > 30) | ||
742 | #endif | ||
743 | return 1; /* bad lengths */ | ||
744 | |||
745 | |||
746 | /* read in bit-length-code lengths */ | ||
747 | for (j = 0; j < nb; j++) | ||
748 | { | ||
749 | NEEDBITS(3) | ||
750 | ll[border[j]] = (unsigned)b & 7; | ||
751 | DUMPBITS(3) | ||
752 | } | ||
753 | for (; j < 19; j++) | ||
754 | ll[border[j]] = 0; | ||
755 | |||
756 | |||
757 | /* build decoding table for trees--single level, 7 bit lookup */ | ||
758 | bl = 7; | ||
759 | if ((i = huft_build(ll, 19, 19, NULL, NULL, &tl, &bl)) != 0) | ||
760 | { | ||
761 | if (i == 1) | ||
762 | huft_free(tl); | ||
763 | return i; /* incomplete code set */ | ||
764 | } | ||
765 | |||
766 | |||
767 | /* read in literal and distance code lengths */ | ||
768 | n = nl + nd; | ||
769 | m = mask_bits[bl]; | ||
770 | i = l = 0; | ||
771 | while ((unsigned)i < n) | ||
772 | { | ||
773 | NEEDBITS((unsigned)bl) | ||
774 | j = (td = tl + ((unsigned)b & m))->b; | ||
775 | DUMPBITS(j) | ||
776 | j = td->v.n; | ||
777 | if (j < 16) /* length of code in bits (0..15) */ | ||
778 | ll[i++] = l = j; /* save last length in l */ | ||
779 | else if (j == 16) /* repeat last length 3 to 6 times */ | ||
780 | { | ||
781 | NEEDBITS(2) | ||
782 | j = 3 + ((unsigned)b & 3); | ||
783 | DUMPBITS(2) | ||
784 | if ((unsigned)i + j > n) | ||
785 | return 1; | ||
786 | while (j--) | ||
787 | ll[i++] = l; | ||
788 | } | ||
789 | else if (j == 17) /* 3 to 10 zero length codes */ | ||
790 | { | ||
791 | NEEDBITS(3) | ||
792 | j = 3 + ((unsigned)b & 7); | ||
793 | DUMPBITS(3) | ||
794 | if ((unsigned)i + j > n) | ||
795 | return 1; | ||
796 | while (j--) | ||
797 | ll[i++] = 0; | ||
798 | l = 0; | ||
799 | } | ||
800 | else /* j == 18: 11 to 138 zero length codes */ | ||
801 | { | ||
802 | NEEDBITS(7) | ||
803 | j = 11 + ((unsigned)b & 0x7f); | ||
804 | DUMPBITS(7) | ||
805 | if ((unsigned)i + j > n) | ||
806 | return 1; | ||
807 | while (j--) | ||
808 | ll[i++] = 0; | ||
809 | l = 0; | ||
810 | } | ||
811 | } | ||
812 | |||
813 | |||
814 | /* free decoding table for trees */ | ||
815 | huft_free(tl); | ||
816 | |||
817 | |||
818 | /* restore the global bit buffer */ | ||
819 | bb = b; | ||
820 | bk = k; | ||
821 | |||
822 | |||
823 | /* build the decoding tables for literal/length and distance codes */ | ||
824 | bl = lbits; | ||
825 | if ((i = huft_build(ll, nl, 257, cplens, cplext, &tl, &bl)) != 0) | ||
826 | { | ||
827 | if (i == 1) { | ||
828 | fprintf(stderr, " incomplete literal tree\n"); | ||
829 | huft_free(tl); | ||
830 | } | ||
831 | return i; /* incomplete code set */ | ||
832 | } | ||
833 | bd = dbits; | ||
834 | if ((i = huft_build(ll + nl, nd, 0, cpdist, cpdext, &td, &bd)) != 0) | ||
835 | { | ||
836 | if (i == 1) { | ||
837 | fprintf(stderr, " incomplete distance tree\n"); | ||
838 | #ifdef PKZIP_BUG_WORKAROUND | ||
839 | i = 0; | ||
840 | } | ||
841 | #else | ||
842 | huft_free(td); | ||
843 | } | ||
844 | huft_free(tl); | ||
845 | return i; /* incomplete code set */ | ||
846 | #endif | ||
847 | } | ||
848 | |||
849 | |||
850 | /* decompress until an end-of-block code */ | ||
851 | if (inflate_codes(tl, td, bl, bd)) | ||
852 | return 1; | ||
853 | |||
854 | |||
855 | /* free the decoding tables, return */ | ||
856 | huft_free(tl); | ||
857 | huft_free(td); | ||
858 | return 0; | ||
859 | } | ||
860 | |||
861 | |||
862 | |||
863 | int inflate_block(e) | ||
864 | int *e; /* last block flag */ | ||
865 | /* decompress an inflated block */ | ||
866 | { | ||
867 | unsigned t; /* block type */ | ||
868 | register ulg b; /* bit buffer */ | ||
869 | register unsigned k; /* number of bits in bit buffer */ | ||
870 | |||
871 | |||
872 | /* make local bit buffer */ | ||
873 | b = bb; | ||
874 | k = bk; | ||
875 | |||
876 | |||
877 | /* read in last block bit */ | ||
878 | NEEDBITS(1) | ||
879 | *e = (int)b & 1; | ||
880 | DUMPBITS(1) | ||
881 | |||
882 | |||
883 | /* read in block type */ | ||
884 | NEEDBITS(2) | ||
885 | t = (unsigned)b & 3; | ||
886 | DUMPBITS(2) | ||
887 | |||
888 | |||
889 | /* restore the global bit buffer */ | ||
890 | bb = b; | ||
891 | bk = k; | ||
892 | |||
893 | |||
894 | /* inflate that block type */ | ||
895 | if (t == 2) | ||
896 | return inflate_dynamic(); | ||
897 | if (t == 0) | ||
898 | return inflate_stored(); | ||
899 | if (t == 1) | ||
900 | return inflate_fixed(); | ||
901 | |||
902 | |||
903 | /* bad block type */ | ||
904 | return 2; | ||
905 | } | ||
906 | |||
907 | |||
908 | |||
909 | int inflate() | ||
910 | /* decompress an inflated entry */ | ||
911 | { | ||
912 | int e; /* last block flag */ | ||
913 | int r; /* result code */ | ||
914 | unsigned h; /* maximum struct huft's malloc'ed */ | ||
915 | |||
916 | |||
917 | /* initialize window, bit buffer */ | ||
918 | wp = 0; | ||
919 | bk = 0; | ||
920 | bb = 0; | ||
921 | |||
922 | |||
923 | /* decompress until the last block */ | ||
924 | h = 0; | ||
925 | do { | ||
926 | hufts = 0; | ||
927 | if ((r = inflate_block(&e)) != 0) | ||
928 | return r; | ||
929 | if (hufts > h) | ||
930 | h = hufts; | ||
931 | } while (!e); | ||
932 | |||
933 | /* Undo too much lookahead. The next read will be byte aligned so we | ||
934 | * can discard unused bits in the last meaningful byte. | ||
935 | */ | ||
936 | while (bk >= 8) { | ||
937 | bk -= 8; | ||
938 | inptr--; | ||
939 | } | ||
940 | |||
941 | /* flush out slide */ | ||
942 | flush_output(wp); | ||
943 | |||
944 | |||
945 | /* return success */ | ||
946 | #ifdef DEBUG | ||
947 | fprintf(stderr, "<%u> ", h); | ||
948 | #endif /* DEBUG */ | ||
949 | return 0; | ||
950 | } |