Contents of /trunk/mkinitrd-magellan/busybox/archival/libunarchive/decompress_unlzma.c
Parent Directory | Revision Log
Revision 816 -
(show annotations)
(download)
Fri Apr 24 18:33:46 2009 UTC (15 years, 1 month ago) by niro
File MIME type: text/plain
File size: 12988 byte(s)
Fri Apr 24 18:33:46 2009 UTC (15 years, 1 month ago) by niro
File MIME type: text/plain
File size: 12988 byte(s)
-updated to busybox-1.13.4
1 | /* vi: set sw=4 ts=4: */ |
2 | /* |
3 | * Small lzma deflate implementation. |
4 | * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org> |
5 | * |
6 | * Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/) |
7 | * Copyright (C) 1999-2005 Igor Pavlov |
8 | * |
9 | * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. |
10 | */ |
11 | |
12 | #include "libbb.h" |
13 | #include "unarchive.h" |
14 | |
15 | #if ENABLE_FEATURE_LZMA_FAST |
16 | # define speed_inline ALWAYS_INLINE |
17 | #else |
18 | # define speed_inline |
19 | #endif |
20 | |
21 | |
22 | typedef struct { |
23 | int fd; |
24 | uint8_t *ptr; |
25 | |
26 | /* Was keeping rc on stack in unlzma and separately allocating buffer, |
27 | * but with "buffer 'attached to' allocated rc" code is smaller: */ |
28 | /* uint8_t *buffer; */ |
29 | #define RC_BUFFER ((uint8_t*)(rc+1)) |
30 | |
31 | uint8_t *buffer_end; |
32 | |
33 | /* Had provisions for variable buffer, but we don't need it here */ |
34 | /* int buffer_size; */ |
35 | #define RC_BUFFER_SIZE 0x10000 |
36 | |
37 | uint32_t code; |
38 | uint32_t range; |
39 | uint32_t bound; |
40 | } rc_t; |
41 | |
42 | #define RC_TOP_BITS 24 |
43 | #define RC_MOVE_BITS 5 |
44 | #define RC_MODEL_TOTAL_BITS 11 |
45 | |
46 | |
47 | /* Called twice: once at startup and once in rc_normalize() */ |
48 | static void rc_read(rc_t *rc) |
49 | { |
50 | int buffer_size = safe_read(rc->fd, RC_BUFFER, RC_BUFFER_SIZE); |
51 | if (buffer_size <= 0) |
52 | bb_error_msg_and_die("unexpected EOF"); |
53 | rc->ptr = RC_BUFFER; |
54 | rc->buffer_end = RC_BUFFER + buffer_size; |
55 | } |
56 | |
57 | /* Called once */ |
58 | static rc_t* rc_init(int fd) /*, int buffer_size) */ |
59 | { |
60 | int i; |
61 | rc_t *rc; |
62 | |
63 | rc = xmalloc(sizeof(*rc) + RC_BUFFER_SIZE); |
64 | |
65 | rc->fd = fd; |
66 | /* rc->buffer_size = buffer_size; */ |
67 | rc->buffer_end = RC_BUFFER + RC_BUFFER_SIZE; |
68 | rc->ptr = rc->buffer_end; |
69 | |
70 | rc->code = 0; |
71 | rc->range = 0xFFFFFFFF; |
72 | for (i = 0; i < 5; i++) { |
73 | if (rc->ptr >= rc->buffer_end) |
74 | rc_read(rc); |
75 | rc->code = (rc->code << 8) | *rc->ptr++; |
76 | } |
77 | return rc; |
78 | } |
79 | |
80 | /* Called once */ |
81 | static ALWAYS_INLINE void rc_free(rc_t *rc) |
82 | { |
83 | free(rc); |
84 | } |
85 | |
86 | /* Called twice, but one callsite is in speed_inline'd rc_is_bit_0_helper() */ |
87 | static void rc_do_normalize(rc_t *rc) |
88 | { |
89 | if (rc->ptr >= rc->buffer_end) |
90 | rc_read(rc); |
91 | rc->range <<= 8; |
92 | rc->code = (rc->code << 8) | *rc->ptr++; |
93 | } |
94 | static ALWAYS_INLINE void rc_normalize(rc_t *rc) |
95 | { |
96 | if (rc->range < (1 << RC_TOP_BITS)) { |
97 | rc_do_normalize(rc); |
98 | } |
99 | } |
100 | |
101 | /* rc_is_bit_0 is called 9 times */ |
102 | /* Why rc_is_bit_0_helper exists? |
103 | * Because we want to always expose (rc->code < rc->bound) to optimizer. |
104 | * Thus rc_is_bit_0 is always inlined, and rc_is_bit_0_helper is inlined |
105 | * only if we compile for speed. |
106 | */ |
107 | static speed_inline uint32_t rc_is_bit_0_helper(rc_t *rc, uint16_t *p) |
108 | { |
109 | rc_normalize(rc); |
110 | rc->bound = *p * (rc->range >> RC_MODEL_TOTAL_BITS); |
111 | return rc->bound; |
112 | } |
113 | static ALWAYS_INLINE int rc_is_bit_0(rc_t *rc, uint16_t *p) |
114 | { |
115 | uint32_t t = rc_is_bit_0_helper(rc, p); |
116 | return rc->code < t; |
117 | } |
118 | |
119 | /* Called ~10 times, but very small, thus inlined */ |
120 | static speed_inline void rc_update_bit_0(rc_t *rc, uint16_t *p) |
121 | { |
122 | rc->range = rc->bound; |
123 | *p += ((1 << RC_MODEL_TOTAL_BITS) - *p) >> RC_MOVE_BITS; |
124 | } |
125 | static speed_inline void rc_update_bit_1(rc_t *rc, uint16_t *p) |
126 | { |
127 | rc->range -= rc->bound; |
128 | rc->code -= rc->bound; |
129 | *p -= *p >> RC_MOVE_BITS; |
130 | } |
131 | |
132 | /* Called 4 times in unlzma loop */ |
133 | static int rc_get_bit(rc_t *rc, uint16_t *p, int *symbol) |
134 | { |
135 | if (rc_is_bit_0(rc, p)) { |
136 | rc_update_bit_0(rc, p); |
137 | *symbol *= 2; |
138 | return 0; |
139 | } else { |
140 | rc_update_bit_1(rc, p); |
141 | *symbol = *symbol * 2 + 1; |
142 | return 1; |
143 | } |
144 | } |
145 | |
146 | /* Called once */ |
147 | static ALWAYS_INLINE int rc_direct_bit(rc_t *rc) |
148 | { |
149 | rc_normalize(rc); |
150 | rc->range >>= 1; |
151 | if (rc->code >= rc->range) { |
152 | rc->code -= rc->range; |
153 | return 1; |
154 | } |
155 | return 0; |
156 | } |
157 | |
158 | /* Called twice */ |
159 | static speed_inline void |
160 | rc_bit_tree_decode(rc_t *rc, uint16_t *p, int num_levels, int *symbol) |
161 | { |
162 | int i = num_levels; |
163 | |
164 | *symbol = 1; |
165 | while (i--) |
166 | rc_get_bit(rc, p + *symbol, symbol); |
167 | *symbol -= 1 << num_levels; |
168 | } |
169 | |
170 | |
171 | typedef struct { |
172 | uint8_t pos; |
173 | uint32_t dict_size; |
174 | uint64_t dst_size; |
175 | } __attribute__ ((packed)) lzma_header_t; |
176 | |
177 | |
178 | /* #defines will force compiler to compute/optimize each one with each usage. |
179 | * Have heart and use enum instead. */ |
180 | enum { |
181 | LZMA_BASE_SIZE = 1846, |
182 | LZMA_LIT_SIZE = 768, |
183 | |
184 | LZMA_NUM_POS_BITS_MAX = 4, |
185 | |
186 | LZMA_LEN_NUM_LOW_BITS = 3, |
187 | LZMA_LEN_NUM_MID_BITS = 3, |
188 | LZMA_LEN_NUM_HIGH_BITS = 8, |
189 | |
190 | LZMA_LEN_CHOICE = 0, |
191 | LZMA_LEN_CHOICE_2 = (LZMA_LEN_CHOICE + 1), |
192 | LZMA_LEN_LOW = (LZMA_LEN_CHOICE_2 + 1), |
193 | LZMA_LEN_MID = (LZMA_LEN_LOW \ |
194 | + (1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_LOW_BITS))), |
195 | LZMA_LEN_HIGH = (LZMA_LEN_MID \ |
196 | + (1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_MID_BITS))), |
197 | LZMA_NUM_LEN_PROBS = (LZMA_LEN_HIGH + (1 << LZMA_LEN_NUM_HIGH_BITS)), |
198 | |
199 | LZMA_NUM_STATES = 12, |
200 | LZMA_NUM_LIT_STATES = 7, |
201 | |
202 | LZMA_START_POS_MODEL_INDEX = 4, |
203 | LZMA_END_POS_MODEL_INDEX = 14, |
204 | LZMA_NUM_FULL_DISTANCES = (1 << (LZMA_END_POS_MODEL_INDEX >> 1)), |
205 | |
206 | LZMA_NUM_POS_SLOT_BITS = 6, |
207 | LZMA_NUM_LEN_TO_POS_STATES = 4, |
208 | |
209 | LZMA_NUM_ALIGN_BITS = 4, |
210 | |
211 | LZMA_MATCH_MIN_LEN = 2, |
212 | |
213 | LZMA_IS_MATCH = 0, |
214 | LZMA_IS_REP = (LZMA_IS_MATCH + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX)), |
215 | LZMA_IS_REP_G0 = (LZMA_IS_REP + LZMA_NUM_STATES), |
216 | LZMA_IS_REP_G1 = (LZMA_IS_REP_G0 + LZMA_NUM_STATES), |
217 | LZMA_IS_REP_G2 = (LZMA_IS_REP_G1 + LZMA_NUM_STATES), |
218 | LZMA_IS_REP_0_LONG = (LZMA_IS_REP_G2 + LZMA_NUM_STATES), |
219 | LZMA_POS_SLOT = (LZMA_IS_REP_0_LONG \ |
220 | + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX)), |
221 | LZMA_SPEC_POS = (LZMA_POS_SLOT \ |
222 | + (LZMA_NUM_LEN_TO_POS_STATES << LZMA_NUM_POS_SLOT_BITS)), |
223 | LZMA_ALIGN = (LZMA_SPEC_POS \ |
224 | + LZMA_NUM_FULL_DISTANCES - LZMA_END_POS_MODEL_INDEX), |
225 | LZMA_LEN_CODER = (LZMA_ALIGN + (1 << LZMA_NUM_ALIGN_BITS)), |
226 | LZMA_REP_LEN_CODER = (LZMA_LEN_CODER + LZMA_NUM_LEN_PROBS), |
227 | LZMA_LITERAL = (LZMA_REP_LEN_CODER + LZMA_NUM_LEN_PROBS), |
228 | }; |
229 | |
230 | |
231 | USE_DESKTOP(long long) int FAST_FUNC |
232 | unpack_lzma_stream(int src_fd, int dst_fd) |
233 | { |
234 | USE_DESKTOP(long long total_written = 0;) |
235 | lzma_header_t header; |
236 | int lc, pb, lp; |
237 | uint32_t pos_state_mask; |
238 | uint32_t literal_pos_mask; |
239 | uint32_t pos; |
240 | uint16_t *p; |
241 | uint16_t *prob; |
242 | uint16_t *prob_lit; |
243 | int num_bits; |
244 | int num_probs; |
245 | rc_t *rc; |
246 | int i, mi; |
247 | uint8_t *buffer; |
248 | uint8_t previous_byte = 0; |
249 | size_t buffer_pos = 0, global_pos = 0; |
250 | int len = 0; |
251 | int state = 0; |
252 | uint32_t rep0 = 1, rep1 = 1, rep2 = 1, rep3 = 1; |
253 | |
254 | xread(src_fd, &header, sizeof(header)); |
255 | |
256 | if (header.pos >= (9 * 5 * 5)) |
257 | bb_error_msg_and_die("bad header"); |
258 | mi = header.pos / 9; |
259 | lc = header.pos % 9; |
260 | pb = mi / 5; |
261 | lp = mi % 5; |
262 | pos_state_mask = (1 << pb) - 1; |
263 | literal_pos_mask = (1 << lp) - 1; |
264 | |
265 | header.dict_size = SWAP_LE32(header.dict_size); |
266 | header.dst_size = SWAP_LE64(header.dst_size); |
267 | |
268 | if (header.dict_size == 0) |
269 | header.dict_size = 1; |
270 | |
271 | buffer = xmalloc(MIN(header.dst_size, header.dict_size)); |
272 | |
273 | num_probs = LZMA_BASE_SIZE + (LZMA_LIT_SIZE << (lc + lp)); |
274 | p = xmalloc(num_probs * sizeof(*p)); |
275 | num_probs = LZMA_LITERAL + (LZMA_LIT_SIZE << (lc + lp)); |
276 | for (i = 0; i < num_probs; i++) |
277 | p[i] = (1 << RC_MODEL_TOTAL_BITS) >> 1; |
278 | |
279 | rc = rc_init(src_fd); /*, RC_BUFFER_SIZE); */ |
280 | |
281 | while (global_pos + buffer_pos < header.dst_size) { |
282 | int pos_state = (buffer_pos + global_pos) & pos_state_mask; |
283 | |
284 | prob = p + LZMA_IS_MATCH + (state << LZMA_NUM_POS_BITS_MAX) + pos_state; |
285 | if (rc_is_bit_0(rc, prob)) { |
286 | mi = 1; |
287 | rc_update_bit_0(rc, prob); |
288 | prob = (p + LZMA_LITERAL |
289 | + (LZMA_LIT_SIZE * ((((buffer_pos + global_pos) & literal_pos_mask) << lc) |
290 | + (previous_byte >> (8 - lc)) |
291 | ) |
292 | ) |
293 | ); |
294 | |
295 | if (state >= LZMA_NUM_LIT_STATES) { |
296 | int match_byte; |
297 | |
298 | pos = buffer_pos - rep0; |
299 | while (pos >= header.dict_size) |
300 | pos += header.dict_size; |
301 | match_byte = buffer[pos]; |
302 | do { |
303 | int bit; |
304 | |
305 | match_byte <<= 1; |
306 | bit = match_byte & 0x100; |
307 | prob_lit = prob + 0x100 + bit + mi; |
308 | bit ^= (rc_get_bit(rc, prob_lit, &mi) << 8); /* 0x100 or 0 */ |
309 | if (bit) |
310 | break; |
311 | } while (mi < 0x100); |
312 | } |
313 | while (mi < 0x100) { |
314 | prob_lit = prob + mi; |
315 | rc_get_bit(rc, prob_lit, &mi); |
316 | } |
317 | |
318 | state -= 3; |
319 | if (state < 4-3) |
320 | state = 0; |
321 | if (state >= 10-3) |
322 | state -= 6-3; |
323 | |
324 | previous_byte = (uint8_t) mi; |
325 | #if ENABLE_FEATURE_LZMA_FAST |
326 | one_byte1: |
327 | buffer[buffer_pos++] = previous_byte; |
328 | if (buffer_pos == header.dict_size) { |
329 | buffer_pos = 0; |
330 | global_pos += header.dict_size; |
331 | if (full_write(dst_fd, buffer, header.dict_size) != (ssize_t)header.dict_size) |
332 | goto bad; |
333 | USE_DESKTOP(total_written += header.dict_size;) |
334 | } |
335 | #else |
336 | len = 1; |
337 | goto one_byte2; |
338 | #endif |
339 | } else { |
340 | int offset; |
341 | uint16_t *prob_len; |
342 | |
343 | rc_update_bit_1(rc, prob); |
344 | prob = p + LZMA_IS_REP + state; |
345 | if (rc_is_bit_0(rc, prob)) { |
346 | rc_update_bit_0(rc, prob); |
347 | rep3 = rep2; |
348 | rep2 = rep1; |
349 | rep1 = rep0; |
350 | state = state < LZMA_NUM_LIT_STATES ? 0 : 3; |
351 | prob = p + LZMA_LEN_CODER; |
352 | } else { |
353 | rc_update_bit_1(rc, prob); |
354 | prob = p + LZMA_IS_REP_G0 + state; |
355 | if (rc_is_bit_0(rc, prob)) { |
356 | rc_update_bit_0(rc, prob); |
357 | prob = (p + LZMA_IS_REP_0_LONG |
358 | + (state << LZMA_NUM_POS_BITS_MAX) |
359 | + pos_state |
360 | ); |
361 | if (rc_is_bit_0(rc, prob)) { |
362 | rc_update_bit_0(rc, prob); |
363 | |
364 | state = state < LZMA_NUM_LIT_STATES ? 9 : 11; |
365 | #if ENABLE_FEATURE_LZMA_FAST |
366 | pos = buffer_pos - rep0; |
367 | while (pos >= header.dict_size) |
368 | pos += header.dict_size; |
369 | previous_byte = buffer[pos]; |
370 | goto one_byte1; |
371 | #else |
372 | len = 1; |
373 | goto string; |
374 | #endif |
375 | } else { |
376 | rc_update_bit_1(rc, prob); |
377 | } |
378 | } else { |
379 | uint32_t distance; |
380 | |
381 | rc_update_bit_1(rc, prob); |
382 | prob = p + LZMA_IS_REP_G1 + state; |
383 | if (rc_is_bit_0(rc, prob)) { |
384 | rc_update_bit_0(rc, prob); |
385 | distance = rep1; |
386 | } else { |
387 | rc_update_bit_1(rc, prob); |
388 | prob = p + LZMA_IS_REP_G2 + state; |
389 | if (rc_is_bit_0(rc, prob)) { |
390 | rc_update_bit_0(rc, prob); |
391 | distance = rep2; |
392 | } else { |
393 | rc_update_bit_1(rc, prob); |
394 | distance = rep3; |
395 | rep3 = rep2; |
396 | } |
397 | rep2 = rep1; |
398 | } |
399 | rep1 = rep0; |
400 | rep0 = distance; |
401 | } |
402 | state = state < LZMA_NUM_LIT_STATES ? 8 : 11; |
403 | prob = p + LZMA_REP_LEN_CODER; |
404 | } |
405 | |
406 | prob_len = prob + LZMA_LEN_CHOICE; |
407 | if (rc_is_bit_0(rc, prob_len)) { |
408 | rc_update_bit_0(rc, prob_len); |
409 | prob_len = (prob + LZMA_LEN_LOW |
410 | + (pos_state << LZMA_LEN_NUM_LOW_BITS)); |
411 | offset = 0; |
412 | num_bits = LZMA_LEN_NUM_LOW_BITS; |
413 | } else { |
414 | rc_update_bit_1(rc, prob_len); |
415 | prob_len = prob + LZMA_LEN_CHOICE_2; |
416 | if (rc_is_bit_0(rc, prob_len)) { |
417 | rc_update_bit_0(rc, prob_len); |
418 | prob_len = (prob + LZMA_LEN_MID |
419 | + (pos_state << LZMA_LEN_NUM_MID_BITS)); |
420 | offset = 1 << LZMA_LEN_NUM_LOW_BITS; |
421 | num_bits = LZMA_LEN_NUM_MID_BITS; |
422 | } else { |
423 | rc_update_bit_1(rc, prob_len); |
424 | prob_len = prob + LZMA_LEN_HIGH; |
425 | offset = ((1 << LZMA_LEN_NUM_LOW_BITS) |
426 | + (1 << LZMA_LEN_NUM_MID_BITS)); |
427 | num_bits = LZMA_LEN_NUM_HIGH_BITS; |
428 | } |
429 | } |
430 | rc_bit_tree_decode(rc, prob_len, num_bits, &len); |
431 | len += offset; |
432 | |
433 | if (state < 4) { |
434 | int pos_slot; |
435 | |
436 | state += LZMA_NUM_LIT_STATES; |
437 | prob = p + LZMA_POS_SLOT + |
438 | ((len < LZMA_NUM_LEN_TO_POS_STATES ? len : |
439 | LZMA_NUM_LEN_TO_POS_STATES - 1) |
440 | << LZMA_NUM_POS_SLOT_BITS); |
441 | rc_bit_tree_decode(rc, prob, LZMA_NUM_POS_SLOT_BITS, |
442 | &pos_slot); |
443 | if (pos_slot >= LZMA_START_POS_MODEL_INDEX) { |
444 | num_bits = (pos_slot >> 1) - 1; |
445 | rep0 = 2 | (pos_slot & 1); |
446 | if (pos_slot < LZMA_END_POS_MODEL_INDEX) { |
447 | rep0 <<= num_bits; |
448 | prob = p + LZMA_SPEC_POS + rep0 - pos_slot - 1; |
449 | } else { |
450 | num_bits -= LZMA_NUM_ALIGN_BITS; |
451 | while (num_bits--) |
452 | rep0 = (rep0 << 1) | rc_direct_bit(rc); |
453 | prob = p + LZMA_ALIGN; |
454 | rep0 <<= LZMA_NUM_ALIGN_BITS; |
455 | num_bits = LZMA_NUM_ALIGN_BITS; |
456 | } |
457 | i = 1; |
458 | mi = 1; |
459 | while (num_bits--) { |
460 | if (rc_get_bit(rc, prob + mi, &mi)) |
461 | rep0 |= i; |
462 | i <<= 1; |
463 | } |
464 | } else |
465 | rep0 = pos_slot; |
466 | if (++rep0 == 0) |
467 | break; |
468 | } |
469 | |
470 | len += LZMA_MATCH_MIN_LEN; |
471 | SKIP_FEATURE_LZMA_FAST(string:) |
472 | do { |
473 | pos = buffer_pos - rep0; |
474 | while (pos >= header.dict_size) |
475 | pos += header.dict_size; |
476 | previous_byte = buffer[pos]; |
477 | SKIP_FEATURE_LZMA_FAST(one_byte2:) |
478 | buffer[buffer_pos++] = previous_byte; |
479 | if (buffer_pos == header.dict_size) { |
480 | buffer_pos = 0; |
481 | global_pos += header.dict_size; |
482 | if (full_write(dst_fd, buffer, header.dict_size) != (ssize_t)header.dict_size) |
483 | goto bad; |
484 | USE_DESKTOP(total_written += header.dict_size;) |
485 | } |
486 | len--; |
487 | } while (len != 0 && buffer_pos < header.dst_size); |
488 | } |
489 | } |
490 | |
491 | { |
492 | SKIP_DESKTOP(int total_written = 0; /* success */) |
493 | USE_DESKTOP(total_written += buffer_pos;) |
494 | if (full_write(dst_fd, buffer, buffer_pos) != (ssize_t)buffer_pos) { |
495 | bad: |
496 | total_written = -1; /* failure */ |
497 | } |
498 | rc_free(rc); |
499 | free(p); |
500 | free(buffer); |
501 | return total_written; |
502 | } |
503 | } |