Contents of /trunk/mkinitrd-magellan/busybox/archival/libunarchive/unxz/xz_dec_bcj.c
Parent Directory | Revision Log
Revision 1123 -
(show annotations)
(download)
Wed Aug 18 21:56:57 2010 UTC (14 years, 1 month ago) by niro
File MIME type: text/plain
File size: 13560 byte(s)
Wed Aug 18 21:56:57 2010 UTC (14 years, 1 month ago) by niro
File MIME type: text/plain
File size: 13560 byte(s)
-updated to busybox-1.17.1
1 | /* |
2 | * Branch/Call/Jump (BCJ) filter decoders |
3 | * |
4 | * Authors: Lasse Collin <lasse.collin@tukaani.org> |
5 | * Igor Pavlov <http://7-zip.org/> |
6 | * |
7 | * This file has been put into the public domain. |
8 | * You can do whatever you want with this file. |
9 | */ |
10 | |
11 | #include "xz_private.h" |
12 | |
13 | /* |
14 | * The rest of the file is inside this ifdef. It makes things a little more |
15 | * convenient when building without support for any BCJ filters. |
16 | */ |
17 | #ifdef XZ_DEC_BCJ |
18 | |
19 | struct xz_dec_bcj { |
20 | /* Type of the BCJ filter being used */ |
21 | enum { |
22 | BCJ_X86 = 4, /* x86 or x86-64 */ |
23 | BCJ_POWERPC = 5, /* Big endian only */ |
24 | BCJ_IA64 = 6, /* Big or little endian */ |
25 | BCJ_ARM = 7, /* Little endian only */ |
26 | BCJ_ARMTHUMB = 8, /* Little endian only */ |
27 | BCJ_SPARC = 9 /* Big or little endian */ |
28 | } type; |
29 | |
30 | /* |
31 | * Return value of the next filter in the chain. We need to preserve |
32 | * this information across calls, because we must not call the next |
33 | * filter anymore once it has returned XZ_STREAM_END. |
34 | */ |
35 | enum xz_ret ret; |
36 | |
37 | /* True if we are operating in single-call mode. */ |
38 | bool single_call; |
39 | |
40 | /* |
41 | * Absolute position relative to the beginning of the uncompressed |
42 | * data (in a single .xz Block). We care only about the lowest 32 |
43 | * bits so this doesn't need to be uint64_t even with big files. |
44 | */ |
45 | uint32_t pos; |
46 | |
47 | /* x86 filter state */ |
48 | uint32_t x86_prev_mask; |
49 | |
50 | /* Temporary space to hold the variables from struct xz_buf */ |
51 | uint8_t *out; |
52 | size_t out_pos; |
53 | size_t out_size; |
54 | |
55 | struct { |
56 | /* Amount of already filtered data in the beginning of buf */ |
57 | size_t filtered; |
58 | |
59 | /* Total amount of data currently stored in buf */ |
60 | size_t size; |
61 | |
62 | /* |
63 | * Buffer to hold a mix of filtered and unfiltered data. This |
64 | * needs to be big enough to hold Alignment + 2 * Look-ahead: |
65 | * |
66 | * Type Alignment Look-ahead |
67 | * x86 1 4 |
68 | * PowerPC 4 0 |
69 | * IA-64 16 0 |
70 | * ARM 4 0 |
71 | * ARM-Thumb 2 2 |
72 | * SPARC 4 0 |
73 | */ |
74 | uint8_t buf[16]; |
75 | } temp; |
76 | }; |
77 | |
78 | #ifdef XZ_DEC_X86 |
79 | /* |
80 | * This is macro used to test the most significant byte of a memory address |
81 | * in an x86 instruction. |
82 | */ |
83 | #define bcj_x86_test_msbyte(b) ((b) == 0x00 || (b) == 0xFF) |
84 | |
85 | static noinline_for_stack size_t XZ_FUNC bcj_x86( |
86 | struct xz_dec_bcj *s, uint8_t *buf, size_t size) |
87 | { |
88 | static const bool mask_to_allowed_status[8] |
89 | = { true, true, true, false, true, false, false, false }; |
90 | |
91 | static const uint8_t mask_to_bit_num[8] = { 0, 1, 2, 2, 3, 3, 3, 3 }; |
92 | |
93 | size_t i; |
94 | size_t prev_pos = (size_t)-1; |
95 | uint32_t prev_mask = s->x86_prev_mask; |
96 | uint32_t src; |
97 | uint32_t dest; |
98 | uint32_t j; |
99 | uint8_t b; |
100 | |
101 | if (size <= 4) |
102 | return 0; |
103 | |
104 | size -= 4; |
105 | for (i = 0; i < size; ++i) { |
106 | if ((buf[i] & 0xFE) != 0xE8) |
107 | continue; |
108 | |
109 | prev_pos = i - prev_pos; |
110 | if (prev_pos > 3) { |
111 | prev_mask = 0; |
112 | } else { |
113 | prev_mask = (prev_mask << (prev_pos - 1)) & 7; |
114 | if (prev_mask != 0) { |
115 | b = buf[i + 4 - mask_to_bit_num[prev_mask]]; |
116 | if (!mask_to_allowed_status[prev_mask] |
117 | || bcj_x86_test_msbyte(b)) { |
118 | prev_pos = i; |
119 | prev_mask = (prev_mask << 1) | 1; |
120 | continue; |
121 | } |
122 | } |
123 | } |
124 | |
125 | prev_pos = i; |
126 | |
127 | if (bcj_x86_test_msbyte(buf[i + 4])) { |
128 | src = get_unaligned_le32(buf + i + 1); |
129 | while (true) { |
130 | dest = src - (s->pos + (uint32_t)i + 5); |
131 | if (prev_mask == 0) |
132 | break; |
133 | |
134 | j = mask_to_bit_num[prev_mask] * 8; |
135 | b = (uint8_t)(dest >> (24 - j)); |
136 | if (!bcj_x86_test_msbyte(b)) |
137 | break; |
138 | |
139 | src = dest ^ (((uint32_t)1 << (32 - j)) - 1); |
140 | } |
141 | |
142 | dest &= 0x01FFFFFF; |
143 | dest |= (uint32_t)0 - (dest & 0x01000000); |
144 | put_unaligned_le32(dest, buf + i + 1); |
145 | i += 4; |
146 | } else { |
147 | prev_mask = (prev_mask << 1) | 1; |
148 | } |
149 | } |
150 | |
151 | prev_pos = i - prev_pos; |
152 | s->x86_prev_mask = prev_pos > 3 ? 0 : prev_mask << (prev_pos - 1); |
153 | return i; |
154 | } |
155 | #endif |
156 | |
157 | #ifdef XZ_DEC_POWERPC |
158 | static noinline_for_stack size_t XZ_FUNC bcj_powerpc( |
159 | struct xz_dec_bcj *s, uint8_t *buf, size_t size) |
160 | { |
161 | size_t i; |
162 | uint32_t instr; |
163 | |
164 | for (i = 0; i + 4 <= size; i += 4) { |
165 | instr = get_unaligned_be32(buf + i); |
166 | if ((instr & 0xFC000003) == 0x48000001) { |
167 | instr &= 0x03FFFFFC; |
168 | instr -= s->pos + (uint32_t)i; |
169 | instr &= 0x03FFFFFC; |
170 | instr |= 0x48000001; |
171 | put_unaligned_be32(instr, buf + i); |
172 | } |
173 | } |
174 | |
175 | return i; |
176 | } |
177 | #endif |
178 | |
179 | #ifdef XZ_DEC_IA64 |
180 | static noinline_for_stack size_t XZ_FUNC bcj_ia64( |
181 | struct xz_dec_bcj *s, uint8_t *buf, size_t size) |
182 | { |
183 | static const uint8_t branch_table[32] = { |
184 | 0, 0, 0, 0, 0, 0, 0, 0, |
185 | 0, 0, 0, 0, 0, 0, 0, 0, |
186 | 4, 4, 6, 6, 0, 0, 7, 7, |
187 | 4, 4, 0, 0, 4, 4, 0, 0 |
188 | }; |
189 | |
190 | /* |
191 | * The local variables take a little bit stack space, but it's less |
192 | * than what LZMA2 decoder takes, so it doesn't make sense to reduce |
193 | * stack usage here without doing that for the LZMA2 decoder too. |
194 | */ |
195 | |
196 | /* Loop counters */ |
197 | size_t i; |
198 | size_t j; |
199 | |
200 | /* Instruction slot (0, 1, or 2) in the 128-bit instruction word */ |
201 | uint32_t slot; |
202 | |
203 | /* Bitwise offset of the instruction indicated by slot */ |
204 | uint32_t bit_pos; |
205 | |
206 | /* bit_pos split into byte and bit parts */ |
207 | uint32_t byte_pos; |
208 | uint32_t bit_res; |
209 | |
210 | /* Address part of an instruction */ |
211 | uint32_t addr; |
212 | |
213 | /* Mask used to detect which instructions to convert */ |
214 | uint32_t mask; |
215 | |
216 | /* 41-bit instruction stored somewhere in the lowest 48 bits */ |
217 | uint64_t instr; |
218 | |
219 | /* Instruction normalized with bit_res for easier manipulation */ |
220 | uint64_t norm; |
221 | |
222 | for (i = 0; i + 16 <= size; i += 16) { |
223 | mask = branch_table[buf[i] & 0x1F]; |
224 | for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) { |
225 | if (((mask >> slot) & 1) == 0) |
226 | continue; |
227 | |
228 | byte_pos = bit_pos >> 3; |
229 | bit_res = bit_pos & 7; |
230 | instr = 0; |
231 | for (j = 0; j < 6; ++j) |
232 | instr |= (uint64_t)(buf[i + j + byte_pos]) |
233 | << (8 * j); |
234 | |
235 | norm = instr >> bit_res; |
236 | |
237 | if (((norm >> 37) & 0x0F) == 0x05 |
238 | && ((norm >> 9) & 0x07) == 0) { |
239 | addr = (norm >> 13) & 0x0FFFFF; |
240 | addr |= ((uint32_t)(norm >> 36) & 1) << 20; |
241 | addr <<= 4; |
242 | addr -= s->pos + (uint32_t)i; |
243 | addr >>= 4; |
244 | |
245 | norm &= ~((uint64_t)0x8FFFFF << 13); |
246 | norm |= (uint64_t)(addr & 0x0FFFFF) << 13; |
247 | norm |= (uint64_t)(addr & 0x100000) |
248 | << (36 - 20); |
249 | |
250 | instr &= (1 << bit_res) - 1; |
251 | instr |= norm << bit_res; |
252 | |
253 | for (j = 0; j < 6; j++) |
254 | buf[i + j + byte_pos] |
255 | = (uint8_t)(instr >> (8 * j)); |
256 | } |
257 | } |
258 | } |
259 | |
260 | return i; |
261 | } |
262 | #endif |
263 | |
264 | #ifdef XZ_DEC_ARM |
265 | static noinline_for_stack size_t XZ_FUNC bcj_arm( |
266 | struct xz_dec_bcj *s, uint8_t *buf, size_t size) |
267 | { |
268 | size_t i; |
269 | uint32_t addr; |
270 | |
271 | for (i = 0; i + 4 <= size; i += 4) { |
272 | if (buf[i + 3] == 0xEB) { |
273 | addr = (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8) |
274 | | ((uint32_t)buf[i + 2] << 16); |
275 | addr <<= 2; |
276 | addr -= s->pos + (uint32_t)i + 8; |
277 | addr >>= 2; |
278 | buf[i] = (uint8_t)addr; |
279 | buf[i + 1] = (uint8_t)(addr >> 8); |
280 | buf[i + 2] = (uint8_t)(addr >> 16); |
281 | } |
282 | } |
283 | |
284 | return i; |
285 | } |
286 | #endif |
287 | |
288 | #ifdef XZ_DEC_ARMTHUMB |
289 | static noinline_for_stack size_t XZ_FUNC bcj_armthumb( |
290 | struct xz_dec_bcj *s, uint8_t *buf, size_t size) |
291 | { |
292 | size_t i; |
293 | uint32_t addr; |
294 | |
295 | for (i = 0; i + 4 <= size; i += 2) { |
296 | if ((buf[i + 1] & 0xF8) == 0xF0 |
297 | && (buf[i + 3] & 0xF8) == 0xF8) { |
298 | addr = (((uint32_t)buf[i + 1] & 0x07) << 19) |
299 | | ((uint32_t)buf[i] << 11) |
300 | | (((uint32_t)buf[i + 3] & 0x07) << 8) |
301 | | (uint32_t)buf[i + 2]; |
302 | addr <<= 1; |
303 | addr -= s->pos + (uint32_t)i + 4; |
304 | addr >>= 1; |
305 | buf[i + 1] = (uint8_t)(0xF0 | ((addr >> 19) & 0x07)); |
306 | buf[i] = (uint8_t)(addr >> 11); |
307 | buf[i + 3] = (uint8_t)(0xF8 | ((addr >> 8) & 0x07)); |
308 | buf[i + 2] = (uint8_t)addr; |
309 | i += 2; |
310 | } |
311 | } |
312 | |
313 | return i; |
314 | } |
315 | #endif |
316 | |
317 | #ifdef XZ_DEC_SPARC |
318 | static noinline_for_stack size_t XZ_FUNC bcj_sparc( |
319 | struct xz_dec_bcj *s, uint8_t *buf, size_t size) |
320 | { |
321 | size_t i; |
322 | uint32_t instr; |
323 | |
324 | for (i = 0; i + 4 <= size; i += 4) { |
325 | instr = get_unaligned_be32(buf + i); |
326 | if ((instr >> 22) == 0x100 || (instr >> 22) == 0x1FF) { |
327 | instr <<= 2; |
328 | instr -= s->pos + (uint32_t)i; |
329 | instr >>= 2; |
330 | instr = ((uint32_t)0x40000000 - (instr & 0x400000)) |
331 | | 0x40000000 | (instr & 0x3FFFFF); |
332 | put_unaligned_be32(instr, buf + i); |
333 | } |
334 | } |
335 | |
336 | return i; |
337 | } |
338 | #endif |
339 | |
340 | /* |
341 | * Apply the selected BCJ filter. Update *pos and s->pos to match the amount |
342 | * of data that got filtered. |
343 | * |
344 | * NOTE: This is implemented as a switch statement to avoid using function |
345 | * pointers, which could be problematic in the kernel boot code, which must |
346 | * avoid pointers to static data (at least on x86). |
347 | */ |
348 | static void XZ_FUNC bcj_apply(struct xz_dec_bcj *s, |
349 | uint8_t *buf, size_t *pos, size_t size) |
350 | { |
351 | size_t filtered; |
352 | |
353 | buf += *pos; |
354 | size -= *pos; |
355 | |
356 | switch (s->type) { |
357 | #ifdef XZ_DEC_X86 |
358 | case BCJ_X86: |
359 | filtered = bcj_x86(s, buf, size); |
360 | break; |
361 | #endif |
362 | #ifdef XZ_DEC_POWERPC |
363 | case BCJ_POWERPC: |
364 | filtered = bcj_powerpc(s, buf, size); |
365 | break; |
366 | #endif |
367 | #ifdef XZ_DEC_IA64 |
368 | case BCJ_IA64: |
369 | filtered = bcj_ia64(s, buf, size); |
370 | break; |
371 | #endif |
372 | #ifdef XZ_DEC_ARM |
373 | case BCJ_ARM: |
374 | filtered = bcj_arm(s, buf, size); |
375 | break; |
376 | #endif |
377 | #ifdef XZ_DEC_ARMTHUMB |
378 | case BCJ_ARMTHUMB: |
379 | filtered = bcj_armthumb(s, buf, size); |
380 | break; |
381 | #endif |
382 | #ifdef XZ_DEC_SPARC |
383 | case BCJ_SPARC: |
384 | filtered = bcj_sparc(s, buf, size); |
385 | break; |
386 | #endif |
387 | default: |
388 | /* Never reached but silence compiler warnings. */ |
389 | filtered = 0; |
390 | break; |
391 | } |
392 | |
393 | *pos += filtered; |
394 | s->pos += filtered; |
395 | } |
396 | |
397 | /* |
398 | * Flush pending filtered data from temp to the output buffer. |
399 | * Move the remaining mixture of possibly filtered and unfiltered |
400 | * data to the beginning of temp. |
401 | */ |
402 | static void XZ_FUNC bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b) |
403 | { |
404 | size_t copy_size; |
405 | |
406 | copy_size = min_t(size_t, s->temp.filtered, b->out_size - b->out_pos); |
407 | memcpy(b->out + b->out_pos, s->temp.buf, copy_size); |
408 | b->out_pos += copy_size; |
409 | |
410 | s->temp.filtered -= copy_size; |
411 | s->temp.size -= copy_size; |
412 | memmove(s->temp.buf, s->temp.buf + copy_size, s->temp.size); |
413 | } |
414 | |
415 | /* |
416 | * The BCJ filter functions are primitive in sense that they process the |
417 | * data in chunks of 1-16 bytes. To hide this issue, this function does |
418 | * some buffering. |
419 | */ |
420 | XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_bcj_run(struct xz_dec_bcj *s, |
421 | struct xz_dec_lzma2 *lzma2, struct xz_buf *b) |
422 | { |
423 | size_t out_start; |
424 | |
425 | /* |
426 | * Flush pending already filtered data to the output buffer. Return |
427 | * immediatelly if we couldn't flush everything, or if the next |
428 | * filter in the chain had already returned XZ_STREAM_END. |
429 | */ |
430 | if (s->temp.filtered > 0) { |
431 | bcj_flush(s, b); |
432 | if (s->temp.filtered > 0) |
433 | return XZ_OK; |
434 | |
435 | if (s->ret == XZ_STREAM_END) |
436 | return XZ_STREAM_END; |
437 | } |
438 | |
439 | /* |
440 | * If we have more output space than what is currently pending in |
441 | * temp, copy the unfiltered data from temp to the output buffer |
442 | * and try to fill the output buffer by decoding more data from the |
443 | * next filter in the chain. Apply the BCJ filter on the new data |
444 | * in the output buffer. If everything cannot be filtered, copy it |
445 | * to temp and rewind the output buffer position accordingly. |
446 | */ |
447 | if (s->temp.size < b->out_size - b->out_pos) { |
448 | out_start = b->out_pos; |
449 | memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size); |
450 | b->out_pos += s->temp.size; |
451 | |
452 | s->ret = xz_dec_lzma2_run(lzma2, b); |
453 | if (s->ret != XZ_STREAM_END |
454 | && (s->ret != XZ_OK || s->single_call)) |
455 | return s->ret; |
456 | |
457 | bcj_apply(s, b->out, &out_start, b->out_pos); |
458 | |
459 | /* |
460 | * As an exception, if the next filter returned XZ_STREAM_END, |
461 | * we can do that too, since the last few bytes that remain |
462 | * unfiltered are meant to remain unfiltered. |
463 | */ |
464 | if (s->ret == XZ_STREAM_END) |
465 | return XZ_STREAM_END; |
466 | |
467 | s->temp.size = b->out_pos - out_start; |
468 | b->out_pos -= s->temp.size; |
469 | memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size); |
470 | } |
471 | |
472 | /* |
473 | * If we have unfiltered data in temp, try to fill by decoding more |
474 | * data from the next filter. Apply the BCJ filter on temp. Then we |
475 | * hopefully can fill the actual output buffer by copying filtered |
476 | * data from temp. A mix of filtered and unfiltered data may be left |
477 | * in temp; it will be taken care on the next call to this function. |
478 | */ |
479 | if (s->temp.size > 0) { |
480 | /* Make b->out{,_pos,_size} temporarily point to s->temp. */ |
481 | s->out = b->out; |
482 | s->out_pos = b->out_pos; |
483 | s->out_size = b->out_size; |
484 | b->out = s->temp.buf; |
485 | b->out_pos = s->temp.size; |
486 | b->out_size = sizeof(s->temp.buf); |
487 | |
488 | s->ret = xz_dec_lzma2_run(lzma2, b); |
489 | |
490 | s->temp.size = b->out_pos; |
491 | b->out = s->out; |
492 | b->out_pos = s->out_pos; |
493 | b->out_size = s->out_size; |
494 | |
495 | if (s->ret != XZ_OK && s->ret != XZ_STREAM_END) |
496 | return s->ret; |
497 | |
498 | bcj_apply(s, s->temp.buf, &s->temp.filtered, s->temp.size); |
499 | |
500 | /* |
501 | * If the next filter returned XZ_STREAM_END, we mark that |
502 | * everything is filtered, since the last unfiltered bytes |
503 | * of the stream are meant to be left as is. |
504 | */ |
505 | if (s->ret == XZ_STREAM_END) |
506 | s->temp.filtered = s->temp.size; |
507 | |
508 | bcj_flush(s, b); |
509 | if (s->temp.filtered > 0) |
510 | return XZ_OK; |
511 | } |
512 | |
513 | return s->ret; |
514 | } |
515 | |
516 | XZ_EXTERN struct xz_dec_bcj * XZ_FUNC xz_dec_bcj_create(bool single_call) |
517 | { |
518 | struct xz_dec_bcj *s = kmalloc(sizeof(*s), GFP_KERNEL); |
519 | if (s != NULL) |
520 | s->single_call = single_call; |
521 | |
522 | return s; |
523 | } |
524 | |
525 | XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_bcj_reset( |
526 | struct xz_dec_bcj *s, uint8_t id) |
527 | { |
528 | switch (id) { |
529 | #ifdef XZ_DEC_X86 |
530 | case BCJ_X86: |
531 | #endif |
532 | #ifdef XZ_DEC_POWERPC |
533 | case BCJ_POWERPC: |
534 | #endif |
535 | #ifdef XZ_DEC_IA64 |
536 | case BCJ_IA64: |
537 | #endif |
538 | #ifdef XZ_DEC_ARM |
539 | case BCJ_ARM: |
540 | #endif |
541 | #ifdef XZ_DEC_ARMTHUMB |
542 | case BCJ_ARMTHUMB: |
543 | #endif |
544 | #ifdef XZ_DEC_SPARC |
545 | case BCJ_SPARC: |
546 | #endif |
547 | break; |
548 | |
549 | default: |
550 | /* Unsupported Filter ID */ |
551 | return XZ_OPTIONS_ERROR; |
552 | } |
553 | |
554 | s->type = id; |
555 | s->ret = XZ_OK; |
556 | s->pos = 0; |
557 | s->x86_prev_mask = 0; |
558 | s->temp.filtered = 0; |
559 | s->temp.size = 0; |
560 | |
561 | return XZ_OK; |
562 | } |
563 | |
564 | #endif |