--- trunk/mkinitrd-magellan/busybox/archival/libunarchive/decompress_unlzma.c 2009/04/24 18:32:46 815 +++ trunk/mkinitrd-magellan/busybox/archival/libunarchive/decompress_unlzma.c 2009/04/24 18:33:46 816 @@ -12,8 +12,8 @@ #include "libbb.h" #include "unarchive.h" -#ifdef CONFIG_FEATURE_LZMA_FAST -# define speed_inline ATTRIBUTE_ALWAYS_INLINE +#if ENABLE_FEATURE_LZMA_FAST +# define speed_inline ALWAYS_INLINE #else # define speed_inline #endif @@ -45,7 +45,7 @@ /* Called twice: once at startup and once in rc_normalize() */ -static void rc_read(rc_t * rc) +static void rc_read(rc_t *rc) { int buffer_size = safe_read(rc->fd, RC_BUFFER, RC_BUFFER_SIZE); if (buffer_size <= 0) @@ -58,9 +58,9 @@ static rc_t* rc_init(int fd) /*, int buffer_size) */ { int i; - rc_t* rc; + rc_t *rc; - rc = xmalloc(sizeof(rc_t) + RC_BUFFER_SIZE); + rc = xmalloc(sizeof(*rc) + RC_BUFFER_SIZE); rc->fd = fd; /* rc->buffer_size = buffer_size; */ @@ -78,50 +78,51 @@ } /* Called once */ -static ATTRIBUTE_ALWAYS_INLINE void rc_free(rc_t * rc) +static ALWAYS_INLINE void rc_free(rc_t *rc) { - if (ENABLE_FEATURE_CLEAN_UP) - free(rc); + free(rc); } /* Called twice, but one callsite is in speed_inline'd rc_is_bit_0_helper() */ -static void rc_do_normalize(rc_t * rc) +static void rc_do_normalize(rc_t *rc) { if (rc->ptr >= rc->buffer_end) rc_read(rc); rc->range <<= 8; rc->code = (rc->code << 8) | *rc->ptr++; } -static ATTRIBUTE_ALWAYS_INLINE void rc_normalize(rc_t * rc) +static ALWAYS_INLINE void rc_normalize(rc_t *rc) { if (rc->range < (1 << RC_TOP_BITS)) { rc_do_normalize(rc); } } -/* Called 9 times */ +/* rc_is_bit_0 is called 9 times */ /* Why rc_is_bit_0_helper exists? - * Because we want to always expose (rc->code < rc->bound) to optimizer + * Because we want to always expose (rc->code < rc->bound) to optimizer. + * Thus rc_is_bit_0 is always inlined, and rc_is_bit_0_helper is inlined + * only if we compile for speed. */ -static speed_inline uint32_t rc_is_bit_0_helper(rc_t * rc, uint16_t * p) +static speed_inline uint32_t rc_is_bit_0_helper(rc_t *rc, uint16_t *p) { rc_normalize(rc); rc->bound = *p * (rc->range >> RC_MODEL_TOTAL_BITS); return rc->bound; } -static ATTRIBUTE_ALWAYS_INLINE int rc_is_bit_0(rc_t * rc, uint16_t * p) +static ALWAYS_INLINE int rc_is_bit_0(rc_t *rc, uint16_t *p) { uint32_t t = rc_is_bit_0_helper(rc, p); return rc->code < t; } /* Called ~10 times, but very small, thus inlined */ -static speed_inline void rc_update_bit_0(rc_t * rc, uint16_t * p) +static speed_inline void rc_update_bit_0(rc_t *rc, uint16_t *p) { rc->range = rc->bound; *p += ((1 << RC_MODEL_TOTAL_BITS) - *p) >> RC_MOVE_BITS; } -static speed_inline void rc_update_bit_1(rc_t * rc, uint16_t * p) +static speed_inline void rc_update_bit_1(rc_t *rc, uint16_t *p) { rc->range -= rc->bound; rc->code -= rc->bound; @@ -129,7 +130,7 @@ } /* Called 4 times in unlzma loop */ -static int rc_get_bit(rc_t * rc, uint16_t * p, int *symbol) +static int rc_get_bit(rc_t *rc, uint16_t *p, int *symbol) { if (rc_is_bit_0(rc, p)) { rc_update_bit_0(rc, p); @@ -143,7 +144,7 @@ } /* Called once */ -static ATTRIBUTE_ALWAYS_INLINE int rc_direct_bit(rc_t * rc) +static ALWAYS_INLINE int rc_direct_bit(rc_t *rc) { rc_normalize(rc); rc->range >>= 1; @@ -156,7 +157,7 @@ /* Called twice */ static speed_inline void -rc_bit_tree_decode(rc_t * rc, uint16_t * p, int num_levels, int *symbol) +rc_bit_tree_decode(rc_t *rc, uint16_t *p, int num_levels, int *symbol) { int i = num_levels; @@ -227,8 +228,8 @@ }; -USE_DESKTOP(long long) int -unlzma(int src_fd, int dst_fd) +USE_DESKTOP(long long) int FAST_FUNC +unpack_lzma_stream(int src_fd, int dst_fd) { USE_DESKTOP(long long total_written = 0;) lzma_header_t header; @@ -280,14 +281,16 @@ while (global_pos + buffer_pos < header.dst_size) { int pos_state = (buffer_pos + global_pos) & pos_state_mask; - prob = - p + LZMA_IS_MATCH + (state << LZMA_NUM_POS_BITS_MAX) + pos_state; + prob = p + LZMA_IS_MATCH + (state << LZMA_NUM_POS_BITS_MAX) + pos_state; if (rc_is_bit_0(rc, prob)) { mi = 1; rc_update_bit_0(rc, prob); - prob = (p + LZMA_LITERAL + (LZMA_LIT_SIZE - * ((((buffer_pos + global_pos) & literal_pos_mask) << lc) - + (previous_byte >> (8 - lc))))); + prob = (p + LZMA_LITERAL + + (LZMA_LIT_SIZE * ((((buffer_pos + global_pos) & literal_pos_mask) << lc) + + (previous_byte >> (8 - lc)) + ) + ) + ); if (state >= LZMA_NUM_LIT_STATES) { int match_byte; @@ -302,35 +305,37 @@ match_byte <<= 1; bit = match_byte & 0x100; prob_lit = prob + 0x100 + bit + mi; - if (rc_get_bit(rc, prob_lit, &mi)) { - if (!bit) - break; - } else { - if (bit) - break; - } + bit ^= (rc_get_bit(rc, prob_lit, &mi) << 8); /* 0x100 or 0 */ + if (bit) + break; } while (mi < 0x100); } while (mi < 0x100) { prob_lit = prob + mi; rc_get_bit(rc, prob_lit, &mi); } - previous_byte = (uint8_t) mi; + state -= 3; + if (state < 4-3) + state = 0; + if (state >= 10-3) + state -= 6-3; + + previous_byte = (uint8_t) mi; +#if ENABLE_FEATURE_LZMA_FAST + one_byte1: buffer[buffer_pos++] = previous_byte; if (buffer_pos == header.dict_size) { buffer_pos = 0; global_pos += header.dict_size; - if (full_write(dst_fd, buffer, header.dict_size) != header.dict_size) + if (full_write(dst_fd, buffer, header.dict_size) != (ssize_t)header.dict_size) goto bad; USE_DESKTOP(total_written += header.dict_size;) } - if (state < 4) - state = 0; - else if (state < 10) - state -= 3; - else - state -= 6; +#else + len = 1; + goto one_byte2; +#endif } else { int offset; uint16_t *prob_len; @@ -350,24 +355,23 @@ if (rc_is_bit_0(rc, prob)) { rc_update_bit_0(rc, prob); prob = (p + LZMA_IS_REP_0_LONG - + (state << LZMA_NUM_POS_BITS_MAX) + pos_state); + + (state << LZMA_NUM_POS_BITS_MAX) + + pos_state + ); if (rc_is_bit_0(rc, prob)) { rc_update_bit_0(rc, prob); state = state < LZMA_NUM_LIT_STATES ? 9 : 11; +#if ENABLE_FEATURE_LZMA_FAST pos = buffer_pos - rep0; while (pos >= header.dict_size) pos += header.dict_size; previous_byte = buffer[pos]; - buffer[buffer_pos++] = previous_byte; - if (buffer_pos == header.dict_size) { - buffer_pos = 0; - global_pos += header.dict_size; - if (full_write(dst_fd, buffer, header.dict_size) != header.dict_size) - goto bad; - USE_DESKTOP(total_written += header.dict_size;) - } - continue; + goto one_byte1; +#else + len = 1; + goto string; +#endif } else { rc_update_bit_1(rc, prob); } @@ -403,7 +407,7 @@ if (rc_is_bit_0(rc, prob_len)) { rc_update_bit_0(rc, prob_len); prob_len = (prob + LZMA_LEN_LOW - + (pos_state << LZMA_LEN_NUM_LOW_BITS)); + + (pos_state << LZMA_LEN_NUM_LOW_BITS)); offset = 0; num_bits = LZMA_LEN_NUM_LOW_BITS; } else { @@ -412,14 +416,14 @@ if (rc_is_bit_0(rc, prob_len)) { rc_update_bit_0(rc, prob_len); prob_len = (prob + LZMA_LEN_MID - + (pos_state << LZMA_LEN_NUM_MID_BITS)); + + (pos_state << LZMA_LEN_NUM_MID_BITS)); offset = 1 << LZMA_LEN_NUM_LOW_BITS; num_bits = LZMA_LEN_NUM_MID_BITS; } else { rc_update_bit_1(rc, prob_len); prob_len = prob + LZMA_LEN_HIGH; offset = ((1 << LZMA_LEN_NUM_LOW_BITS) - + (1 << LZMA_LEN_NUM_MID_BITS)); + + (1 << LZMA_LEN_NUM_MID_BITS)); num_bits = LZMA_LEN_NUM_HIGH_BITS; } } @@ -430,12 +434,10 @@ int pos_slot; state += LZMA_NUM_LIT_STATES; - prob = - p + LZMA_POS_SLOT + - ((len < - LZMA_NUM_LEN_TO_POS_STATES ? len : - LZMA_NUM_LEN_TO_POS_STATES - 1) - << LZMA_NUM_POS_SLOT_BITS); + prob = p + LZMA_POS_SLOT + + ((len < LZMA_NUM_LEN_TO_POS_STATES ? len : + LZMA_NUM_LEN_TO_POS_STATES - 1) + << LZMA_NUM_POS_SLOT_BITS); rc_bit_tree_decode(rc, prob, LZMA_NUM_POS_SLOT_BITS, &pos_slot); if (pos_slot >= LZMA_START_POS_MODEL_INDEX) { @@ -466,17 +468,18 @@ } len += LZMA_MATCH_MIN_LEN; - + SKIP_FEATURE_LZMA_FAST(string:) do { pos = buffer_pos - rep0; while (pos >= header.dict_size) pos += header.dict_size; previous_byte = buffer[pos]; + SKIP_FEATURE_LZMA_FAST(one_byte2:) buffer[buffer_pos++] = previous_byte; if (buffer_pos == header.dict_size) { buffer_pos = 0; global_pos += header.dict_size; - if (full_write(dst_fd, buffer, header.dict_size) != header.dict_size) + if (full_write(dst_fd, buffer, header.dict_size) != (ssize_t)header.dict_size) goto bad; USE_DESKTOP(total_written += header.dict_size;) } @@ -485,13 +488,16 @@ } } - - if (full_write(dst_fd, buffer, buffer_pos) != buffer_pos) { + { + SKIP_DESKTOP(int total_written = 0; /* success */) + USE_DESKTOP(total_written += buffer_pos;) + if (full_write(dst_fd, buffer, buffer_pos) != (ssize_t)buffer_pos) { bad: + total_written = -1; /* failure */ + } rc_free(rc); - return -1; + free(p); + free(buffer); + return total_written; } - rc_free(rc); - USE_DESKTOP(total_written += buffer_pos;) - return USE_DESKTOP(total_written) + 0; }