44 |
#define RETVAL_LAST_BLOCK (-1) |
#define RETVAL_LAST_BLOCK (-1) |
45 |
#define RETVAL_NOT_BZIP_DATA (-2) |
#define RETVAL_NOT_BZIP_DATA (-2) |
46 |
#define RETVAL_UNEXPECTED_INPUT_EOF (-3) |
#define RETVAL_UNEXPECTED_INPUT_EOF (-3) |
47 |
#define RETVAL_UNEXPECTED_OUTPUT_EOF (-4) |
#define RETVAL_SHORT_WRITE (-4) |
48 |
#define RETVAL_DATA_ERROR (-5) |
#define RETVAL_DATA_ERROR (-5) |
49 |
#define RETVAL_OUT_OF_MEMORY (-6) |
#define RETVAL_OUT_OF_MEMORY (-6) |
50 |
#define RETVAL_OBSOLETE_INPUT (-7) |
#define RETVAL_OBSOLETE_INPUT (-7) |
54 |
|
|
55 |
/* This is what we know about each Huffman coding group */ |
/* This is what we know about each Huffman coding group */ |
56 |
struct group_data { |
struct group_data { |
57 |
/* We have an extra slot at the end of limit[] for a sentinal value. */ |
/* We have an extra slot at the end of limit[] for a sentinel value. */ |
58 |
int limit[MAX_HUFCODE_BITS+1],base[MAX_HUFCODE_BITS],permute[MAX_SYMBOLS]; |
int limit[MAX_HUFCODE_BITS+1], base[MAX_HUFCODE_BITS], permute[MAX_SYMBOLS]; |
59 |
int minLen, maxLen; |
int minLen, maxLen; |
60 |
}; |
}; |
61 |
|
|
62 |
/* Structure holding all the housekeeping data, including IO buffers and |
/* Structure holding all the housekeeping data, including IO buffers and |
63 |
memory that persists between calls to bunzip */ |
* memory that persists between calls to bunzip |
64 |
|
* Found the most used member: |
65 |
typedef struct { |
* cat this_file.c | sed -e 's/"/ /g' -e "s/'/ /g" | xargs -n1 \ |
66 |
/* State for interrupting output loop */ |
* | grep 'bd->' | sed 's/^.*bd->/bd->/' | sort | $PAGER |
67 |
|
* and moved it (inbufBitCount) to offset 0. |
68 |
int writeCopies,writePos,writeRunCountdown,writeCount,writeCurrent; |
*/ |
69 |
|
struct bunzip_data { |
70 |
/* I/O tracking data (file handles, buffers, positions, etc.) */ |
/* I/O tracking data (file handles, buffers, positions, etc.) */ |
71 |
|
unsigned inbufBitCount, inbufBits; |
72 |
int in_fd,out_fd,inbufCount,inbufPos /*,outbufPos*/; |
int in_fd, out_fd, inbufCount, inbufPos /*, outbufPos*/; |
73 |
unsigned char *inbuf /*,*outbuf*/; |
unsigned char *inbuf /*,*outbuf*/; |
|
unsigned int inbufBitCount, inbufBits; |
|
74 |
|
|
75 |
/* The CRC values stored in the block header and calculated from the data */ |
/* State for interrupting output loop */ |
76 |
|
int writeCopies, writePos, writeRunCountdown, writeCount, writeCurrent; |
77 |
|
|
78 |
|
/* The CRC values stored in the block header and calculated from the data */ |
79 |
uint32_t headerCRC, totalCRC, writeCRC; |
uint32_t headerCRC, totalCRC, writeCRC; |
|
uint32_t *crc32Table; |
|
|
/* Intermediate buffer and its size (in bytes) */ |
|
80 |
|
|
81 |
unsigned int *dbuf, dbufSize; |
/* Intermediate buffer and its size (in bytes) */ |
82 |
|
unsigned *dbuf, dbufSize; |
83 |
|
|
84 |
/* These things are a bit too big to go on the stack */ |
/* For I/O error handling */ |
85 |
|
jmp_buf jmpbuf; |
86 |
|
|
87 |
|
/* Big things go last (register-relative addressing can be larger for big offsets) */ |
88 |
|
uint32_t crc32Table[256]; |
89 |
unsigned char selectors[32768]; /* nSelectors=15 bits */ |
unsigned char selectors[32768]; /* nSelectors=15 bits */ |
90 |
struct group_data groups[MAX_GROUPS]; /* Huffman coding tables */ |
struct group_data groups[MAX_GROUPS]; /* Huffman coding tables */ |
91 |
|
}; |
92 |
|
/* typedef struct bunzip_data bunzip_data; -- done in .h file */ |
93 |
|
|
|
/* For I/O error handling */ |
|
|
|
|
|
jmp_buf jmpbuf; |
|
|
} bunzip_data; |
|
94 |
|
|
95 |
/* Return the next nnn bits of input. All reads from the compressed input |
/* Return the next nnn bits of input. All reads from the compressed input |
96 |
are done through this function. All reads are big endian */ |
are done through this function. All reads are big endian */ |
97 |
|
|
98 |
static unsigned int get_bits(bunzip_data *bd, char bits_wanted) |
static unsigned get_bits(bunzip_data *bd, int bits_wanted) |
99 |
{ |
{ |
100 |
unsigned int bits=0; |
unsigned bits = 0; |
101 |
|
|
102 |
/* If we need to get more data from the byte buffer, do so. (Loop getting |
/* If we need to get more data from the byte buffer, do so. (Loop getting |
103 |
one byte at a time to enforce endianness and avoid unaligned access.) */ |
one byte at a time to enforce endianness and avoid unaligned access.) */ |
104 |
|
while ((int)(bd->inbufBitCount) < bits_wanted) { |
|
while (bd->inbufBitCount<bits_wanted) { |
|
105 |
|
|
106 |
/* If we need to read more data from file into byte buffer, do so */ |
/* If we need to read more data from file into byte buffer, do so */ |
107 |
|
if (bd->inbufPos == bd->inbufCount) { |
108 |
if(bd->inbufPos==bd->inbufCount) { |
/* if "no input fd" case: in_fd == -1, read fails, we jump */ |
109 |
if((bd->inbufCount = read(bd->in_fd, bd->inbuf, IOBUF_SIZE)) <= 0) |
bd->inbufCount = read(bd->in_fd, bd->inbuf, IOBUF_SIZE); |
110 |
longjmp(bd->jmpbuf,RETVAL_UNEXPECTED_INPUT_EOF); |
if (bd->inbufCount <= 0) |
111 |
bd->inbufPos=0; |
longjmp(bd->jmpbuf, RETVAL_UNEXPECTED_INPUT_EOF); |
112 |
|
bd->inbufPos = 0; |
113 |
} |
} |
114 |
|
|
115 |
/* Avoid 32-bit overflow (dump bit buffer to top of output) */ |
/* Avoid 32-bit overflow (dump bit buffer to top of output) */ |
116 |
|
if (bd->inbufBitCount >= 24) { |
117 |
if(bd->inbufBitCount>=24) { |
bits = bd->inbufBits & ((1 << bd->inbufBitCount) - 1); |
118 |
bits=bd->inbufBits&((1<<bd->inbufBitCount)-1); |
bits_wanted -= bd->inbufBitCount; |
119 |
bits_wanted-=bd->inbufBitCount; |
bits <<= bits_wanted; |
120 |
bits<<=bits_wanted; |
bd->inbufBitCount = 0; |
|
bd->inbufBitCount=0; |
|
121 |
} |
} |
122 |
|
|
123 |
/* Grab next 8 bits of input from buffer. */ |
/* Grab next 8 bits of input from buffer. */ |
124 |
|
bd->inbufBits = (bd->inbufBits << 8) | bd->inbuf[bd->inbufPos++]; |
125 |
bd->inbufBits=(bd->inbufBits<<8)|bd->inbuf[bd->inbufPos++]; |
bd->inbufBitCount += 8; |
|
bd->inbufBitCount+=8; |
|
126 |
} |
} |
127 |
|
|
128 |
/* Calculate result */ |
/* Calculate result */ |
129 |
|
bd->inbufBitCount -= bits_wanted; |
130 |
bd->inbufBitCount-=bits_wanted; |
bits |= (bd->inbufBits >> bd->inbufBitCount) & ((1 << bits_wanted) - 1); |
|
bits|=(bd->inbufBits>>bd->inbufBitCount)&((1<<bits_wanted)-1); |
|
131 |
|
|
132 |
return bits; |
return bits; |
133 |
} |
} |
134 |
|
|
135 |
/* Unpacks the next block and sets up for the inverse burrows-wheeler step. */ |
/* Unpacks the next block and sets up for the inverse burrows-wheeler step. */ |
|
|
|
136 |
static int get_next_block(bunzip_data *bd) |
static int get_next_block(bunzip_data *bd) |
137 |
{ |
{ |
138 |
struct group_data *hufGroup; |
struct group_data *hufGroup; |
139 |
int dbufCount,nextSym,dbufSize,groupCount,*base,*limit,selector, |
int dbufCount, nextSym, dbufSize, groupCount, *base, *limit, selector, |
140 |
i,j,k,t,runPos,symCount,symTotal,nSelectors,byteCount[256]; |
i, j, k, t, runPos, symCount, symTotal, nSelectors, byteCount[256]; |
141 |
unsigned char uc, symToByte[256], mtfSymbol[256], *selectors; |
unsigned char uc, symToByte[256], mtfSymbol[256], *selectors; |
142 |
unsigned int *dbuf,origPtr; |
unsigned *dbuf, origPtr; |
143 |
|
|
144 |
dbuf=bd->dbuf; |
dbuf = bd->dbuf; |
145 |
dbufSize=bd->dbufSize; |
dbufSize = bd->dbufSize; |
146 |
selectors=bd->selectors; |
selectors = bd->selectors; |
147 |
|
|
148 |
/* Reset longjmp I/O error handling */ |
/* Reset longjmp I/O error handling */ |
149 |
|
i = setjmp(bd->jmpbuf); |
|
i=setjmp(bd->jmpbuf); |
|
150 |
if (i) return i; |
if (i) return i; |
151 |
|
|
152 |
/* Read in header signature and CRC, then validate signature. |
/* Read in header signature and CRC, then validate signature. |
153 |
(last block signature means CRC is for whole file, return now) */ |
(last block signature means CRC is for whole file, return now) */ |
154 |
|
i = get_bits(bd, 24); |
155 |
i = get_bits(bd,24); |
j = get_bits(bd, 24); |
156 |
j = get_bits(bd,24); |
bd->headerCRC = get_bits(bd, 32); |
|
bd->headerCRC=get_bits(bd,32); |
|
157 |
if ((i == 0x177245) && (j == 0x385090)) return RETVAL_LAST_BLOCK; |
if ((i == 0x177245) && (j == 0x385090)) return RETVAL_LAST_BLOCK; |
158 |
if ((i != 0x314159) || (j != 0x265359)) return RETVAL_NOT_BZIP_DATA; |
if ((i != 0x314159) || (j != 0x265359)) return RETVAL_NOT_BZIP_DATA; |
159 |
|
|
160 |
/* We can add support for blockRandomised if anybody complains. There was |
/* We can add support for blockRandomised if anybody complains. There was |
161 |
some code for this in busybox 1.0.0-pre3, but nobody ever noticed that |
some code for this in busybox 1.0.0-pre3, but nobody ever noticed that |
162 |
it didn't actually work. */ |
it didn't actually work. */ |
163 |
|
if (get_bits(bd, 1)) return RETVAL_OBSOLETE_INPUT; |
164 |
if (get_bits(bd,1)) return RETVAL_OBSOLETE_INPUT; |
origPtr = get_bits(bd, 24); |
165 |
if ((origPtr=get_bits(bd,24)) > dbufSize) return RETVAL_DATA_ERROR; |
if ((int)origPtr > dbufSize) return RETVAL_DATA_ERROR; |
166 |
|
|
167 |
/* mapping table: if some byte values are never used (encoding things |
/* mapping table: if some byte values are never used (encoding things |
168 |
like ascii text), the compression code removes the gaps to have fewer |
like ascii text), the compression code removes the gaps to have fewer |
169 |
symbols to deal with, and writes a sparse bitfield indicating which |
symbols to deal with, and writes a sparse bitfield indicating which |
170 |
values were present. We make a translation table to convert the symbols |
values were present. We make a translation table to convert the symbols |
171 |
back to the corresponding bytes. */ |
back to the corresponding bytes. */ |
172 |
|
t = get_bits(bd, 16); |
173 |
t=get_bits(bd, 16); |
symTotal = 0; |
174 |
symTotal=0; |
for (i = 0; i < 16; i++) { |
175 |
for (i=0;i<16;i++) { |
if (t & (1 << (15-i))) { |
176 |
if(t&(1<<(15-i))) { |
k = get_bits(bd, 16); |
177 |
k=get_bits(bd,16); |
for (j = 0; j < 16; j++) |
178 |
for (j=0;j<16;j++) |
if (k & (1 << (15-j))) |
179 |
if(k&(1<<(15-j))) symToByte[symTotal++]=(16*i)+j; |
symToByte[symTotal++] = (16*i) + j; |
180 |
} |
} |
181 |
} |
} |
182 |
|
|
183 |
/* How many different Huffman coding groups does this block use? */ |
/* How many different Huffman coding groups does this block use? */ |
184 |
|
groupCount = get_bits(bd, 3); |
185 |
groupCount=get_bits(bd,3); |
if (groupCount < 2 || groupCount > MAX_GROUPS) |
186 |
if (groupCount<2 || groupCount>MAX_GROUPS) return RETVAL_DATA_ERROR; |
return RETVAL_DATA_ERROR; |
187 |
|
|
188 |
/* nSelectors: Every GROUP_SIZE many symbols we select a new Huffman coding |
/* nSelectors: Every GROUP_SIZE many symbols we select a new Huffman coding |
189 |
group. Read in the group selector list, which is stored as MTF encoded |
group. Read in the group selector list, which is stored as MTF encoded |
190 |
bit runs. (MTF=Move To Front, as each value is used it's moved to the |
bit runs. (MTF=Move To Front, as each value is used it's moved to the |
191 |
start of the list.) */ |
start of the list.) */ |
192 |
|
nSelectors = get_bits(bd, 15); |
193 |
if(!(nSelectors=get_bits(bd, 15))) return RETVAL_DATA_ERROR; |
if (!nSelectors) return RETVAL_DATA_ERROR; |
194 |
for (i=0; i<groupCount; i++) mtfSymbol[i] = i; |
for (i = 0; i < groupCount; i++) mtfSymbol[i] = i; |
195 |
for (i=0; i<nSelectors; i++) { |
for (i = 0; i < nSelectors; i++) { |
196 |
|
|
197 |
/* Get next value */ |
/* Get next value */ |
198 |
|
for (j = 0; get_bits(bd, 1); j++) |
199 |
for (j=0;get_bits(bd,1);j++) if (j>=groupCount) return RETVAL_DATA_ERROR; |
if (j >= groupCount) return RETVAL_DATA_ERROR; |
200 |
|
|
201 |
/* Decode MTF to get the next selector */ |
/* Decode MTF to get the next selector */ |
|
|
|
202 |
uc = mtfSymbol[j]; |
uc = mtfSymbol[j]; |
203 |
for (;j;j--) mtfSymbol[j] = mtfSymbol[j-1]; |
for (;j;j--) mtfSymbol[j] = mtfSymbol[j-1]; |
204 |
mtfSymbol[0]=selectors[i]=uc; |
mtfSymbol[0] = selectors[i] = uc; |
205 |
} |
} |
206 |
|
|
207 |
/* Read the Huffman coding tables for each group, which code for symTotal |
/* Read the Huffman coding tables for each group, which code for symTotal |
208 |
literal symbols, plus two run symbols (RUNA, RUNB) */ |
literal symbols, plus two run symbols (RUNA, RUNB) */ |
209 |
|
symCount = symTotal + 2; |
210 |
symCount=symTotal+2; |
for (j = 0; j < groupCount; j++) { |
211 |
for (j=0; j<groupCount; j++) { |
unsigned char length[MAX_SYMBOLS]; |
212 |
unsigned char length[MAX_SYMBOLS],temp[MAX_HUFCODE_BITS+1]; |
/* 8 bits is ALMOST enough for temp[], see below */ |
213 |
int minLen, maxLen, pp; |
unsigned temp[MAX_HUFCODE_BITS+1]; |
214 |
|
int minLen, maxLen, pp; |
215 |
|
|
216 |
/* Read Huffman code lengths for each symbol. They're stored in |
/* Read Huffman code lengths for each symbol. They're stored in |
217 |
a way similar to mtf; record a starting value for the first symbol, |
a way similar to mtf; record a starting value for the first symbol, |
219 |
(Subtracting 1 before the loop and then adding it back at the end is |
(Subtracting 1 before the loop and then adding it back at the end is |
220 |
an optimization that makes the test inside the loop simpler: symbol |
an optimization that makes the test inside the loop simpler: symbol |
221 |
length 0 becomes negative, so an unsigned inequality catches it.) */ |
length 0 becomes negative, so an unsigned inequality catches it.) */ |
222 |
|
t = get_bits(bd, 5) - 1; |
|
t=get_bits(bd, 5)-1; |
|
223 |
for (i = 0; i < symCount; i++) { |
for (i = 0; i < symCount; i++) { |
224 |
for (;;) { |
for (;;) { |
225 |
if (((unsigned)t) > (MAX_HUFCODE_BITS-1)) |
if ((unsigned)t > (MAX_HUFCODE_BITS-1)) |
226 |
return RETVAL_DATA_ERROR; |
return RETVAL_DATA_ERROR; |
227 |
|
|
228 |
/* If first bit is 0, stop. Else second bit indicates whether |
/* If first bit is 0, stop. Else second bit indicates whether |
229 |
to increment or decrement the value. Optimization: grab 2 |
to increment or decrement the value. Optimization: grab 2 |
230 |
bits and unget the second if the first was 0. */ |
bits and unget the second if the first was 0. */ |
231 |
|
k = get_bits(bd, 2); |
|
k = get_bits(bd,2); |
|
232 |
if (k < 2) { |
if (k < 2) { |
233 |
bd->inbufBitCount++; |
bd->inbufBitCount++; |
234 |
break; |
break; |
235 |
} |
} |
236 |
|
|
237 |
/* Add one if second bit 1, else subtract 1. Avoids if/else */ |
/* Add one if second bit 1, else subtract 1. Avoids if/else */ |
238 |
|
t += (((k+1) & 2) - 1); |
|
t+=(((k+1)&2)-1); |
|
239 |
} |
} |
240 |
|
|
241 |
/* Correct for the initial -1, to get the final symbol length */ |
/* Correct for the initial -1, to get the final symbol length */ |
242 |
|
length[i] = t + 1; |
|
length[i]=t+1; |
|
243 |
} |
} |
244 |
|
|
245 |
/* Find largest and smallest lengths in this group */ |
/* Find largest and smallest lengths in this group */ |
246 |
|
minLen = maxLen = length[0]; |
|
minLen=maxLen=length[0]; |
|
247 |
for (i = 1; i < symCount; i++) { |
for (i = 1; i < symCount; i++) { |
248 |
if(length[i] > maxLen) maxLen = length[i]; |
if (length[i] > maxLen) maxLen = length[i]; |
249 |
else if(length[i] < minLen) minLen = length[i]; |
else if (length[i] < minLen) minLen = length[i]; |
250 |
} |
} |
251 |
|
|
252 |
/* Calculate permute[], base[], and limit[] tables from length[]. |
/* Calculate permute[], base[], and limit[] tables from length[]. |
259 |
* number of bits can have. This is how the Huffman codes can vary in |
* number of bits can have. This is how the Huffman codes can vary in |
260 |
* length: each code with a value>limit[length] needs another bit. |
* length: each code with a value>limit[length] needs another bit. |
261 |
*/ |
*/ |
262 |
|
hufGroup = bd->groups + j; |
|
hufGroup=bd->groups+j; |
|
263 |
hufGroup->minLen = minLen; |
hufGroup->minLen = minLen; |
264 |
hufGroup->maxLen = maxLen; |
hufGroup->maxLen = maxLen; |
265 |
|
|
266 |
/* Note that minLen can't be smaller than 1, so we adjust the base |
/* Note that minLen can't be smaller than 1, so we adjust the base |
267 |
and limit array pointers so we're not always wasting the first |
and limit array pointers so we're not always wasting the first |
268 |
entry. We do this again when using them (during symbol decoding).*/ |
entry. We do this again when using them (during symbol decoding).*/ |
269 |
|
base = hufGroup->base - 1; |
270 |
base=hufGroup->base-1; |
limit = hufGroup->limit - 1; |
|
limit=hufGroup->limit-1; |
|
271 |
|
|
272 |
/* Calculate permute[]. Concurently, initialize temp[] and limit[]. */ |
/* Calculate permute[]. Concurently, initialize temp[] and limit[]. */ |
273 |
|
pp = 0; |
274 |
pp=0; |
for (i = minLen; i <= maxLen; i++) { |
275 |
for (i=minLen;i<=maxLen;i++) { |
temp[i] = limit[i] = 0; |
276 |
temp[i]=limit[i]=0; |
for (t = 0; t < symCount; t++) |
277 |
for (t=0;t<symCount;t++) |
if (length[t] == i) |
278 |
if(length[t]==i) hufGroup->permute[pp++] = t; |
hufGroup->permute[pp++] = t; |
279 |
} |
} |
280 |
|
|
281 |
/* Count symbols coded for at each bit length */ |
/* Count symbols coded for at each bit length */ |
282 |
|
/* NB: in pathological cases, temp[8] can end ip being 256. |
283 |
for (i=0;i<symCount;i++) temp[length[i]]++; |
* That's why uint8_t is too small for temp[]. */ |
284 |
|
for (i = 0; i < symCount; i++) temp[length[i]]++; |
285 |
|
|
286 |
/* Calculate limit[] (the largest symbol-coding value at each bit |
/* Calculate limit[] (the largest symbol-coding value at each bit |
287 |
* length, which is (previous limit<<1)+symbols at this level), and |
* length, which is (previous limit<<1)+symbols at this level), and |
288 |
* base[] (number of symbols to ignore at each bit length, which is |
* base[] (number of symbols to ignore at each bit length, which is |
289 |
* limit minus the cumulative count of symbols coded for already). */ |
* limit minus the cumulative count of symbols coded for already). */ |
290 |
|
pp = t = 0; |
291 |
pp=t=0; |
for (i = minLen; i < maxLen; i++) { |
292 |
for (i=minLen; i<maxLen; i++) { |
pp += temp[i]; |
|
pp+=temp[i]; |
|
293 |
|
|
294 |
/* We read the largest possible symbol size and then unget bits |
/* We read the largest possible symbol size and then unget bits |
295 |
after determining how many we need, and those extra bits could |
after determining how many we need, and those extra bits could |
297 |
each level we're really only interested in the first few bits, |
each level we're really only interested in the first few bits, |
298 |
so here we set all the trailing to-be-ignored bits to 1 so they |
so here we set all the trailing to-be-ignored bits to 1 so they |
299 |
don't affect the value>limit[length] comparison. */ |
don't affect the value>limit[length] comparison. */ |
300 |
|
limit[i] = (pp << (maxLen - i)) - 1; |
301 |
limit[i]= (pp << (maxLen - i)) - 1; |
pp <<= 1; |
302 |
pp<<=1; |
t += temp[i]; |
303 |
base[i+1]=pp-(t+=temp[i]); |
base[i+1] = pp - t; |
304 |
} |
} |
305 |
limit[maxLen+1] = INT_MAX; /* Sentinal value for reading next sym. */ |
limit[maxLen+1] = INT_MAX; /* Sentinel value for reading next sym. */ |
306 |
limit[maxLen]=pp+temp[maxLen]-1; |
limit[maxLen] = pp + temp[maxLen] - 1; |
307 |
base[minLen]=0; |
base[minLen] = 0; |
308 |
} |
} |
309 |
|
|
310 |
/* We've finished reading and digesting the block header. Now read this |
/* We've finished reading and digesting the block header. Now read this |
311 |
block's Huffman coded symbols from the file and undo the Huffman coding |
block's Huffman coded symbols from the file and undo the Huffman coding |
312 |
and run length encoding, saving the result into dbuf[dbufCount++]=uc */ |
and run length encoding, saving the result into dbuf[dbufCount++] = uc */ |
313 |
|
|
314 |
/* Initialize symbol occurrence counters and symbol Move To Front table */ |
/* Initialize symbol occurrence counters and symbol Move To Front table */ |
315 |
|
memset(byteCount, 0, sizeof(byteCount)); /* smaller, maybe slower? */ |
316 |
for (i=0;i<256;i++) { |
for (i = 0; i < 256; i++) { |
317 |
byteCount[i] = 0; |
//byteCount[i] = 0; |
318 |
mtfSymbol[i]=(unsigned char)i; |
mtfSymbol[i] = (unsigned char)i; |
319 |
} |
} |
320 |
|
|
321 |
/* Loop through compressed symbols. */ |
/* Loop through compressed symbols. */ |
322 |
|
|
323 |
runPos=dbufCount=selector=0; |
runPos = dbufCount = selector = 0; |
324 |
for (;;) { |
for (;;) { |
325 |
|
|
326 |
/* fetch next Huffman coding group from list. */ |
/* Fetch next Huffman coding group from list. */ |
327 |
|
symCount = GROUP_SIZE - 1; |
328 |
symCount=GROUP_SIZE-1; |
if (selector >= nSelectors) return RETVAL_DATA_ERROR; |
329 |
if(selector>=nSelectors) return RETVAL_DATA_ERROR; |
hufGroup = bd->groups + selectors[selector++]; |
330 |
hufGroup=bd->groups+selectors[selector++]; |
base = hufGroup->base - 1; |
331 |
base=hufGroup->base-1; |
limit = hufGroup->limit - 1; |
332 |
limit=hufGroup->limit-1; |
continue_this_group: |
|
continue_this_group: |
|
333 |
|
|
334 |
/* Read next Huffman-coded symbol. */ |
/* Read next Huffman-coded symbol. */ |
335 |
|
|
340 |
valid compressed file. As a further optimization, we do the read |
valid compressed file. As a further optimization, we do the read |
341 |
inline (falling back to a call to get_bits if the buffer runs |
inline (falling back to a call to get_bits if the buffer runs |
342 |
dry). The following (up to got_huff_bits:) is equivalent to |
dry). The following (up to got_huff_bits:) is equivalent to |
343 |
j=get_bits(bd,hufGroup->maxLen); |
j = get_bits(bd, hufGroup->maxLen); |
344 |
*/ |
*/ |
345 |
|
while ((int)(bd->inbufBitCount) < hufGroup->maxLen) { |
346 |
while (bd->inbufBitCount<hufGroup->maxLen) { |
if (bd->inbufPos == bd->inbufCount) { |
347 |
if(bd->inbufPos==bd->inbufCount) { |
j = get_bits(bd, hufGroup->maxLen); |
|
j = get_bits(bd,hufGroup->maxLen); |
|
348 |
goto got_huff_bits; |
goto got_huff_bits; |
349 |
} |
} |
350 |
bd->inbufBits=(bd->inbufBits<<8)|bd->inbuf[bd->inbufPos++]; |
bd->inbufBits = (bd->inbufBits << 8) | bd->inbuf[bd->inbufPos++]; |
351 |
bd->inbufBitCount+=8; |
bd->inbufBitCount += 8; |
352 |
}; |
}; |
353 |
bd->inbufBitCount-=hufGroup->maxLen; |
bd->inbufBitCount -= hufGroup->maxLen; |
354 |
j = (bd->inbufBits>>bd->inbufBitCount)&((1<<hufGroup->maxLen)-1); |
j = (bd->inbufBits >> bd->inbufBitCount) & ((1 << hufGroup->maxLen) - 1); |
355 |
|
|
356 |
got_huff_bits: |
got_huff_bits: |
357 |
|
|
358 |
/* Figure how how many bits are in next symbol and unget extras */ |
/* Figure how how many bits are in next symbol and unget extras */ |
359 |
|
i = hufGroup->minLen; |
360 |
i=hufGroup->minLen; |
while (j > limit[i]) ++i; |
|
while (j>limit[i]) ++i; |
|
361 |
bd->inbufBitCount += (hufGroup->maxLen - i); |
bd->inbufBitCount += (hufGroup->maxLen - i); |
362 |
|
|
363 |
/* Huffman decode value to get nextSym (with bounds checking) */ |
/* Huffman decode value to get nextSym (with bounds checking) */ |
364 |
|
if (i > hufGroup->maxLen) |
365 |
if ((i > hufGroup->maxLen) |
return RETVAL_DATA_ERROR; |
366 |
|| (((unsigned)(j=(j>>(hufGroup->maxLen-i))-base[i])) |
j = (j >> (hufGroup->maxLen - i)) - base[i]; |
367 |
>= MAX_SYMBOLS)) |
if ((unsigned)j >= MAX_SYMBOLS) |
368 |
return RETVAL_DATA_ERROR; |
return RETVAL_DATA_ERROR; |
369 |
nextSym = hufGroup->permute[j]; |
nextSym = hufGroup->permute[j]; |
370 |
|
|
372 |
byte, or a repeated run of the most recent literal byte. First, |
byte, or a repeated run of the most recent literal byte. First, |
373 |
check if nextSym indicates a repeated run, and if so loop collecting |
check if nextSym indicates a repeated run, and if so loop collecting |
374 |
how many times to repeat the last literal. */ |
how many times to repeat the last literal. */ |
375 |
|
if ((unsigned)nextSym <= SYMBOL_RUNB) { /* RUNA or RUNB */ |
|
if (((unsigned)nextSym) <= SYMBOL_RUNB) { /* RUNA or RUNB */ |
|
376 |
|
|
377 |
/* If this is the start of a new run, zero out counter */ |
/* If this is the start of a new run, zero out counter */ |
378 |
|
if (!runPos) { |
|
if(!runPos) { |
|
379 |
runPos = 1; |
runPos = 1; |
380 |
t = 0; |
t = 0; |
381 |
} |
} |
387 |
the basic or 0/1 method (except all bits 0, which would use no |
the basic or 0/1 method (except all bits 0, which would use no |
388 |
symbols, but a run of length 0 doesn't mean anything in this |
symbols, but a run of length 0 doesn't mean anything in this |
389 |
context). Thus space is saved. */ |
context). Thus space is saved. */ |
|
|
|
390 |
t += (runPos << nextSym); /* +runPos if RUNA; +2*runPos if RUNB */ |
t += (runPos << nextSym); /* +runPos if RUNA; +2*runPos if RUNB */ |
391 |
if(runPos < dbufSize) runPos <<= 1; |
if (runPos < dbufSize) runPos <<= 1; |
392 |
goto end_of_huffman_loop; |
goto end_of_huffman_loop; |
393 |
} |
} |
394 |
|
|
396 |
how many times to repeat the last literal, so append that many |
how many times to repeat the last literal, so append that many |
397 |
copies to our buffer of decoded symbols (dbuf) now. (The last |
copies to our buffer of decoded symbols (dbuf) now. (The last |
398 |
literal used is the one at the head of the mtfSymbol array.) */ |
literal used is the one at the head of the mtfSymbol array.) */ |
399 |
|
if (runPos) { |
400 |
if(runPos) { |
runPos = 0; |
401 |
runPos=0; |
if (dbufCount + t >= dbufSize) return RETVAL_DATA_ERROR; |
|
if(dbufCount+t>=dbufSize) return RETVAL_DATA_ERROR; |
|
402 |
|
|
403 |
uc = symToByte[mtfSymbol[0]]; |
uc = symToByte[mtfSymbol[0]]; |
404 |
byteCount[uc] += t; |
byteCount[uc] += t; |
405 |
while (t--) dbuf[dbufCount++]=uc; |
while (t--) dbuf[dbufCount++] = uc; |
406 |
} |
} |
407 |
|
|
408 |
/* Is this the terminating symbol? */ |
/* Is this the terminating symbol? */ |
409 |
|
if (nextSym > symTotal) break; |
|
if(nextSym>symTotal) break; |
|
410 |
|
|
411 |
/* At this point, nextSym indicates a new literal character. Subtract |
/* At this point, nextSym indicates a new literal character. Subtract |
412 |
one to get the position in the MTF array at which this literal is |
one to get the position in the MTF array at which this literal is |
415 |
first symbol in the mtf array, position 0, would have been handled |
first symbol in the mtf array, position 0, would have been handled |
416 |
as part of a run above. Therefore 1 unused mtf position minus |
as part of a run above. Therefore 1 unused mtf position minus |
417 |
2 non-literal nextSym values equals -1.) */ |
2 non-literal nextSym values equals -1.) */ |
418 |
|
if (dbufCount >= dbufSize) return RETVAL_DATA_ERROR; |
|
if(dbufCount>=dbufSize) return RETVAL_DATA_ERROR; |
|
419 |
i = nextSym - 1; |
i = nextSym - 1; |
420 |
uc = mtfSymbol[i]; |
uc = mtfSymbol[i]; |
421 |
|
|
423 |
* small number of symbols, and are bound by 256 in any case, using |
* small number of symbols, and are bound by 256 in any case, using |
424 |
* memmove here would typically be bigger and slower due to function |
* memmove here would typically be bigger and slower due to function |
425 |
* call overhead and other assorted setup costs. */ |
* call overhead and other assorted setup costs. */ |
|
|
|
426 |
do { |
do { |
427 |
mtfSymbol[i] = mtfSymbol[i-1]; |
mtfSymbol[i] = mtfSymbol[i-1]; |
428 |
} while (--i); |
} while (--i); |
429 |
mtfSymbol[0] = uc; |
mtfSymbol[0] = uc; |
430 |
uc=symToByte[uc]; |
uc = symToByte[uc]; |
431 |
|
|
432 |
/* We have our literal byte. Save it into dbuf. */ |
/* We have our literal byte. Save it into dbuf. */ |
|
|
|
433 |
byteCount[uc]++; |
byteCount[uc]++; |
434 |
dbuf[dbufCount++] = (unsigned int)uc; |
dbuf[dbufCount++] = (unsigned)uc; |
435 |
|
|
436 |
/* Skip group initialization if we're not done with this group. Done |
/* Skip group initialization if we're not done with this group. Done |
437 |
* this way to avoid compiler warning. */ |
* this way to avoid compiler warning. */ |
438 |
|
end_of_huffman_loop: |
439 |
end_of_huffman_loop: |
if (symCount--) goto continue_this_group; |
|
if(symCount--) goto continue_this_group; |
|
440 |
} |
} |
441 |
|
|
442 |
/* At this point, we've read all the Huffman-coded symbols (and repeated |
/* At this point, we've read all the Huffman-coded symbols (and repeated |
443 |
runs) for this block from the input stream, and decoded them into the |
runs) for this block from the input stream, and decoded them into the |
444 |
intermediate buffer. There are dbufCount many decoded bytes in dbuf[]. |
intermediate buffer. There are dbufCount many decoded bytes in dbuf[]. |
445 |
Now undo the Burrows-Wheeler transform on dbuf. |
Now undo the Burrows-Wheeler transform on dbuf. |
446 |
See http://dogma.net/markn/articles/bwt/bwt.htm |
See http://dogma.net/markn/articles/bwt/bwt.htm |
447 |
*/ |
*/ |
448 |
|
|
449 |
/* Turn byteCount into cumulative occurrence counts of 0 to n-1. */ |
/* Turn byteCount into cumulative occurrence counts of 0 to n-1. */ |
450 |
|
j = 0; |
451 |
j=0; |
for (i = 0; i < 256; i++) { |
452 |
for (i=0;i<256;i++) { |
k = j + byteCount[i]; |
|
k=j+byteCount[i]; |
|
453 |
byteCount[i] = j; |
byteCount[i] = j; |
454 |
j=k; |
j = k; |
455 |
} |
} |
456 |
|
|
457 |
/* Figure out what order dbuf would be in if we sorted it. */ |
/* Figure out what order dbuf would be in if we sorted it. */ |
458 |
|
for (i = 0; i < dbufCount; i++) { |
459 |
for (i=0;i<dbufCount;i++) { |
uc = (unsigned char)(dbuf[i] & 0xff); |
|
uc=(unsigned char)(dbuf[i] & 0xff); |
|
460 |
dbuf[byteCount[uc]] |= (i << 8); |
dbuf[byteCount[uc]] |= (i << 8); |
461 |
byteCount[uc]++; |
byteCount[uc]++; |
462 |
} |
} |
464 |
/* Decode first byte by hand to initialize "previous" byte. Note that it |
/* Decode first byte by hand to initialize "previous" byte. Note that it |
465 |
doesn't get output, and if the first three characters are identical |
doesn't get output, and if the first three characters are identical |
466 |
it doesn't qualify as a run (hence writeRunCountdown=5). */ |
it doesn't qualify as a run (hence writeRunCountdown=5). */ |
467 |
|
if (dbufCount) { |
468 |
if(dbufCount) { |
if ((int)origPtr >= dbufCount) return RETVAL_DATA_ERROR; |
469 |
if(origPtr>=dbufCount) return RETVAL_DATA_ERROR; |
bd->writePos = dbuf[origPtr]; |
470 |
bd->writePos=dbuf[origPtr]; |
bd->writeCurrent = (unsigned char)(bd->writePos & 0xff); |
471 |
bd->writeCurrent=(unsigned char)(bd->writePos&0xff); |
bd->writePos >>= 8; |
472 |
bd->writePos>>=8; |
bd->writeRunCountdown = 5; |
|
bd->writeRunCountdown=5; |
|
473 |
} |
} |
474 |
bd->writeCount=dbufCount; |
bd->writeCount = dbufCount; |
475 |
|
|
476 |
return RETVAL_OK; |
return RETVAL_OK; |
477 |
} |
} |
482 |
error (all errors are negative numbers). If out_fd!=-1, outbuf and len |
error (all errors are negative numbers). If out_fd!=-1, outbuf and len |
483 |
are ignored, data is written to out_fd and return is RETVAL_OK or error. |
are ignored, data is written to out_fd and return is RETVAL_OK or error. |
484 |
*/ |
*/ |
485 |
|
int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len) |
|
static int read_bunzip(bunzip_data *bd, char *outbuf, int len) |
|
486 |
{ |
{ |
487 |
const unsigned int *dbuf; |
const unsigned *dbuf; |
488 |
int pos,current,previous,gotcount; |
int pos, current, previous, gotcount; |
489 |
|
|
490 |
/* If last read was short due to end of file, return last block now */ |
/* If last read was short due to end of file, return last block now */ |
491 |
if(bd->writeCount<0) return bd->writeCount; |
if (bd->writeCount < 0) return bd->writeCount; |
492 |
|
|
493 |
gotcount = 0; |
gotcount = 0; |
494 |
dbuf=bd->dbuf; |
dbuf = bd->dbuf; |
495 |
pos=bd->writePos; |
pos = bd->writePos; |
496 |
current=bd->writeCurrent; |
current = bd->writeCurrent; |
497 |
|
|
498 |
/* We will always have pending decoded data to write into the output |
/* We will always have pending decoded data to write into the output |
499 |
buffer unless this is the very first call (in which case we haven't |
buffer unless this is the very first call (in which case we haven't |
500 |
Huffman-decoded a block into the intermediate buffer yet). */ |
Huffman-decoded a block into the intermediate buffer yet). */ |
|
|
|
501 |
if (bd->writeCopies) { |
if (bd->writeCopies) { |
502 |
|
|
503 |
/* Inside the loop, writeCopies means extra copies (beyond 1) */ |
/* Inside the loop, writeCopies means extra copies (beyond 1) */ |
|
|
|
504 |
--bd->writeCopies; |
--bd->writeCopies; |
505 |
|
|
506 |
/* Loop outputting bytes */ |
/* Loop outputting bytes */ |
|
|
|
507 |
for (;;) { |
for (;;) { |
508 |
|
|
509 |
/* If the output buffer is full, snapshot state and return */ |
/* If the output buffer is full, snapshot state and return */ |
510 |
|
if (gotcount >= len) { |
511 |
if(gotcount >= len) { |
bd->writePos = pos; |
512 |
bd->writePos=pos; |
bd->writeCurrent = current; |
|
bd->writeCurrent=current; |
|
513 |
bd->writeCopies++; |
bd->writeCopies++; |
514 |
return len; |
return len; |
515 |
} |
} |
516 |
|
|
517 |
/* Write next byte into output buffer, updating CRC */ |
/* Write next byte into output buffer, updating CRC */ |
|
|
|
518 |
outbuf[gotcount++] = current; |
outbuf[gotcount++] = current; |
519 |
bd->writeCRC=(((bd->writeCRC)<<8) |
bd->writeCRC = (bd->writeCRC << 8) |
520 |
^bd->crc32Table[((bd->writeCRC)>>24)^current]); |
^ bd->crc32Table[(bd->writeCRC >> 24) ^ current]; |
521 |
|
|
522 |
/* Loop now if we're outputting multiple copies of this byte */ |
/* Loop now if we're outputting multiple copies of this byte */ |
|
|
|
523 |
if (bd->writeCopies) { |
if (bd->writeCopies) { |
524 |
--bd->writeCopies; |
--bd->writeCopies; |
525 |
continue; |
continue; |
526 |
} |
} |
527 |
decode_next_byte: |
decode_next_byte: |
528 |
if (!bd->writeCount--) break; |
if (!bd->writeCount--) break; |
529 |
/* Follow sequence vector to undo Burrows-Wheeler transform */ |
/* Follow sequence vector to undo Burrows-Wheeler transform */ |
530 |
previous=current; |
previous = current; |
531 |
pos=dbuf[pos]; |
pos = dbuf[pos]; |
532 |
current=pos&0xff; |
current = pos & 0xff; |
533 |
pos>>=8; |
pos >>= 8; |
534 |
|
|
535 |
/* After 3 consecutive copies of the same byte, the 4th is a repeat |
/* After 3 consecutive copies of the same byte, the 4th |
536 |
count. We count down from 4 instead |
* is a repeat count. We count down from 4 instead |
537 |
* of counting up because testing for non-zero is faster */ |
* of counting up because testing for non-zero is faster */ |
538 |
|
if (--bd->writeRunCountdown) { |
539 |
if(--bd->writeRunCountdown) { |
if (current != previous) |
540 |
if(current!=previous) bd->writeRunCountdown=4; |
bd->writeRunCountdown = 4; |
541 |
} else { |
} else { |
542 |
|
|
543 |
/* We have a repeated run, this byte indicates the count */ |
/* We have a repeated run, this byte indicates the count */ |
544 |
|
bd->writeCopies = current; |
545 |
bd->writeCopies=current; |
current = previous; |
546 |
current=previous; |
bd->writeRunCountdown = 5; |
|
bd->writeRunCountdown=5; |
|
547 |
|
|
548 |
/* Sometimes there are just 3 bytes (run length 0) */ |
/* Sometimes there are just 3 bytes (run length 0) */ |
549 |
|
if (!bd->writeCopies) goto decode_next_byte; |
|
if(!bd->writeCopies) goto decode_next_byte; |
|
550 |
|
|
551 |
/* Subtract the 1 copy we'd output anyway to get extras */ |
/* Subtract the 1 copy we'd output anyway to get extras */ |
|
|
|
552 |
--bd->writeCopies; |
--bd->writeCopies; |
553 |
} |
} |
554 |
} |
} |
555 |
|
|
556 |
/* Decompression of this block completed successfully */ |
/* Decompression of this block completed successfully */ |
557 |
|
bd->writeCRC = ~bd->writeCRC; |
558 |
bd->writeCRC=~bd->writeCRC; |
bd->totalCRC = ((bd->totalCRC << 1) | (bd->totalCRC >> 31)) ^ bd->writeCRC; |
|
bd->totalCRC=((bd->totalCRC<<1) | (bd->totalCRC>>31)) ^ bd->writeCRC; |
|
559 |
|
|
560 |
/* If this block had a CRC error, force file level CRC error. */ |
/* If this block had a CRC error, force file level CRC error. */ |
561 |
|
if (bd->writeCRC != bd->headerCRC) { |
562 |
if(bd->writeCRC!=bd->headerCRC) { |
bd->totalCRC = bd->headerCRC + 1; |
|
bd->totalCRC=bd->headerCRC+1; |
|
563 |
return RETVAL_LAST_BLOCK; |
return RETVAL_LAST_BLOCK; |
564 |
} |
} |
565 |
} |
} |
566 |
|
|
567 |
/* Refill the intermediate buffer by Huffman-decoding next block of input */ |
/* Refill the intermediate buffer by Huffman-decoding next block of input */ |
568 |
/* (previous is just a convenient unused temp variable here) */ |
/* (previous is just a convenient unused temp variable here) */ |
569 |
|
previous = get_next_block(bd); |
570 |
previous=get_next_block(bd); |
if (previous) { |
571 |
if(previous) { |
bd->writeCount = previous; |
572 |
bd->writeCount=previous; |
return (previous != RETVAL_LAST_BLOCK) ? previous : gotcount; |
573 |
return (previous!=RETVAL_LAST_BLOCK) ? previous : gotcount; |
} |
574 |
} |
bd->writeCRC = ~0; |
575 |
bd->writeCRC=~0; |
pos = bd->writePos; |
576 |
pos=bd->writePos; |
current = bd->writeCurrent; |
|
current=bd->writeCurrent; |
|
577 |
goto decode_next_byte; |
goto decode_next_byte; |
578 |
} |
} |
579 |
|
|
581 |
a complete bunzip file (len bytes long). If in_fd!=-1, inbuf and len are |
a complete bunzip file (len bytes long). If in_fd!=-1, inbuf and len are |
582 |
ignored, and data is read from file handle into temporary buffer. */ |
ignored, and data is read from file handle into temporary buffer. */ |
583 |
|
|
584 |
static int start_bunzip(bunzip_data **bdp, int in_fd, unsigned char *inbuf, |
/* Because bunzip2 is used for help text unpacking, and because bb_show_usage() |
585 |
|
should work for NOFORK applets too, we must be extremely careful to not leak |
586 |
|
any allocations! */ |
587 |
|
int FAST_FUNC start_bunzip(bunzip_data **bdp, int in_fd, const unsigned char *inbuf, |
588 |
int len) |
int len) |
589 |
{ |
{ |
590 |
bunzip_data *bd; |
bunzip_data *bd; |
591 |
unsigned int i; |
unsigned i; |
592 |
const unsigned int BZh0=(((unsigned int)'B')<<24)+(((unsigned int)'Z')<<16) |
enum { |
593 |
+(((unsigned int)'h')<<8)+(unsigned int)'0'; |
BZh0 = ('B' << 24) + ('Z' << 16) + ('h' << 8) + '0', |
594 |
|
h0 = ('h' << 8) + '0', |
595 |
|
}; |
596 |
|
|
597 |
/* Figure out how much data to allocate */ |
/* Figure out how much data to allocate */ |
598 |
|
i = sizeof(bunzip_data); |
599 |
i=sizeof(bunzip_data); |
if (in_fd != -1) i += IOBUF_SIZE; |
|
if(in_fd!=-1) i+=IOBUF_SIZE; |
|
600 |
|
|
601 |
/* Allocate bunzip_data. Most fields initialize to zero. */ |
/* Allocate bunzip_data. Most fields initialize to zero. */ |
602 |
|
bd = *bdp = xzalloc(i); |
|
bd=*bdp=xzalloc(i); |
|
603 |
|
|
604 |
/* Setup input buffer */ |
/* Setup input buffer */ |
605 |
|
bd->in_fd = in_fd; |
606 |
if(-1==(bd->in_fd=in_fd)) { |
if (-1 == in_fd) { |
607 |
bd->inbuf=inbuf; |
/* in this case, bd->inbuf is read-only */ |
608 |
bd->inbufCount=len; |
bd->inbuf = (void*)inbuf; /* cast away const-ness */ |
609 |
} else bd->inbuf=(unsigned char *)(bd+1); |
bd->inbufCount = len; |
610 |
|
} else |
611 |
|
bd->inbuf = (unsigned char *)(bd + 1); |
612 |
|
|
613 |
/* Init the CRC32 table (big endian) */ |
/* Init the CRC32 table (big endian) */ |
614 |
|
crc32_filltable(bd->crc32Table, 1); |
|
bd->crc32Table = crc32_filltable(1); |
|
615 |
|
|
616 |
/* Setup for I/O error handling via longjmp */ |
/* Setup for I/O error handling via longjmp */ |
617 |
|
i = setjmp(bd->jmpbuf); |
618 |
i=setjmp(bd->jmpbuf); |
if (i) return i; |
|
if(i) return i; |
|
619 |
|
|
620 |
/* Ensure that file starts with "BZh['1'-'9']." */ |
/* Ensure that file starts with "BZh['1'-'9']." */ |
621 |
|
/* Update: now caller verifies 1st two bytes, makes .gz/.bz2 |
622 |
|
* integration easier */ |
623 |
|
/* was: */ |
624 |
|
/* i = get_bits(bd, 32); */ |
625 |
|
/* if ((unsigned)(i - BZh0 - 1) >= 9) return RETVAL_NOT_BZIP_DATA; */ |
626 |
|
i = get_bits(bd, 16); |
627 |
|
if ((unsigned)(i - h0 - 1) >= 9) return RETVAL_NOT_BZIP_DATA; |
628 |
|
|
629 |
i = get_bits(bd,32); |
/* Fourth byte (ascii '1'-'9') indicates block size in units of 100k of |
|
if (((unsigned int)(i-BZh0-1)) >= 9) return RETVAL_NOT_BZIP_DATA; |
|
|
|
|
|
/* Fourth byte (ascii '1'-'9'), indicates block size in units of 100k of |
|
630 |
uncompressed data. Allocate intermediate buffer for block. */ |
uncompressed data. Allocate intermediate buffer for block. */ |
631 |
|
/* bd->dbufSize = 100000 * (i - BZh0); */ |
632 |
|
bd->dbufSize = 100000 * (i - h0); |
633 |
|
|
634 |
bd->dbufSize=100000*(i-BZh0); |
/* Cannot use xmalloc - may leak bd in NOFORK case! */ |
635 |
|
bd->dbuf = malloc_or_warn(bd->dbufSize * sizeof(int)); |
636 |
bd->dbuf=xmalloc(bd->dbufSize * sizeof(int)); |
if (!bd->dbuf) { |
637 |
|
free(bd); |
638 |
|
xfunc_die(); |
639 |
|
} |
640 |
return RETVAL_OK; |
return RETVAL_OK; |
641 |
} |
} |
642 |
|
|
643 |
/* Example usage: decompress src_fd to dst_fd. (Stops at end of bzip data, |
void FAST_FUNC dealloc_bunzip(bunzip_data *bd) |
644 |
not end of file.) */ |
{ |
645 |
|
free(bd->dbuf); |
646 |
|
free(bd); |
647 |
|
} |
648 |
|
|
649 |
|
|
650 |
USE_DESKTOP(long long) int |
/* Decompress src_fd to dst_fd. Stops at end of bzip data, not end of file. */ |
651 |
uncompressStream(int src_fd, int dst_fd) |
USE_DESKTOP(long long) int FAST_FUNC |
652 |
|
unpack_bz2_stream(int src_fd, int dst_fd) |
653 |
{ |
{ |
654 |
USE_DESKTOP(long long total_written = 0;) |
USE_DESKTOP(long long total_written = 0;) |
655 |
char *outbuf; |
char *outbuf; |
656 |
bunzip_data *bd; |
bunzip_data *bd; |
657 |
int i; |
int i; |
658 |
|
|
659 |
outbuf=xmalloc(IOBUF_SIZE); |
outbuf = xmalloc(IOBUF_SIZE); |
660 |
i=start_bunzip(&bd,src_fd,0,0); |
i = start_bunzip(&bd, src_fd, NULL, 0); |
661 |
if(!i) { |
if (!i) { |
662 |
for (;;) { |
for (;;) { |
663 |
if((i=read_bunzip(bd,outbuf,IOBUF_SIZE)) <= 0) break; |
i = read_bunzip(bd, outbuf, IOBUF_SIZE); |
664 |
if(i!=write(dst_fd,outbuf,i)) { |
if (i <= 0) break; |
665 |
i=RETVAL_UNEXPECTED_OUTPUT_EOF; |
if (i != full_write(dst_fd, outbuf, i)) { |
666 |
|
i = RETVAL_SHORT_WRITE; |
667 |
break; |
break; |
668 |
} |
} |
669 |
USE_DESKTOP(total_written += i;) |
USE_DESKTOP(total_written += i;) |
672 |
|
|
673 |
/* Check CRC and release memory */ |
/* Check CRC and release memory */ |
674 |
|
|
675 |
if(i==RETVAL_LAST_BLOCK) { |
if (i == RETVAL_LAST_BLOCK) { |
676 |
if (bd->headerCRC!=bd->totalCRC) { |
if (bd->headerCRC != bd->totalCRC) { |
677 |
bb_error_msg("data integrity error when decompressing"); |
bb_error_msg("CRC error"); |
678 |
} else { |
} else { |
679 |
i=RETVAL_OK; |
i = RETVAL_OK; |
680 |
} |
} |
681 |
} else if (i==RETVAL_UNEXPECTED_OUTPUT_EOF) { |
} else if (i == RETVAL_SHORT_WRITE) { |
682 |
bb_error_msg("compressed file ends unexpectedly"); |
bb_error_msg("short write"); |
683 |
} else { |
} else { |
684 |
bb_error_msg("decompression failed"); |
bb_error_msg("bunzip error %d", i); |
685 |
} |
} |
686 |
free(bd->dbuf); |
dealloc_bunzip(bd); |
|
free(bd); |
|
687 |
free(outbuf); |
free(outbuf); |
688 |
|
|
689 |
return i ? i : USE_DESKTOP(total_written) + 0; |
return i ? i : USE_DESKTOP(total_written) + 0; |
690 |
} |
} |
691 |
|
|
692 |
|
USE_DESKTOP(long long) int FAST_FUNC |
693 |
|
unpack_bz2_stream_prime(int src_fd, int dst_fd) |
694 |
|
{ |
695 |
|
unsigned char magic[2]; |
696 |
|
xread(src_fd, magic, 2); |
697 |
|
if (magic[0] != 'B' || magic[1] != 'Z') { |
698 |
|
bb_error_msg_and_die("invalid magic"); |
699 |
|
} |
700 |
|
return unpack_bz2_stream(src_fd, dst_fd); |
701 |
|
} |
702 |
|
|
703 |
#ifdef TESTING |
#ifdef TESTING |
704 |
|
|
705 |
static char * const bunzip_errors[]={NULL,"Bad file checksum","Not bzip data", |
static char *const bunzip_errors[] = { |
706 |
"Unexpected input EOF","Unexpected output EOF","Data error", |
NULL, "Bad file checksum", "Not bzip data", |
707 |
"Out of memory","Obsolete (pre 0.9.5) bzip format not supported."}; |
"Unexpected input EOF", "Unexpected output EOF", "Data error", |
708 |
|
"Out of memory", "Obsolete (pre 0.9.5) bzip format not supported" |
709 |
|
}; |
710 |
|
|
711 |
/* Dumb little test thing, decompress stdin to stdout */ |
/* Dumb little test thing, decompress stdin to stdout */ |
712 |
int main(int argc, char *argv[]) |
int main(int argc, char **argv) |
713 |
{ |
{ |
714 |
int i=uncompressStream(0,1); |
int i; |
715 |
char c; |
char c; |
716 |
|
|
717 |
if(i<0) fprintf(stderr,"%s\n", bunzip_errors[-i]); |
int i = unpack_bz2_stream_prime(0, 1); |
718 |
else if(read(0,&c,1)) fprintf(stderr,"Trailing garbage ignored\n"); |
if (i < 0) |
719 |
|
fprintf(stderr, "%s\n", bunzip_errors[-i]); |
720 |
|
else if (read(STDIN_FILENO, &c, 1)) |
721 |
|
fprintf(stderr, "Trailing garbage ignored\n"); |
722 |
return -i; |
return -i; |
723 |
} |
} |
724 |
#endif |
#endif |