Magellan Linux

Annotation of /trunk/mkinitrd-magellan/busybox/archival/bz/blocksort.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 984 - (hide annotations) (download)
Sun May 30 11:32:42 2010 UTC (13 years, 11 months ago) by niro
File MIME type: text/plain
File size: 24222 byte(s)
-updated to busybox-1.16.1 and enabled blkid/uuid support in default config
1 niro 816 /*
2     * bzip2 is written by Julian Seward <jseward@bzip.org>.
3     * Adapted for busybox by Denys Vlasenko <vda.linux@googlemail.com>.
4     * See README and LICENSE files in this directory for more information.
5     */
6    
7     /*-------------------------------------------------------------*/
8     /*--- Block sorting machinery ---*/
9     /*--- blocksort.c ---*/
10     /*-------------------------------------------------------------*/
11    
12     /* ------------------------------------------------------------------
13     This file is part of bzip2/libbzip2, a program and library for
14     lossless, block-sorting data compression.
15    
16     bzip2/libbzip2 version 1.0.4 of 20 December 2006
17     Copyright (C) 1996-2006 Julian Seward <jseward@bzip.org>
18    
19     Please read the WARNING, DISCLAIMER and PATENTS sections in the
20     README file.
21    
22     This program is released under the terms of the license contained
23     in the file LICENSE.
24     ------------------------------------------------------------------ */
25    
26     /* #include "bzlib_private.h" */
27    
28     #define mswap(zz1, zz2) \
29     { \
30     int32_t zztmp = zz1; \
31     zz1 = zz2; \
32     zz2 = zztmp; \
33     }
34    
35     static
36     /* No measurable speed gain with inlining */
37     /* ALWAYS_INLINE */
38     void mvswap(uint32_t* ptr, int32_t zzp1, int32_t zzp2, int32_t zzn)
39     {
40     while (zzn > 0) {
41     mswap(ptr[zzp1], ptr[zzp2]);
42     zzp1++;
43     zzp2++;
44     zzn--;
45     }
46     }
47    
48     static
49     ALWAYS_INLINE
50     int32_t mmin(int32_t a, int32_t b)
51     {
52     return (a < b) ? a : b;
53     }
54    
55    
56     /*---------------------------------------------*/
57     /*--- Fallback O(N log(N)^2) sorting ---*/
58     /*--- algorithm, for repetitive blocks ---*/
59     /*---------------------------------------------*/
60    
61     /*---------------------------------------------*/
62     static
63     inline
64     void fallbackSimpleSort(uint32_t* fmap,
65     uint32_t* eclass,
66     int32_t lo,
67     int32_t hi)
68     {
69     int32_t i, j, tmp;
70     uint32_t ec_tmp;
71    
72     if (lo == hi) return;
73    
74     if (hi - lo > 3) {
75     for (i = hi-4; i >= lo; i--) {
76     tmp = fmap[i];
77     ec_tmp = eclass[tmp];
78     for (j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4)
79     fmap[j-4] = fmap[j];
80     fmap[j-4] = tmp;
81     }
82     }
83    
84     for (i = hi-1; i >= lo; i--) {
85     tmp = fmap[i];
86     ec_tmp = eclass[tmp];
87     for (j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++)
88     fmap[j-1] = fmap[j];
89     fmap[j-1] = tmp;
90     }
91     }
92    
93    
94     /*---------------------------------------------*/
95     #define fpush(lz,hz) { \
96     stackLo[sp] = lz; \
97     stackHi[sp] = hz; \
98     sp++; \
99     }
100    
101     #define fpop(lz,hz) { \
102     sp--; \
103     lz = stackLo[sp]; \
104     hz = stackHi[sp]; \
105     }
106    
107     #define FALLBACK_QSORT_SMALL_THRESH 10
108     #define FALLBACK_QSORT_STACK_SIZE 100
109    
110     static
111     void fallbackQSort3(uint32_t* fmap,
112     uint32_t* eclass,
113     int32_t loSt,
114     int32_t hiSt)
115     {
116     int32_t unLo, unHi, ltLo, gtHi, n, m;
117     int32_t sp, lo, hi;
118     uint32_t med, r, r3;
119     int32_t stackLo[FALLBACK_QSORT_STACK_SIZE];
120     int32_t stackHi[FALLBACK_QSORT_STACK_SIZE];
121    
122     r = 0;
123    
124     sp = 0;
125     fpush(loSt, hiSt);
126    
127     while (sp > 0) {
128     AssertH(sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004);
129    
130     fpop(lo, hi);
131     if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
132     fallbackSimpleSort(fmap, eclass, lo, hi);
133     continue;
134     }
135    
136     /* Random partitioning. Median of 3 sometimes fails to
137     * avoid bad cases. Median of 9 seems to help but
138     * looks rather expensive. This too seems to work but
139     * is cheaper. Guidance for the magic constants
140     * 7621 and 32768 is taken from Sedgewick's algorithms
141     * book, chapter 35.
142     */
143     r = ((r * 7621) + 1) % 32768;
144     r3 = r % 3;
145     if (r3 == 0)
146     med = eclass[fmap[lo]];
147     else if (r3 == 1)
148     med = eclass[fmap[(lo+hi)>>1]];
149     else
150     med = eclass[fmap[hi]];
151    
152     unLo = ltLo = lo;
153     unHi = gtHi = hi;
154    
155     while (1) {
156     while (1) {
157     if (unLo > unHi) break;
158     n = (int32_t)eclass[fmap[unLo]] - (int32_t)med;
159     if (n == 0) {
160     mswap(fmap[unLo], fmap[ltLo]);
161     ltLo++;
162     unLo++;
163     continue;
164     };
165     if (n > 0) break;
166     unLo++;
167     }
168     while (1) {
169     if (unLo > unHi) break;
170     n = (int32_t)eclass[fmap[unHi]] - (int32_t)med;
171     if (n == 0) {
172     mswap(fmap[unHi], fmap[gtHi]);
173     gtHi--; unHi--;
174     continue;
175     };
176     if (n < 0) break;
177     unHi--;
178     }
179     if (unLo > unHi) break;
180     mswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
181     }
182    
183     AssertD(unHi == unLo-1, "fallbackQSort3(2)");
184    
185     if (gtHi < ltLo) continue;
186    
187     n = mmin(ltLo-lo, unLo-ltLo); mvswap(fmap, lo, unLo-n, n);
188     m = mmin(hi-gtHi, gtHi-unHi); mvswap(fmap, unLo, hi-m+1, m);
189    
190     n = lo + unLo - ltLo - 1;
191     m = hi - (gtHi - unHi) + 1;
192    
193     if (n - lo > hi - m) {
194     fpush(lo, n);
195     fpush(m, hi);
196     } else {
197     fpush(m, hi);
198     fpush(lo, n);
199     }
200     }
201     }
202    
203     #undef fpush
204     #undef fpop
205     #undef FALLBACK_QSORT_SMALL_THRESH
206     #undef FALLBACK_QSORT_STACK_SIZE
207    
208    
209     /*---------------------------------------------*/
210     /* Pre:
211     * nblock > 0
212     * eclass exists for [0 .. nblock-1]
213     * ((uint8_t*)eclass) [0 .. nblock-1] holds block
214     * ptr exists for [0 .. nblock-1]
215     *
216     * Post:
217     * ((uint8_t*)eclass) [0 .. nblock-1] holds block
218     * All other areas of eclass destroyed
219     * fmap [0 .. nblock-1] holds sorted order
220     * bhtab[0 .. 2+(nblock/32)] destroyed
221     */
222    
223     #define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31))
224     #define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
225     #define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
226     #define WORD_BH(zz) bhtab[(zz) >> 5]
227     #define UNALIGNED_BH(zz) ((zz) & 0x01f)
228    
229     static
230     void fallbackSort(uint32_t* fmap,
231     uint32_t* eclass,
232     uint32_t* bhtab,
233     int32_t nblock)
234     {
235     int32_t ftab[257];
236     int32_t ftabCopy[256];
237     int32_t H, i, j, k, l, r, cc, cc1;
238     int32_t nNotDone;
239     int32_t nBhtab;
240     uint8_t* eclass8 = (uint8_t*)eclass;
241    
242     /*
243     * Initial 1-char radix sort to generate
244     * initial fmap and initial BH bits.
245     */
246     for (i = 0; i < 257; i++) ftab[i] = 0;
247     for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
248     for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i];
249    
250     j = ftab[0]; /* bbox: optimized */
251     for (i = 1; i < 257; i++) {
252     j += ftab[i];
253     ftab[i] = j;
254     }
255    
256     for (i = 0; i < nblock; i++) {
257     j = eclass8[i];
258     k = ftab[j] - 1;
259     ftab[j] = k;
260     fmap[k] = i;
261     }
262    
263     nBhtab = 2 + ((uint32_t)nblock / 32); /* bbox: unsigned div is easier */
264     for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
265     for (i = 0; i < 256; i++) SET_BH(ftab[i]);
266    
267     /*
268     * Inductively refine the buckets. Kind-of an
269     * "exponential radix sort" (!), inspired by the
270     * Manber-Myers suffix array construction algorithm.
271     */
272    
273     /*-- set sentinel bits for block-end detection --*/
274     for (i = 0; i < 32; i++) {
275     SET_BH(nblock + 2*i);
276     CLEAR_BH(nblock + 2*i + 1);
277     }
278    
279     /*-- the log(N) loop --*/
280     H = 1;
281     while (1) {
282     j = 0;
283     for (i = 0; i < nblock; i++) {
284     if (ISSET_BH(i))
285     j = i;
286     k = fmap[i] - H;
287     if (k < 0)
288     k += nblock;
289     eclass[k] = j;
290     }
291    
292     nNotDone = 0;
293     r = -1;
294     while (1) {
295    
296     /*-- find the next non-singleton bucket --*/
297     k = r + 1;
298     while (ISSET_BH(k) && UNALIGNED_BH(k))
299     k++;
300     if (ISSET_BH(k)) {
301     while (WORD_BH(k) == 0xffffffff) k += 32;
302     while (ISSET_BH(k)) k++;
303     }
304     l = k - 1;
305     if (l >= nblock)
306     break;
307     while (!ISSET_BH(k) && UNALIGNED_BH(k))
308     k++;
309     if (!ISSET_BH(k)) {
310     while (WORD_BH(k) == 0x00000000) k += 32;
311     while (!ISSET_BH(k)) k++;
312     }
313     r = k - 1;
314     if (r >= nblock)
315     break;
316    
317     /*-- now [l, r] bracket current bucket --*/
318     if (r > l) {
319     nNotDone += (r - l + 1);
320     fallbackQSort3(fmap, eclass, l, r);
321    
322     /*-- scan bucket and generate header bits-- */
323     cc = -1;
324     for (i = l; i <= r; i++) {
325     cc1 = eclass[fmap[i]];
326     if (cc != cc1) {
327     SET_BH(i);
328     cc = cc1;
329     };
330     }
331     }
332     }
333    
334     H *= 2;
335     if (H > nblock || nNotDone == 0)
336     break;
337     }
338    
339     /*
340     * Reconstruct the original block in
341     * eclass8 [0 .. nblock-1], since the
342     * previous phase destroyed it.
343     */
344     j = 0;
345     for (i = 0; i < nblock; i++) {
346     while (ftabCopy[j] == 0)
347     j++;
348     ftabCopy[j]--;
349     eclass8[fmap[i]] = (uint8_t)j;
350     }
351     AssertH(j < 256, 1005);
352     }
353    
354     #undef SET_BH
355     #undef CLEAR_BH
356     #undef ISSET_BH
357     #undef WORD_BH
358     #undef UNALIGNED_BH
359    
360    
361     /*---------------------------------------------*/
362     /*--- The main, O(N^2 log(N)) sorting ---*/
363     /*--- algorithm. Faster for "normal" ---*/
364     /*--- non-repetitive blocks. ---*/
365     /*---------------------------------------------*/
366    
367     /*---------------------------------------------*/
368     static
369     NOINLINE
370     int mainGtU(
371     uint32_t i1,
372     uint32_t i2,
373     uint8_t* block,
374     uint16_t* quadrant,
375     uint32_t nblock,
376     int32_t* budget)
377     {
378     int32_t k;
379     uint8_t c1, c2;
380     uint16_t s1, s2;
381    
382     /* Loop unrolling here is actually very useful
383     * (generated code is much simpler),
384     * code size increase is only 270 bytes (i386)
385     * but speeds up compression 10% overall
386     */
387    
388     #if CONFIG_BZIP2_FEATURE_SPEED >= 1
389    
390     #define TIMES_8(code) \
391     code; code; code; code; \
392     code; code; code; code;
393     #define TIMES_12(code) \
394     code; code; code; code; \
395     code; code; code; code; \
396     code; code; code; code;
397    
398     #else
399    
400     #define TIMES_8(code) \
401     { \
402     int nn = 8; \
403     do { \
404     code; \
405     } while (--nn); \
406     }
407     #define TIMES_12(code) \
408     { \
409     int nn = 12; \
410     do { \
411     code; \
412     } while (--nn); \
413     }
414    
415     #endif
416    
417     AssertD(i1 != i2, "mainGtU");
418     TIMES_12(
419     c1 = block[i1]; c2 = block[i2];
420     if (c1 != c2) return (c1 > c2);
421     i1++; i2++;
422     )
423    
424     k = nblock + 8;
425    
426     do {
427     TIMES_8(
428     c1 = block[i1]; c2 = block[i2];
429     if (c1 != c2) return (c1 > c2);
430     s1 = quadrant[i1]; s2 = quadrant[i2];
431     if (s1 != s2) return (s1 > s2);
432     i1++; i2++;
433     )
434    
435     if (i1 >= nblock) i1 -= nblock;
436     if (i2 >= nblock) i2 -= nblock;
437    
438     (*budget)--;
439     k -= 8;
440     } while (k >= 0);
441    
442     return False;
443     }
444     #undef TIMES_8
445     #undef TIMES_12
446    
447     /*---------------------------------------------*/
448     /*
449     * Knuth's increments seem to work better
450     * than Incerpi-Sedgewick here. Possibly
451     * because the number of elems to sort is
452     * usually small, typically <= 20.
453     */
454     static
455     const int32_t incs[14] = {
456     1, 4, 13, 40, 121, 364, 1093, 3280,
457     9841, 29524, 88573, 265720,
458     797161, 2391484
459     };
460    
461     static
462     void mainSimpleSort(uint32_t* ptr,
463     uint8_t* block,
464     uint16_t* quadrant,
465     int32_t nblock,
466     int32_t lo,
467     int32_t hi,
468     int32_t d,
469     int32_t* budget)
470     {
471     int32_t i, j, h, bigN, hp;
472     uint32_t v;
473    
474     bigN = hi - lo + 1;
475     if (bigN < 2) return;
476    
477     hp = 0;
478     while (incs[hp] < bigN) hp++;
479     hp--;
480    
481     for (; hp >= 0; hp--) {
482     h = incs[hp];
483    
484     i = lo + h;
485     while (1) {
486     /*-- copy 1 --*/
487     if (i > hi) break;
488     v = ptr[i];
489     j = i;
490     while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) {
491     ptr[j] = ptr[j-h];
492     j = j - h;
493     if (j <= (lo + h - 1)) break;
494     }
495     ptr[j] = v;
496     i++;
497    
498     /* 1.5% overall speedup, +290 bytes */
499     #if CONFIG_BZIP2_FEATURE_SPEED >= 3
500     /*-- copy 2 --*/
501     if (i > hi) break;
502     v = ptr[i];
503     j = i;
504     while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) {
505     ptr[j] = ptr[j-h];
506     j = j - h;
507     if (j <= (lo + h - 1)) break;
508     }
509     ptr[j] = v;
510     i++;
511    
512     /*-- copy 3 --*/
513     if (i > hi) break;
514     v = ptr[i];
515     j = i;
516     while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) {
517     ptr[j] = ptr[j-h];
518     j = j - h;
519     if (j <= (lo + h - 1)) break;
520     }
521     ptr[j] = v;
522     i++;
523     #endif
524     if (*budget < 0) return;
525     }
526     }
527     }
528    
529    
530     /*---------------------------------------------*/
531     /*
532     * The following is an implementation of
533     * an elegant 3-way quicksort for strings,
534     * described in a paper "Fast Algorithms for
535     * Sorting and Searching Strings", by Robert
536     * Sedgewick and Jon L. Bentley.
537     */
538    
539     static
540     ALWAYS_INLINE
541     uint8_t mmed3(uint8_t a, uint8_t b, uint8_t c)
542     {
543     uint8_t t;
544     if (a > b) {
545     t = a;
546     a = b;
547     b = t;
548     };
549     /* here b >= a */
550     if (b > c) {
551     b = c;
552     if (a > b)
553     b = a;
554     }
555     return b;
556     }
557    
558     #define mpush(lz,hz,dz) \
559     { \
560     stackLo[sp] = lz; \
561     stackHi[sp] = hz; \
562     stackD [sp] = dz; \
563     sp++; \
564     }
565    
566     #define mpop(lz,hz,dz) \
567     { \
568     sp--; \
569     lz = stackLo[sp]; \
570     hz = stackHi[sp]; \
571     dz = stackD [sp]; \
572     }
573    
574     #define mnextsize(az) (nextHi[az] - nextLo[az])
575    
576     #define mnextswap(az,bz) \
577     { \
578     int32_t tz; \
579     tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
580     tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
581     tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; \
582     }
583    
584     #define MAIN_QSORT_SMALL_THRESH 20
585     #define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
586     #define MAIN_QSORT_STACK_SIZE 100
587    
588 niro 984 static NOINLINE
589 niro 816 void mainQSort3(uint32_t* ptr,
590     uint8_t* block,
591     uint16_t* quadrant,
592     int32_t nblock,
593     int32_t loSt,
594     int32_t hiSt,
595     int32_t dSt,
596     int32_t* budget)
597     {
598     int32_t unLo, unHi, ltLo, gtHi, n, m, med;
599     int32_t sp, lo, hi, d;
600    
601     int32_t stackLo[MAIN_QSORT_STACK_SIZE];
602     int32_t stackHi[MAIN_QSORT_STACK_SIZE];
603     int32_t stackD [MAIN_QSORT_STACK_SIZE];
604    
605     int32_t nextLo[3];
606     int32_t nextHi[3];
607     int32_t nextD [3];
608    
609     sp = 0;
610     mpush(loSt, hiSt, dSt);
611    
612     while (sp > 0) {
613     AssertH(sp < MAIN_QSORT_STACK_SIZE - 2, 1001);
614    
615     mpop(lo, hi, d);
616     if (hi - lo < MAIN_QSORT_SMALL_THRESH
617     || d > MAIN_QSORT_DEPTH_THRESH
618     ) {
619     mainSimpleSort(ptr, block, quadrant, nblock, lo, hi, d, budget);
620     if (*budget < 0)
621     return;
622     continue;
623     }
624     med = (int32_t) mmed3(block[ptr[lo ] + d],
625     block[ptr[hi ] + d],
626     block[ptr[(lo+hi) >> 1] + d]);
627    
628     unLo = ltLo = lo;
629     unHi = gtHi = hi;
630    
631     while (1) {
632     while (1) {
633     if (unLo > unHi)
634     break;
635     n = ((int32_t)block[ptr[unLo]+d]) - med;
636     if (n == 0) {
637     mswap(ptr[unLo], ptr[ltLo]);
638     ltLo++;
639     unLo++;
640     continue;
641     };
642     if (n > 0) break;
643     unLo++;
644     }
645     while (1) {
646     if (unLo > unHi)
647     break;
648     n = ((int32_t)block[ptr[unHi]+d]) - med;
649     if (n == 0) {
650     mswap(ptr[unHi], ptr[gtHi]);
651     gtHi--;
652     unHi--;
653     continue;
654     };
655     if (n < 0) break;
656     unHi--;
657     }
658     if (unLo > unHi)
659     break;
660     mswap(ptr[unLo], ptr[unHi]);
661     unLo++;
662     unHi--;
663     }
664    
665     AssertD(unHi == unLo-1, "mainQSort3(2)");
666    
667     if (gtHi < ltLo) {
668     mpush(lo, hi, d + 1);
669     continue;
670     }
671    
672     n = mmin(ltLo-lo, unLo-ltLo); mvswap(ptr, lo, unLo-n, n);
673     m = mmin(hi-gtHi, gtHi-unHi); mvswap(ptr, unLo, hi-m+1, m);
674    
675     n = lo + unLo - ltLo - 1;
676     m = hi - (gtHi - unHi) + 1;
677    
678     nextLo[0] = lo; nextHi[0] = n; nextD[0] = d;
679     nextLo[1] = m; nextHi[1] = hi; nextD[1] = d;
680     nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
681    
682     if (mnextsize(0) < mnextsize(1)) mnextswap(0, 1);
683     if (mnextsize(1) < mnextsize(2)) mnextswap(1, 2);
684     if (mnextsize(0) < mnextsize(1)) mnextswap(0, 1);
685    
686     AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)");
687     AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)");
688    
689     mpush(nextLo[0], nextHi[0], nextD[0]);
690     mpush(nextLo[1], nextHi[1], nextD[1]);
691     mpush(nextLo[2], nextHi[2], nextD[2]);
692     }
693     }
694    
695     #undef mpush
696     #undef mpop
697     #undef mnextsize
698     #undef mnextswap
699     #undef MAIN_QSORT_SMALL_THRESH
700     #undef MAIN_QSORT_DEPTH_THRESH
701     #undef MAIN_QSORT_STACK_SIZE
702    
703    
704     /*---------------------------------------------*/
705     /* Pre:
706     * nblock > N_OVERSHOOT
707     * block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
708     * ((uint8_t*)block32) [0 .. nblock-1] holds block
709     * ptr exists for [0 .. nblock-1]
710     *
711     * Post:
712     * ((uint8_t*)block32) [0 .. nblock-1] holds block
713     * All other areas of block32 destroyed
714     * ftab[0 .. 65536] destroyed
715     * ptr [0 .. nblock-1] holds sorted order
716     * if (*budget < 0), sorting was abandoned
717     */
718    
719     #define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
720     #define SETMASK (1 << 21)
721     #define CLEARMASK (~(SETMASK))
722    
723     static NOINLINE
724     void mainSort(EState* state,
725     uint32_t* ptr,
726     uint8_t* block,
727     uint16_t* quadrant,
728     uint32_t* ftab,
729     int32_t nblock,
730     int32_t* budget)
731     {
732     int32_t i, j, k, ss, sb;
733     uint8_t c1;
734     int32_t numQSorted;
735     uint16_t s;
736     Bool bigDone[256];
737     /* bbox: moved to EState to save stack
738     int32_t runningOrder[256];
739     int32_t copyStart[256];
740     int32_t copyEnd [256];
741     */
742     #define runningOrder (state->mainSort__runningOrder)
743     #define copyStart (state->mainSort__copyStart)
744     #define copyEnd (state->mainSort__copyEnd)
745    
746     /*-- set up the 2-byte frequency table --*/
747     /* was: for (i = 65536; i >= 0; i--) ftab[i] = 0; */
748     memset(ftab, 0, 65537 * sizeof(ftab[0]));
749    
750     j = block[0] << 8;
751     i = nblock - 1;
752     /* 3%, +300 bytes */
753     #if CONFIG_BZIP2_FEATURE_SPEED >= 2
754     for (; i >= 3; i -= 4) {
755     quadrant[i] = 0;
756     j = (j >> 8) | (((uint16_t)block[i]) << 8);
757     ftab[j]++;
758     quadrant[i-1] = 0;
759     j = (j >> 8) | (((uint16_t)block[i-1]) << 8);
760     ftab[j]++;
761     quadrant[i-2] = 0;
762     j = (j >> 8) | (((uint16_t)block[i-2]) << 8);
763     ftab[j]++;
764     quadrant[i-3] = 0;
765     j = (j >> 8) | (((uint16_t)block[i-3]) << 8);
766     ftab[j]++;
767     }
768     #endif
769     for (; i >= 0; i--) {
770     quadrant[i] = 0;
771     j = (j >> 8) | (((uint16_t)block[i]) << 8);
772     ftab[j]++;
773     }
774    
775     /*-- (emphasises close relationship of block & quadrant) --*/
776     for (i = 0; i < BZ_N_OVERSHOOT; i++) {
777     block [nblock+i] = block[i];
778     quadrant[nblock+i] = 0;
779     }
780    
781     /*-- Complete the initial radix sort --*/
782     j = ftab[0]; /* bbox: optimized */
783     for (i = 1; i <= 65536; i++) {
784     j += ftab[i];
785     ftab[i] = j;
786     }
787    
788     s = block[0] << 8;
789     i = nblock - 1;
790     #if CONFIG_BZIP2_FEATURE_SPEED >= 2
791     for (; i >= 3; i -= 4) {
792     s = (s >> 8) | (block[i] << 8);
793     j = ftab[s] - 1;
794     ftab[s] = j;
795     ptr[j] = i;
796     s = (s >> 8) | (block[i-1] << 8);
797     j = ftab[s] - 1;
798     ftab[s] = j;
799     ptr[j] = i-1;
800     s = (s >> 8) | (block[i-2] << 8);
801     j = ftab[s] - 1;
802     ftab[s] = j;
803     ptr[j] = i-2;
804     s = (s >> 8) | (block[i-3] << 8);
805     j = ftab[s] - 1;
806     ftab[s] = j;
807     ptr[j] = i-3;
808     }
809     #endif
810     for (; i >= 0; i--) {
811     s = (s >> 8) | (block[i] << 8);
812     j = ftab[s] - 1;
813     ftab[s] = j;
814     ptr[j] = i;
815     }
816    
817     /*
818     * Now ftab contains the first loc of every small bucket.
819     * Calculate the running order, from smallest to largest
820     * big bucket.
821     */
822     for (i = 0; i <= 255; i++) {
823     bigDone [i] = False;
824     runningOrder[i] = i;
825     }
826    
827     {
828     int32_t vv;
829     /* bbox: was: int32_t h = 1; */
830     /* do h = 3 * h + 1; while (h <= 256); */
831     uint32_t h = 364;
832    
833     do {
834     /*h = h / 3;*/
835     h = (h * 171) >> 9; /* bbox: fast h/3 */
836     for (i = h; i <= 255; i++) {
837     vv = runningOrder[i];
838     j = i;
839     while (BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv)) {
840     runningOrder[j] = runningOrder[j-h];
841     j = j - h;
842     if (j <= (h - 1))
843     goto zero;
844     }
845     zero:
846     runningOrder[j] = vv;
847     }
848     } while (h != 1);
849     }
850    
851     /*
852     * The main sorting loop.
853     */
854    
855     numQSorted = 0;
856    
857     for (i = 0; i <= 255; i++) {
858    
859     /*
860     * Process big buckets, starting with the least full.
861     * Basically this is a 3-step process in which we call
862     * mainQSort3 to sort the small buckets [ss, j], but
863     * also make a big effort to avoid the calls if we can.
864     */
865     ss = runningOrder[i];
866    
867     /*
868     * Step 1:
869     * Complete the big bucket [ss] by quicksorting
870     * any unsorted small buckets [ss, j], for j != ss.
871     * Hopefully previous pointer-scanning phases have already
872     * completed many of the small buckets [ss, j], so
873     * we don't have to sort them at all.
874     */
875     for (j = 0; j <= 255; j++) {
876     if (j != ss) {
877     sb = (ss << 8) + j;
878     if (!(ftab[sb] & SETMASK)) {
879     int32_t lo = ftab[sb] & CLEARMASK;
880     int32_t hi = (ftab[sb+1] & CLEARMASK) - 1;
881     if (hi > lo) {
882     mainQSort3(
883     ptr, block, quadrant, nblock,
884     lo, hi, BZ_N_RADIX, budget
885     );
886     if (*budget < 0) return;
887     numQSorted += (hi - lo + 1);
888     }
889     }
890     ftab[sb] |= SETMASK;
891     }
892     }
893    
894     AssertH(!bigDone[ss], 1006);
895    
896     /*
897     * Step 2:
898     * Now scan this big bucket [ss] so as to synthesise the
899     * sorted order for small buckets [t, ss] for all t,
900     * including, magically, the bucket [ss,ss] too.
901     * This will avoid doing Real Work in subsequent Step 1's.
902     */
903     {
904     for (j = 0; j <= 255; j++) {
905     copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK;
906     copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
907     }
908     for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
909     k = ptr[j] - 1;
910     if (k < 0)
911     k += nblock;
912     c1 = block[k];
913     if (!bigDone[c1])
914     ptr[copyStart[c1]++] = k;
915     }
916     for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
917     k = ptr[j]-1;
918     if (k < 0)
919     k += nblock;
920     c1 = block[k];
921     if (!bigDone[c1])
922     ptr[copyEnd[c1]--] = k;
923     }
924     }
925    
926     /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
927     * Necessity for this case is demonstrated by compressing
928     * a sequence of approximately 48.5 million of character
929     * 251; 1.0.0/1.0.1 will then die here. */
930     AssertH((copyStart[ss]-1 == copyEnd[ss]) \
931     || (copyStart[ss] == 0 && copyEnd[ss] == nblock-1), 1007);
932    
933     for (j = 0; j <= 255; j++)
934     ftab[(j << 8) + ss] |= SETMASK;
935    
936     /*
937     * Step 3:
938     * The [ss] big bucket is now done. Record this fact,
939     * and update the quadrant descriptors. Remember to
940     * update quadrants in the overshoot area too, if
941     * necessary. The "if (i < 255)" test merely skips
942     * this updating for the last bucket processed, since
943     * updating for the last bucket is pointless.
944     *
945     * The quadrant array provides a way to incrementally
946     * cache sort orderings, as they appear, so as to
947     * make subsequent comparisons in fullGtU() complete
948     * faster. For repetitive blocks this makes a big
949     * difference (but not big enough to be able to avoid
950     * the fallback sorting mechanism, exponential radix sort).
951     *
952     * The precise meaning is: at all times:
953     *
954     * for 0 <= i < nblock and 0 <= j <= nblock
955     *
956     * if block[i] != block[j],
957     *
958     * then the relative values of quadrant[i] and
959     * quadrant[j] are meaningless.
960     *
961     * else {
962     * if quadrant[i] < quadrant[j]
963     * then the string starting at i lexicographically
964     * precedes the string starting at j
965     *
966     * else if quadrant[i] > quadrant[j]
967     * then the string starting at j lexicographically
968     * precedes the string starting at i
969     *
970     * else
971     * the relative ordering of the strings starting
972     * at i and j has not yet been determined.
973     * }
974     */
975     bigDone[ss] = True;
976    
977     if (i < 255) {
978     int32_t bbStart = ftab[ss << 8] & CLEARMASK;
979     int32_t bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
980     int32_t shifts = 0;
981    
982     while ((bbSize >> shifts) > 65534) shifts++;
983    
984     for (j = bbSize-1; j >= 0; j--) {
985     int32_t a2update = ptr[bbStart + j];
986     uint16_t qVal = (uint16_t)(j >> shifts);
987     quadrant[a2update] = qVal;
988     if (a2update < BZ_N_OVERSHOOT)
989     quadrant[a2update + nblock] = qVal;
990     }
991     AssertH(((bbSize-1) >> shifts) <= 65535, 1002);
992     }
993     }
994     #undef runningOrder
995     #undef copyStart
996     #undef copyEnd
997     }
998    
999     #undef BIGFREQ
1000     #undef SETMASK
1001     #undef CLEARMASK
1002    
1003    
1004     /*---------------------------------------------*/
1005     /* Pre:
1006     * nblock > 0
1007     * arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
1008     * ((uint8_t*)arr2)[0 .. nblock-1] holds block
1009     * arr1 exists for [0 .. nblock-1]
1010     *
1011     * Post:
1012     * ((uint8_t*)arr2) [0 .. nblock-1] holds block
1013     * All other areas of block destroyed
1014     * ftab[0 .. 65536] destroyed
1015     * arr1[0 .. nblock-1] holds sorted order
1016     */
1017     static NOINLINE
1018     void BZ2_blockSort(EState* s)
1019     {
1020     /* In original bzip2 1.0.4, it's a parameter, but 30
1021     * (which was the default) should work ok. */
1022     enum { wfact = 30 };
1023    
1024     uint32_t* ptr = s->ptr;
1025     uint8_t* block = s->block;
1026     uint32_t* ftab = s->ftab;
1027     int32_t nblock = s->nblock;
1028     uint16_t* quadrant;
1029     int32_t budget;
1030     int32_t i;
1031    
1032     if (nblock < 10000) {
1033     fallbackSort(s->arr1, s->arr2, ftab, nblock);
1034     } else {
1035     /* Calculate the location for quadrant, remembering to get
1036     * the alignment right. Assumes that &(block[0]) is at least
1037     * 2-byte aligned -- this should be ok since block is really
1038     * the first section of arr2.
1039     */
1040     i = nblock + BZ_N_OVERSHOOT;
1041     if (i & 1) i++;
1042     quadrant = (uint16_t*)(&(block[i]));
1043    
1044     /* (wfact-1) / 3 puts the default-factor-30
1045     * transition point at very roughly the same place as
1046     * with v0.1 and v0.9.0.
1047     * Not that it particularly matters any more, since the
1048     * resulting compressed stream is now the same regardless
1049     * of whether or not we use the main sort or fallback sort.
1050     */
1051     budget = nblock * ((wfact-1) / 3);
1052    
1053     mainSort(s, ptr, block, quadrant, ftab, nblock, &budget);
1054     if (budget < 0) {
1055     fallbackSort(s->arr1, s->arr2, ftab, nblock);
1056     }
1057     }
1058    
1059     s->origPtr = -1;
1060     for (i = 0; i < s->nblock; i++)
1061     if (ptr[i] == 0) {
1062     s->origPtr = i;
1063     break;
1064     };
1065    
1066     AssertH(s->origPtr != -1, 1003);
1067     }
1068    
1069    
1070     /*-------------------------------------------------------------*/
1071     /*--- end blocksort.c ---*/
1072     /*-------------------------------------------------------------*/