Magellan Linux

Contents of /trunk/glibc/patches/glibc-2.18-strcoll-CVE-2012-4412+4424.patch

Parent Directory | Revision Log


Revision 2283 - (show annotations) (download)
Mon Sep 16 11:57:11 2013 UTC (10 years, 7 months ago) by niro
File size: 25987 byte(s)
-glibc-2.18 CVEs and fixes
1 diff --git a/string/strcoll_l.c b/string/strcoll_l.c
2 index ecda08f..bb34a72 100644
3 --- a/string/strcoll_l.c
4 +++ b/string/strcoll_l.c
5 @@ -41,11 +41,434 @@
6
7 #include "../locale/localeinfo.h"
8
9 +/* Track status while looking for sequences in a string. */
10 +typedef struct
11 +{
12 + int len; /* Length of the current sequence. */
13 + size_t val; /* Position of the sequence relative to the
14 + previous non-ignored sequence. */
15 + size_t idxnow; /* Current index in sequences. */
16 + size_t idxmax; /* Maximum index in sequences. */
17 + size_t idxcnt; /* Current count of indices. */
18 + size_t backw; /* Current backward sequence index. */
19 + size_t backw_stop; /* Index where the backward sequences stop; ~0ul while nothing is pushed. */
20 + const USTRING_TYPE *us; /* Current read position in the string. */
21 + int32_t *idxarr; /* Array to cache weight indices. */
22 + unsigned char *rulearr; /* Array to cache rules. */
23 + unsigned char rule; /* Saved rule for the first sequence (set by the nocache traversal). */
24 + int32_t idx; /* Index to weight of the current sequence (used by the nocache routines). */
25 + int32_t save_idx; /* Save looked up index of a forward
26 + sequence after the last backward
27 + sequence. */
28 + const USTRING_TYPE *back_us; /* Beginning of the backward sequence. */
29 +} coll_seq;
30 +
31 +/* If the current sequence is used up (LEN is zero), advance SEQ to the next sequence with a nonzero length for pass PASS.
32 + The weight indices and rules are read from the caches in SEQ, so the string is not traversed. */
33 +static void
34 +get_next_seq_cached (coll_seq *seq, int nrules, int pass,
35 + const unsigned char *rulesets,
36 + const USTRING_TYPE *weights)
37 +{
38 + size_t val = seq->val = 0;
39 + int len = seq->len;
40 + size_t backw_stop = seq->backw_stop;
41 + size_t backw = seq->backw;
42 + size_t idxcnt = seq->idxcnt;
43 + size_t idxmax = seq->idxmax;
44 + size_t idxnow = seq->idxnow;
45 + unsigned char *rulearr = seq->rulearr;
46 + int32_t *idxarr = seq->idxarr;
47 +
48 + while (len == 0)
49 + {
50 + ++val; /* Counts every step taken, including entries whose length turns out to be zero. */
51 + if (backw_stop != ~0ul)
52 + {
53 + /* There is something pushed. */
54 + if (backw == backw_stop)
55 + {
56 + /* The last pushed character was handled. Continue
57 + with forward characters. */
58 + if (idxcnt < idxmax)
59 + {
60 + idxnow = idxcnt;
61 + backw_stop = ~0ul;
62 + }
63 + else
64 + {
65 + /* Nothing any more. The backward sequence
66 + ended with the last sequence in the string. */
67 + idxnow = ~0ul;
68 + break;
69 + }
70 + }
71 + else
72 + idxnow = --backw;
73 + }
74 + else
75 + {
76 + backw_stop = idxcnt;
77 +
78 + while (idxcnt < idxmax)
79 + {
80 + if ((rulesets[rulearr[idxcnt] * nrules + pass]
81 + & sort_backward) == 0)
82 + /* No more backward characters to push. */
83 + break;
84 + ++idxcnt;
85 + }
86 +
87 + if (backw_stop == idxcnt)
88 + {
89 + /* No sequence at all or just one. */
90 + if (idxcnt == idxmax)
91 + /* Note that LEN is still zero. */
92 + break;
93 +
94 + backw_stop = ~0ul;
95 + idxnow = idxcnt++;
96 + }
97 + else
98 + /* We pushed backward sequences. */
99 + idxnow = backw = idxcnt - 1;
100 + }
101 + len = weights[idxarr[idxnow]++]; /* Fetch the length and leave the cached index just past it. */
102 + }
103 +
104 + /* Write the local copies back to the structure. */
105 + seq->val = val;
106 + seq->len = len;
107 + seq->backw_stop = backw_stop;
108 + seq->backw = backw;
109 + seq->idxcnt = idxcnt;
110 + seq->idxnow = idxnow;
111 +}
112 +
113 +/* Get next sequence. Traverse the string as required, filling IDXARR and RULEARR as it goes; rules are looked up at column 0 of RULESETS (no pass offset). */
114 +static void
115 +get_next_seq (coll_seq *seq, int nrules, const unsigned char *rulesets,
116 + const USTRING_TYPE *weights, const int32_t *table,
117 + const USTRING_TYPE *extra, const int32_t *indirect)
118 +{
119 +#include WEIGHT_H
120 + size_t val = seq->val = 0;
121 + int len = seq->len;
122 + size_t backw_stop = seq->backw_stop;
123 + size_t backw = seq->backw;
124 + size_t idxcnt = seq->idxcnt;
125 + size_t idxmax = seq->idxmax;
126 + size_t idxnow = seq->idxnow;
127 + unsigned char *rulearr = seq->rulearr;
128 + int32_t *idxarr = seq->idxarr;
129 + const USTRING_TYPE *us = seq->us;
130 +
131 + while (len == 0)
132 + {
133 + ++val; /* Counts every step taken, including entries whose length turns out to be zero. */
134 + if (backw_stop != ~0ul)
135 + {
136 + /* There is something pushed. */
137 + if (backw == backw_stop)
138 + {
139 + /* The last pushed character was handled. Continue
140 + with forward characters. */
141 + if (idxcnt < idxmax)
142 + {
143 + idxnow = idxcnt;
144 + backw_stop = ~0ul;
145 + }
146 + else
147 + /* Nothing any more. The backward sequence ended with
148 + the last sequence in the string. Note that LEN
149 + is still zero. */
150 + break;
151 + }
152 + else
153 + idxnow = --backw;
154 + }
155 + else
156 + {
157 + backw_stop = idxmax;
158 +
159 + while (*us != L('\0'))
160 + {
161 + int32_t tmp = findidx (&us, -1);
162 + rulearr[idxmax] = tmp >> 24; /* Bits 24 and up are cached as the rule. */
163 + idxarr[idxmax] = tmp & 0xffffff; /* Low 24 bits are cached as the weight index. */
164 + idxcnt = idxmax++;
165 +
166 + if ((rulesets[rulearr[idxcnt] * nrules]
167 + & sort_backward) == 0)
168 + /* No more backward characters to push. */
169 + break;
170 + ++idxcnt;
171 + }
172 +
173 + if (backw_stop >= idxcnt)
174 + {
175 + /* No sequence at all or just one. */
176 + if (idxcnt == idxmax || backw_stop > idxcnt)
177 + /* Note that LEN is still zero. */
178 + break;
179 +
180 + backw_stop = ~0ul;
181 + idxnow = idxcnt;
182 + }
183 + else
184 + /* We pushed backward sequences. */
185 + idxnow = backw = idxcnt - 1;
186 + }
187 + len = weights[idxarr[idxnow]++]; /* Fetch the length and leave the cached index just past it. */
188 + }
189 +
190 + /* Write the local copies back to the structure. */
191 + seq->val = val;
192 + seq->len = len;
193 + seq->backw_stop = backw_stop;
194 + seq->backw = backw;
195 + seq->idxcnt = idxcnt;
196 + seq->idxmax = idxmax;
197 + seq->idxnow = idxnow;
198 + seq->us = us;
199 +}
200 +
201 +/* Get next sequence. Traverse the string as required. This function does not
202 + set or use any index or rule cache. */
203 +static void
204 +get_next_seq_nocache (coll_seq *seq, int nrules, const unsigned char *rulesets,
205 + const USTRING_TYPE *weights, const int32_t *table,
206 + const USTRING_TYPE *extra, const int32_t *indirect,
207 + int pass)
208 +{
209 +#include WEIGHT_H
210 + size_t val = seq->val = 0;
211 + int len = seq->len;
212 + size_t backw_stop = seq->backw_stop;
213 + size_t backw = seq->backw;
214 + size_t idxcnt = seq->idxcnt;
215 + size_t idxmax = seq->idxmax;
216 + int32_t idx = seq->idx;
217 + const USTRING_TYPE *us = seq->us;
218 +
219 + while (len == 0)
220 + {
221 + ++val; /* Counts every step taken, including entries whose length turns out to be zero. */
222 + if (backw_stop != ~0ul)
223 + {
224 + /* There is something pushed. */
225 + if (backw == backw_stop)
226 + {
227 + /* The last pushed character was handled. Continue
228 + with forward characters. */
229 + if (idxcnt < idxmax)
230 + {
231 + idx = seq->save_idx;
232 + backw_stop = ~0ul;
233 + }
234 + else
235 + {
236 + /* Nothing anymore. The backward sequence ended with
237 + the last sequence in the string. Note that len is
238 + still zero. */
239 + idx = 0;
240 + break;
241 + }
242 + }
243 + else
244 + {
245 + /* XXX Traverse BACKW sequences from the beginning of
246 + BACKW_STOP to get the next sequence. Is there a quicker way
247 + to do this? */
248 + size_t i = backw_stop;
249 + us = seq->back_us;
250 + while (i < backw)
251 + {
252 + int32_t tmp = findidx (&us, -1);
253 + idx = tmp & 0xffffff; /* Low 24 bits are the weight index. */
254 + i++;
255 + }
256 + --backw;
257 + us = seq->us;
258 + }
259 + }
260 + else
261 + {
262 + backw_stop = idxmax;
263 + int32_t prev_idx = idx;
264 +
265 + while (*us != L('\0'))
266 + {
267 + int32_t tmp = findidx (&us, -1);
268 + unsigned char rule = tmp >> 24; /* Bits 24 and up are the rule. */
269 + prev_idx = idx;
270 + idx = tmp & 0xffffff; /* Low 24 bits are the weight index. */
271 + idxcnt = idxmax++;
272 +
273 + /* Save the rule for the first sequence. */
274 + if (__glibc_unlikely (idxcnt == 0))
275 + seq->rule = rule;
276 +
277 + if ((rulesets[rule * nrules + pass]
278 + & sort_backward) == 0)
279 + /* No more backward characters to push. */
280 + break;
281 + ++idxcnt;
282 + }
283 +
284 + if (backw_stop >= idxcnt)
285 + {
286 + /* No sequence at all or just one. */
287 + if (idxcnt == idxmax || backw_stop > idxcnt)
288 + /* Note that len is still zero. */
289 + break;
290 +
291 + backw_stop = ~0ul;
292 + }
293 + else
294 + {
295 + /* We pushed backward sequences. If the stream ended with the
296 + backward sequence, then we process the last sequence we
297 + found. Otherwise we process the sequence before the last
298 + one since the last one was a forward sequence. */
299 + seq->back_us = seq->us;
300 + seq->us = us;
301 + backw = idxcnt;
302 + if (idxmax > idxcnt)
303 + {
304 + backw--;
305 + seq->save_idx = idx;
306 + idx = prev_idx;
307 + }
308 + if (backw > backw_stop)
309 + backw--;
310 + }
311 + }
312 +
313 + len = weights[idx++];
314 + /* Skip over indices of previous levels. */
315 + for (int i = 0; i < pass; i++)
316 + {
317 + idx += len;
318 + len = weights[idx];
319 + idx++;
320 + }
321 + }
322 +
323 + /* Write the local copies back to the structure. */
324 + seq->val = val;
325 + seq->len = len;
326 + seq->backw_stop = backw_stop;
327 + seq->backw = backw;
328 + seq->idxcnt = idxcnt;
329 + seq->idxmax = idxmax;
330 + seq->us = us;
331 + seq->idx = idx;
332 +}
333 +
334 +/* Compare two sequences without using the index and rules cache. Returns zero if
335 + no difference was found and nonzero otherwise; LEN and IDX of both sequences are written back. */
336 +static int
337 +do_compare_nocache (coll_seq *seq1, coll_seq *seq2, int position,
338 + const USTRING_TYPE *weights)
339 +{
340 + int seq1len = seq1->len;
341 + int seq2len = seq2->len;
342 + size_t val1 = seq1->val;
343 + size_t val2 = seq2->val;
344 + int idx1 = seq1->idx;
345 + int idx2 = seq2->idx;
346 + int result = 0;
347 +
348 + /* Test for position if necessary. */
349 + if (position && val1 != val2)
350 + {
351 + result = val1 > val2 ? 1 : -1;
352 + goto out;
353 + }
354 +
355 + /* Compare the two sequences. */
356 + do
357 + {
358 + if (weights[idx1] != weights[idx2])
359 + {
360 + /* The sequences differ. */
361 + result = weights[idx1] - weights[idx2];
362 + goto out;
363 + }
364 +
365 + /* Increment the offsets. */
366 + ++idx1;
367 + ++idx2;
368 +
369 + --seq1len;
370 + --seq2len;
371 + }
372 + while (seq1len > 0 && seq2len > 0);
373 +
374 + if (position && seq1len != seq2len)
375 + result = seq1len - seq2len;
376 +
377 +out:
378 + seq1->len = seq1len;
379 + seq2->len = seq2len;
380 + seq1->idx = idx1;
381 + seq2->idx = idx2;
382 + return result;
383 +}
384 +
385 +/* Compare two sequences using the index cache. Returns zero if no difference was found and nonzero otherwise; the cached indices are advanced in place and LEN of both sequences is written back. */
386 +static int
387 +do_compare (coll_seq *seq1, coll_seq *seq2, int position,
388 + const USTRING_TYPE *weights)
389 +{
390 + int seq1len = seq1->len;
391 + int seq2len = seq2->len;
392 + size_t val1 = seq1->val;
393 + size_t val2 = seq2->val;
394 + int32_t *idx1arr = seq1->idxarr;
395 + int32_t *idx2arr = seq2->idxarr;
396 + int idx1now = seq1->idxnow;
397 + int idx2now = seq2->idxnow;
398 + int result = 0;
399 +
400 + /* Test for position if necessary. */
401 + if (position && val1 != val2)
402 + {
403 + result = val1 > val2 ? 1 : -1;
404 + goto out;
405 + }
406 +
407 + /* Compare the two sequences. */
408 + do
409 + {
410 + if (weights[idx1arr[idx1now]] != weights[idx2arr[idx2now]])
411 + {
412 + /* The sequences differ. */
413 + result = weights[idx1arr[idx1now]] - weights[idx2arr[idx2now]];
414 + goto out;
415 + }
416 +
417 + /* Increment the offsets stored in the cache arrays. */
418 + ++idx1arr[idx1now];
419 + ++idx2arr[idx2now];
420 +
421 + --seq1len;
422 + --seq2len;
423 + }
424 + while (seq1len > 0 && seq2len > 0);
425 +
426 + if (position && seq1len != seq2len)
427 + result = seq1len - seq2len;
428 +
429 +out:
430 + seq1->len = seq1len;
431 + seq2->len = seq2len;
432 + return result;
433 +}
434 +
435 int
436 -STRCOLL (s1, s2, l)
437 - const STRING_TYPE *s1;
438 - const STRING_TYPE *s2;
439 - __locale_t l;
440 +STRCOLL (const STRING_TYPE *s1, const STRING_TYPE *s2, __locale_t l)
441 {
442 struct __locale_data *current = l->__locales[LC_COLLATE];
443 uint_fast32_t nrules = current->values[_NL_ITEM_INDEX (_NL_COLLATE_NRULES)].word;
444 @@ -56,34 +479,6 @@ STRCOLL (s1, s2, l)
445 const USTRING_TYPE *weights;
446 const USTRING_TYPE *extra;
447 const int32_t *indirect;
448 - uint_fast32_t pass;
449 - int result = 0;
450 - const USTRING_TYPE *us1;
451 - const USTRING_TYPE *us2;
452 - size_t s1len;
453 - size_t s2len;
454 - int32_t *idx1arr;
455 - int32_t *idx2arr;
456 - unsigned char *rule1arr;
457 - unsigned char *rule2arr;
458 - size_t idx1max;
459 - size_t idx2max;
460 - size_t idx1cnt;
461 - size_t idx2cnt;
462 - size_t idx1now;
463 - size_t idx2now;
464 - size_t backw1_stop;
465 - size_t backw2_stop;
466 - size_t backw1;
467 - size_t backw2;
468 - int val1;
469 - int val2;
470 - int position;
471 - int seq1len;
472 - int seq2len;
473 - int use_malloc;
474 -
475 -#include WEIGHT_H
476
477 if (nrules == 0)
478 return STRCMP (s1, s2);
479 @@ -98,7 +493,6 @@ STRCOLL (s1, s2, l)
480 current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_EXTRA,SUFFIX))].string;
481 indirect = (const int32_t *)
482 current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_INDIRECT,SUFFIX))].string;
483 - use_malloc = 0;
484
485 assert (((uintptr_t) table) % __alignof__ (table[0]) == 0);
486 assert (((uintptr_t) weights) % __alignof__ (weights[0]) == 0);
487 @@ -106,18 +500,13 @@ STRCOLL (s1, s2, l)
488 assert (((uintptr_t) indirect) % __alignof__ (indirect[0]) == 0);
489
490 /* We need this a few times. */
491 - s1len = STRLEN (s1);
492 - s2len = STRLEN (s2);
493 + size_t s1len = STRLEN (s1);
494 + size_t s2len = STRLEN (s2);
495
496 /* Catch empty strings. */
497 - if (__builtin_expect (s1len == 0, 0) || __builtin_expect (s2len == 0, 0))
498 + if (__glibc_unlikely (s1len == 0) || __glibc_unlikely (s2len == 0))
499 return (s1len != 0) - (s2len != 0);
500
501 - /* We need the elements of the strings as unsigned values since they
502 - are used as indeces. */
503 - us1 = (const USTRING_TYPE *) s1;
504 - us2 = (const USTRING_TYPE *) s2;
505 -
506 /* Perform the first pass over the string and while doing this find
507 and store the weights for each character. Since we want this to
508 be as fast as possible we are using `alloca' to store the temporary
509 @@ -127,411 +516,124 @@ STRCOLL (s1, s2, l)
510
511 Please note that the localedef programs makes sure that `position'
512 is not used at the first level. */
513 - if (! __libc_use_alloca ((s1len + s2len) * (sizeof (int32_t) + 1)))
514 - {
515 - idx1arr = (int32_t *) malloc ((s1len + s2len) * (sizeof (int32_t) + 1));
516 - idx2arr = &idx1arr[s1len];
517 - rule1arr = (unsigned char *) &idx2arr[s2len];
518 - rule2arr = &rule1arr[s1len];
519 -
520 - if (idx1arr == NULL)
521 - /* No memory. Well, go with the stack then.
522 -
523 - XXX Once this implementation is stable we will handle this
524 - differently. Instead of precomputing the indeces we will
525 - do this in time. This means, though, that this happens for
526 - every pass again. */
527 - goto try_stack;
528 - use_malloc = 1;
529 - }
530 - else
531 - {
532 - try_stack:
533 - idx1arr = (int32_t *) alloca (s1len * sizeof (int32_t));
534 - idx2arr = (int32_t *) alloca (s2len * sizeof (int32_t));
535 - rule1arr = (unsigned char *) alloca (s1len);
536 - rule2arr = (unsigned char *) alloca (s2len);
537 - }
538
539 - idx1cnt = 0;
540 - idx2cnt = 0;
541 - idx1max = 0;
542 - idx2max = 0;
543 - idx1now = 0;
544 - idx2now = 0;
545 - backw1_stop = ~0ul;
546 - backw2_stop = ~0ul;
547 - backw1 = ~0ul;
548 - backw2 = ~0ul;
549 - seq1len = 0;
550 - seq2len = 0;
551 - position = rulesets[0] & sort_position;
552 - while (1)
553 - {
554 - val1 = 0;
555 - val2 = 0;
556 -
557 - /* Get the next non-IGNOREd element for string `s1'. */
558 - if (seq1len == 0)
559 - do
560 - {
561 - ++val1;
562 -
563 - if (backw1_stop != ~0ul)
564 - {
565 - /* The is something pushed. */
566 - if (backw1 == backw1_stop)
567 - {
568 - /* The last pushed character was handled. Continue
569 - with forward characters. */
570 - if (idx1cnt < idx1max)
571 - {
572 - idx1now = idx1cnt;
573 - backw1_stop = ~0ul;
574 - }
575 - else
576 - /* Nothing anymore. The backward sequence ended with
577 - the last sequence in the string. Note that seq1len
578 - is still zero. */
579 - break;
580 - }
581 - else
582 - idx1now = --backw1;
583 - }
584 - else
585 - {
586 - backw1_stop = idx1max;
587 -
588 - while (*us1 != L('\0'))
589 - {
590 - int32_t tmp = findidx (&us1, -1);
591 - rule1arr[idx1max] = tmp >> 24;
592 - idx1arr[idx1max] = tmp & 0xffffff;
593 - idx1cnt = idx1max++;
594 -
595 - if ((rulesets[rule1arr[idx1cnt] * nrules]
596 - & sort_backward) == 0)
597 - /* No more backward characters to push. */
598 - break;
599 - ++idx1cnt;
600 - }
601 -
602 - if (backw1_stop >= idx1cnt)
603 - {
604 - /* No sequence at all or just one. */
605 - if (idx1cnt == idx1max || backw1_stop > idx1cnt)
606 - /* Note that seq1len is still zero. */
607 - break;
608 -
609 - backw1_stop = ~0ul;
610 - idx1now = idx1cnt;
611 - }
612 - else
613 - /* We pushed backward sequences. */
614 - idx1now = backw1 = idx1cnt - 1;
615 - }
616 - }
617 - while ((seq1len = weights[idx1arr[idx1now]++]) == 0);
618 -
619 - /* And the same for string `s2'. */
620 - if (seq2len == 0)
621 - do
622 - {
623 - ++val2;
624 -
625 - if (backw2_stop != ~0ul)
626 - {
627 - /* The is something pushed. */
628 - if (backw2 == backw2_stop)
629 - {
630 - /* The last pushed character was handled. Continue
631 - with forward characters. */
632 - if (idx2cnt < idx2max)
633 - {
634 - idx2now = idx2cnt;
635 - backw2_stop = ~0ul;
636 - }
637 - else
638 - /* Nothing anymore. The backward sequence ended with
639 - the last sequence in the string. Note that seq2len
640 - is still zero. */
641 - break;
642 - }
643 - else
644 - idx2now = --backw2;
645 - }
646 - else
647 - {
648 - backw2_stop = idx2max;
649 -
650 - while (*us2 != L('\0'))
651 - {
652 - int32_t tmp = findidx (&us2, -1);
653 - rule2arr[idx2max] = tmp >> 24;
654 - idx2arr[idx2max] = tmp & 0xffffff;
655 - idx2cnt = idx2max++;
656 -
657 - if ((rulesets[rule2arr[idx2cnt] * nrules]
658 - & sort_backward) == 0)
659 - /* No more backward characters to push. */
660 - break;
661 - ++idx2cnt;
662 - }
663 -
664 - if (backw2_stop >= idx2cnt)
665 - {
666 - /* No sequence at all or just one. */
667 - if (idx2cnt == idx2max || backw2_stop > idx2cnt)
668 - /* Note that seq1len is still zero. */
669 - break;
670 -
671 - backw2_stop = ~0ul;
672 - idx2now = idx2cnt;
673 - }
674 - else
675 - /* We pushed backward sequences. */
676 - idx2now = backw2 = idx2cnt - 1;
677 - }
678 - }
679 - while ((seq2len = weights[idx2arr[idx2now]++]) == 0);
680 -
681 - /* See whether any or both strings are empty. */
682 - if (seq1len == 0 || seq2len == 0)
683 - {
684 - if (seq1len == seq2len)
685 - /* Both ended. So far so good, both strings are equal at the
686 - first level. */
687 - break;
688 -
689 - /* This means one string is shorter than the other. Find out
690 - which one and return an appropriate value. */
691 - result = seq1len == 0 ? -1 : 1;
692 - goto free_and_return;
693 - }
694 + coll_seq seq1, seq2;
695 + bool use_malloc = false;
696 + int result = 0;
697
698 - /* Test for position if necessary. */
699 - if (position && val1 != val2)
700 - {
701 - result = val1 - val2;
702 - goto free_and_return;
703 - }
704 + memset (&seq1, 0, sizeof (seq1));
705 + seq2 = seq1;
706
707 - /* Compare the two sequences. */
708 - do
709 - {
710 - if (weights[idx1arr[idx1now]] != weights[idx2arr[idx2now]])
711 - {
712 - /* The sequences differ. */
713 - result = weights[idx1arr[idx1now]] - weights[idx2arr[idx2now]];
714 - goto free_and_return;
715 - }
716 + size_t size_max = SIZE_MAX / (sizeof (int32_t) + 1);
717
718 - /* Increment the offsets. */
719 - ++idx1arr[idx1now];
720 - ++idx2arr[idx2now];
721 + /* If the strings are long enough to cause overflow in the size request, then
722 + skip the allocation and proceed with the non-cached routines. */
723 + if (MIN (s1len, s2len) > size_max
724 + || MAX (s1len, s2len) > size_max - MIN (s1len, s2len))
725 + goto begin_collate;
726
727 - --seq1len;
728 - --seq2len;
729 - }
730 - while (seq1len > 0 && seq2len > 0);
731 + if (! __libc_use_alloca ((s1len + s2len) * (sizeof (int32_t) + 1)))
732 + {
733 + seq1.idxarr = (int32_t *) malloc ((s1len + s2len) * (sizeof (int32_t) + 1));
734
735 - if (position && seq1len != seq2len)
736 + /* If we failed to allocate memory, we leave everything as NULL so that
737 + we use the nocache version of traversal and comparison functions. */
738 + if (seq1.idxarr != NULL)
739 {
740 - result = seq1len - seq2len;
741 - goto free_and_return;
742 + seq2.idxarr = &seq1.idxarr[s1len];
743 + seq1.rulearr = (unsigned char *) &seq2.idxarr[s2len];
744 + seq2.rulearr = &seq1.rulearr[s1len];
745 + use_malloc = true;
746 }
747 }
748 + else
749 + {
750 + seq1.idxarr = (int32_t *) alloca (s1len * sizeof (int32_t));
751 + seq2.idxarr = (int32_t *) alloca (s2len * sizeof (int32_t));
752 + seq1.rulearr = (unsigned char *) alloca (s1len);
753 + seq2.rulearr = (unsigned char *) alloca (s2len);
754 + }
755
756 - /* Now the remaining passes over the weights. We now use the
757 - indeces we found before. */
758 - for (pass = 1; pass < nrules; ++pass)
759 + int rule;
760 +
761 + begin_collate:
762 + rule = 0;
763 + /* Cache values in the first pass and if needed, use them in subsequent
764 + passes. */
765 + for (int pass = 0; pass < nrules; ++pass)
766 {
767 + seq1.idxcnt = 0;
768 + seq1.idx = 0;
769 + seq2.idx = 0;
770 + seq1.backw_stop = ~0ul;
771 + seq1.backw = ~0ul;
772 + seq2.idxcnt = 0;
773 + seq2.backw_stop = ~0ul;
774 + seq2.backw = ~0ul;
775 +
776 + /* We need the elements of the strings as unsigned values since they
777 + are used as indices. */
778 + seq1.us = (const USTRING_TYPE *) s1;
779 + seq2.us = (const USTRING_TYPE *) s2;
780 +
781 /* We assume that if a rule has defined `position' in one section
782 this is true for all of them. */
783 - idx1cnt = 0;
784 - idx2cnt = 0;
785 - backw1_stop = ~0ul;
786 - backw2_stop = ~0ul;
787 - backw1 = ~0ul;
788 - backw2 = ~0ul;
789 - position = rulesets[rule1arr[0] * nrules + pass] & sort_position;
790 + int position = rulesets[rule * nrules + pass] & sort_position;
791
792 while (1)
793 {
794 - val1 = 0;
795 - val2 = 0;
796 -
797 - /* Get the next non-IGNOREd element for string `s1'. */
798 - if (seq1len == 0)
799 - do
800 - {
801 - ++val1;
802 -
803 - if (backw1_stop != ~0ul)
804 - {
805 - /* The is something pushed. */
806 - if (backw1 == backw1_stop)
807 - {
808 - /* The last pushed character was handled. Continue
809 - with forward characters. */
810 - if (idx1cnt < idx1max)
811 - {
812 - idx1now = idx1cnt;
813 - backw1_stop = ~0ul;
814 - }
815 - else
816 - {
817 - /* Nothing anymore. The backward sequence
818 - ended with the last sequence in the string. */
819 - idx1now = ~0ul;
820 - break;
821 - }
822 - }
823 - else
824 - idx1now = --backw1;
825 - }
826 - else
827 - {
828 - backw1_stop = idx1cnt;
829 -
830 - while (idx1cnt < idx1max)
831 - {
832 - if ((rulesets[rule1arr[idx1cnt] * nrules + pass]
833 - & sort_backward) == 0)
834 - /* No more backward characters to push. */
835 - break;
836 - ++idx1cnt;
837 - }
838 -
839 - if (backw1_stop == idx1cnt)
840 - {
841 - /* No sequence at all or just one. */
842 - if (idx1cnt == idx1max)
843 - /* Note that seq1len is still zero. */
844 - break;
845 -
846 - backw1_stop = ~0ul;
847 - idx1now = idx1cnt++;
848 - }
849 - else
850 - /* We pushed backward sequences. */
851 - idx1now = backw1 = idx1cnt - 1;
852 - }
853 - }
854 - while ((seq1len = weights[idx1arr[idx1now]++]) == 0);
855 -
856 - /* And the same for string `s2'. */
857 - if (seq2len == 0)
858 - do
859 - {
860 - ++val2;
861 -
862 - if (backw2_stop != ~0ul)
863 - {
864 - /* The is something pushed. */
865 - if (backw2 == backw2_stop)
866 - {
867 - /* The last pushed character was handled. Continue
868 - with forward characters. */
869 - if (idx2cnt < idx2max)
870 - {
871 - idx2now = idx2cnt;
872 - backw2_stop = ~0ul;
873 - }
874 - else
875 - {
876 - /* Nothing anymore. The backward sequence
877 - ended with the last sequence in the string. */
878 - idx2now = ~0ul;
879 - break;
880 - }
881 - }
882 - else
883 - idx2now = --backw2;
884 - }
885 - else
886 - {
887 - backw2_stop = idx2cnt;
888 -
889 - while (idx2cnt < idx2max)
890 - {
891 - if ((rulesets[rule2arr[idx2cnt] * nrules + pass]
892 - & sort_backward) == 0)
893 - /* No more backward characters to push. */
894 - break;
895 - ++idx2cnt;
896 - }
897 -
898 - if (backw2_stop == idx2cnt)
899 - {
900 - /* No sequence at all or just one. */
901 - if (idx2cnt == idx2max)
902 - /* Note that seq2len is still zero. */
903 - break;
904 -
905 - backw2_stop = ~0ul;
906 - idx2now = idx2cnt++;
907 - }
908 - else
909 - /* We pushed backward sequences. */
910 - idx2now = backw2 = idx2cnt - 1;
911 - }
912 - }
913 - while ((seq2len = weights[idx2arr[idx2now]++]) == 0);
914 + if (__glibc_unlikely (seq1.idxarr == NULL))
915 + {
916 + get_next_seq_nocache (&seq1, nrules, rulesets, weights, table,
917 + extra, indirect, pass);
918 + get_next_seq_nocache (&seq2, nrules, rulesets, weights, table,
919 + extra, indirect, pass);
920 + }
921 + else if (pass == 0)
922 + {
923 + get_next_seq (&seq1, nrules, rulesets, weights, table, extra,
924 + indirect);
925 + get_next_seq (&seq2, nrules, rulesets, weights, table, extra,
926 + indirect);
927 + }
928 + else
929 + {
930 + get_next_seq_cached (&seq1, nrules, pass, rulesets, weights);
931 + get_next_seq_cached (&seq2, nrules, pass, rulesets, weights);
932 + }
933
934 /* See whether any or both strings are empty. */
935 - if (seq1len == 0 || seq2len == 0)
936 + if (seq1.len == 0 || seq2.len == 0)
937 {
938 - if (seq1len == seq2len)
939 + if (seq1.len == seq2.len)
940 /* Both ended. So far so good, both strings are equal
941 at this level. */
942 break;
943
944 /* This means one string is shorter than the other. Find out
945 which one and return an appropriate value. */
946 - result = seq1len == 0 ? -1 : 1;
947 + result = seq1.len == 0 ? -1 : 1;
948 goto free_and_return;
949 }
950
951 - /* Test for position if necessary. */
952 - if (position && val1 != val2)
953 - {
954 - result = val1 - val2;
955 - goto free_and_return;
956 - }
957 -
958 - /* Compare the two sequences. */
959 - do
960 - {
961 - if (weights[idx1arr[idx1now]] != weights[idx2arr[idx2now]])
962 - {
963 - /* The sequences differ. */
964 - result = (weights[idx1arr[idx1now]]
965 - - weights[idx2arr[idx2now]]);
966 - goto free_and_return;
967 - }
968 -
969 - /* Increment the offsets. */
970 - ++idx1arr[idx1now];
971 - ++idx2arr[idx2now];
972 -
973 - --seq1len;
974 - --seq2len;
975 - }
976 - while (seq1len > 0 && seq2len > 0);
977 -
978 - if (position && seq1len != seq2len)
979 - {
980 - result = seq1len - seq2len;
981 - goto free_and_return;
982 - }
983 + if (__glibc_unlikely (seq1.idxarr == NULL))
984 + result = do_compare_nocache (&seq1, &seq2, position, weights);
985 + else
986 + result = do_compare (&seq1, &seq2, position, weights);
987 + if (result != 0)
988 + goto free_and_return;
989 }
990 +
991 + if (__glibc_likely (seq1.rulearr != NULL))
992 + rule = seq1.rulearr[0];
993 + else
994 + rule = seq1.rule;
995 }
996
997 /* Free the memory if needed. */
998 free_and_return:
999 if (use_malloc)
1000 - free (idx1arr);
1001 + free (seq1.idxarr);
1002
1003 return result;
1004 }