Contents of /trunk/glibc/patches/glibc-2.18-strcoll-CVE-2012-4412+4424.patch
Parent Directory | Revision Log
Revision 2283 -
(show annotations)
(download)
Mon Sep 16 11:57:11 2013 UTC (11 years ago) by niro
File size: 25987 byte(s)
-glibc-2.18 CVEs and fixes
1 | diff --git a/string/strcoll_l.c b/string/strcoll_l.c |
2 | index ecda08f..bb34a72 100644 |
3 | --- a/string/strcoll_l.c |
4 | +++ b/string/strcoll_l.c |
5 | @@ -41,11 +41,434 @@ |
6 | |
7 | #include "../locale/localeinfo.h" |
8 | |
9 | +/* Track status while looking for sequences in a string. One instance is kept per string being compared. */ |
10 | +typedef struct |
11 | +{ |
12 | + int len; /* Length of the current sequence; zero when none is pending. */ |
13 | + size_t val; /* Position of the sequence relative to the |
14 | + previous non-ignored sequence. */ |
15 | + size_t idxnow; /* Current index in sequences. */ |
16 | + size_t idxmax; /* Maximum index in sequences. */ |
17 | + size_t idxcnt; /* Current count of indices. */ |
18 | + size_t backw; /* Current backward sequence index. */ |
19 | + size_t backw_stop; /* Index where the backward sequences stop; ~0ul when nothing is pushed. */ |
20 | + const USTRING_TYPE *us; /* The string; advanced as it is traversed. */ |
21 | + int32_t *idxarr; /* Array to cache weight indices; NULL when no cache is used. */ |
22 | + unsigned char *rulearr; /* Array to cache rules. */ |
23 | + unsigned char rule; /* Saved rule for the first sequence (set by get_next_seq_nocache). */ |
24 | + int32_t idx; /* Index to weight of the current sequence (nocache path). */ |
25 | + int32_t save_idx; /* Save looked up index of a forward |
26 | + sequence after the last backward |
27 | + sequence. */ |
28 | + const USTRING_TYPE *back_us; /* Beginning of the backward sequence. */ |
29 | +} coll_seq; |
30 | + |
31 | +/* Get next sequence. The weight indices are cached, so we don't need to |
32 | + traverse the string. SEQ->len is left at zero when no sequence remains. */ |
33 | +static void |
34 | +get_next_seq_cached (coll_seq *seq, int nrules, int pass, |
35 | + const unsigned char *rulesets, |
36 | + const USTRING_TYPE *weights) |
37 | +{ |
38 | + size_t val = seq->val = 0; |
39 | + int len = seq->len; |
40 | + size_t backw_stop = seq->backw_stop; |
41 | + size_t backw = seq->backw; |
42 | + size_t idxcnt = seq->idxcnt; |
43 | + size_t idxmax = seq->idxmax; |
44 | + size_t idxnow = seq->idxnow; |
45 | + unsigned char *rulearr = seq->rulearr; |
46 | + int32_t *idxarr = seq->idxarr; |
47 | + |
48 | + while (len == 0) |
49 | + { |
50 | + ++val; |
51 | + if (backw_stop != ~0ul) |
52 | + { |
53 | + /* There is something pushed. */ |
54 | + if (backw == backw_stop) |
55 | + { |
56 | + /* The last pushed character was handled. Continue |
57 | + with forward characters. */ |
58 | + if (idxcnt < idxmax) |
59 | + { |
60 | + idxnow = idxcnt; |
61 | + backw_stop = ~0ul; |
62 | + } |
63 | + else |
64 | + { |
65 | + /* Nothing any more. The backward sequence ended with |
66 | + the last sequence in the string. LEN is still zero. */ |
67 | + idxnow = ~0ul; |
68 | + break; |
69 | + } |
70 | + } |
71 | + else |
72 | + idxnow = --backw; |
73 | + } |
74 | + else |
75 | + { |
76 | + backw_stop = idxcnt; |
77 | + |
78 | + while (idxcnt < idxmax) |
79 | + { |
80 | + if ((rulesets[rulearr[idxcnt] * nrules + pass] |
81 | + & sort_backward) == 0) |
82 | + /* No more backward characters to push. */ |
83 | + break; |
84 | + ++idxcnt; |
85 | + } |
86 | + |
87 | + if (backw_stop == idxcnt) |
88 | + { |
89 | + /* No sequence at all or just one. */ |
90 | + if (idxcnt == idxmax) |
91 | + /* Note that LEN is still zero. */ |
92 | + break; |
93 | + |
94 | + backw_stop = ~0ul; |
95 | + idxnow = idxcnt++; |
96 | + } |
97 | + else |
98 | + /* We pushed backward sequences. */ |
99 | + idxnow = backw = idxcnt - 1; |
100 | + } |
101 | + len = weights[idxarr[idxnow]++]; /* Read the length and step the cached index past it. */ |
102 | + } |
103 | + |
104 | + /* Write the local working copies back to the structure. */ |
105 | + seq->val = val; |
106 | + seq->len = len; |
107 | + seq->backw_stop = backw_stop; |
108 | + seq->backw = backw; |
109 | + seq->idxcnt = idxcnt; |
110 | + seq->idxnow = idxnow; |
111 | +} |
112 | + |
113 | +/* Get next sequence. Traverse the string as required, storing each rule and weight index found in SEQ->rulearr and SEQ->idxarr. */ |
114 | +static void |
115 | +get_next_seq (coll_seq *seq, int nrules, const unsigned char *rulesets, |
116 | + const USTRING_TYPE *weights, const int32_t *table, |
117 | + const USTRING_TYPE *extra, const int32_t *indirect) |
118 | +{ |
119 | +#include WEIGHT_H |
120 | + size_t val = seq->val = 0; |
121 | + int len = seq->len; |
122 | + size_t backw_stop = seq->backw_stop; |
123 | + size_t backw = seq->backw; |
124 | + size_t idxcnt = seq->idxcnt; |
125 | + size_t idxmax = seq->idxmax; |
126 | + size_t idxnow = seq->idxnow; |
127 | + unsigned char *rulearr = seq->rulearr; |
128 | + int32_t *idxarr = seq->idxarr; |
129 | + const USTRING_TYPE *us = seq->us; |
130 | + |
131 | + while (len == 0) |
132 | + { |
133 | + ++val; |
134 | + if (backw_stop != ~0ul) |
135 | + { |
136 | + /* There is something pushed. */ |
137 | + if (backw == backw_stop) |
138 | + { |
139 | + /* The last pushed character was handled. Continue |
140 | + with forward characters. */ |
141 | + if (idxcnt < idxmax) |
142 | + { |
143 | + idxnow = idxcnt; |
144 | + backw_stop = ~0ul; |
145 | + } |
146 | + else |
147 | + /* Nothing any more. The backward sequence ended with |
148 | + the last sequence in the string. Note that LEN |
149 | + is still zero. */ |
150 | + break; |
151 | + } |
152 | + else |
153 | + idxnow = --backw; |
154 | + } |
155 | + else |
156 | + { |
157 | + backw_stop = idxmax; |
158 | + |
159 | + while (*us != L('\0')) |
160 | + { |
161 | + int32_t tmp = findidx (&us, -1); |
162 | + rulearr[idxmax] = tmp >> 24; /* Top byte is cached as the rule. */ |
163 | + idxarr[idxmax] = tmp & 0xffffff; /* Low 24 bits are cached as the weight index. */ |
164 | + idxcnt = idxmax++; |
165 | + |
166 | + if ((rulesets[rulearr[idxcnt] * nrules] |
167 | + & sort_backward) == 0) |
168 | + /* No more backward characters to push. */ |
169 | + break; |
170 | + ++idxcnt; |
171 | + } |
172 | + |
173 | + if (backw_stop >= idxcnt) |
174 | + { |
175 | + /* No sequence at all or just one. */ |
176 | + if (idxcnt == idxmax || backw_stop > idxcnt) |
177 | + /* Note that LEN is still zero. */ |
178 | + break; |
179 | + |
180 | + backw_stop = ~0ul; |
181 | + idxnow = idxcnt; |
182 | + } |
183 | + else |
184 | + /* We pushed backward sequences. */ |
185 | + idxnow = backw = idxcnt - 1; |
186 | + } |
187 | + len = weights[idxarr[idxnow]++]; /* Read the length and step the cached index past it. */ |
188 | + } |
189 | + |
190 | + /* Write the local working copies back to the structure. */ |
191 | + seq->val = val; |
192 | + seq->len = len; |
193 | + seq->backw_stop = backw_stop; |
194 | + seq->backw = backw; |
195 | + seq->idxcnt = idxcnt; |
196 | + seq->idxmax = idxmax; |
197 | + seq->idxnow = idxnow; |
198 | + seq->us = us; |
199 | +} |
200 | + |
201 | +/* Get next sequence. Traverse the string as required. This function does not |
202 | + set or use any index or rule cache. SEQ->len stays zero when nothing is left. */ |
203 | +static void |
204 | +get_next_seq_nocache (coll_seq *seq, int nrules, const unsigned char *rulesets, |
205 | + const USTRING_TYPE *weights, const int32_t *table, |
206 | + const USTRING_TYPE *extra, const int32_t *indirect, |
207 | + int pass) |
208 | +{ |
209 | +#include WEIGHT_H |
210 | + size_t val = seq->val = 0; |
211 | + int len = seq->len; |
212 | + size_t backw_stop = seq->backw_stop; |
213 | + size_t backw = seq->backw; |
214 | + size_t idxcnt = seq->idxcnt; |
215 | + size_t idxmax = seq->idxmax; |
216 | + int32_t idx = seq->idx; |
217 | + const USTRING_TYPE *us = seq->us; |
218 | + |
219 | + while (len == 0) |
220 | + { |
221 | + ++val; |
222 | + if (backw_stop != ~0ul) |
223 | + { |
224 | + /* There is something pushed. */ |
225 | + if (backw == backw_stop) |
226 | + { |
227 | + /* The last pushed character was handled. Continue |
228 | + with forward characters. */ |
229 | + if (idxcnt < idxmax) |
230 | + { |
231 | + idx = seq->save_idx; |
232 | + backw_stop = ~0ul; |
233 | + } |
234 | + else |
235 | + { |
236 | + /* Nothing anymore. The backward sequence ended with |
237 | + the last sequence in the string. Note that len is |
238 | + still zero. */ |
239 | + idx = 0; |
240 | + break; |
241 | + } |
242 | + } |
243 | + else |
244 | + { |
245 | + /* XXX Traverse BACKW sequences from the beginning of |
246 | + BACKW_STOP to get the next sequence. Is there a quicker way |
247 | + to do this? */ |
248 | + size_t i = backw_stop; |
249 | + us = seq->back_us; |
250 | + while (i < backw) |
251 | + { |
252 | + int32_t tmp = findidx (&us, -1); |
253 | + idx = tmp & 0xffffff; |
254 | + i++; |
255 | + } |
256 | + --backw; |
257 | + us = seq->us; |
258 | + } |
259 | + } |
260 | + else |
261 | + { |
262 | + backw_stop = idxmax; |
263 | + int32_t prev_idx = idx; |
264 | + |
265 | + while (*us != L('\0')) |
266 | + { |
267 | + int32_t tmp = findidx (&us, -1); |
268 | + unsigned char rule = tmp >> 24; |
269 | + prev_idx = idx; |
270 | + idx = tmp & 0xffffff; |
271 | + idxcnt = idxmax++; |
272 | + |
273 | + /* Save the rule for the first sequence. */ |
274 | + if (__glibc_unlikely (idxcnt == 0)) |
275 | + seq->rule = rule; |
276 | + |
277 | + if ((rulesets[rule * nrules + pass] |
278 | + & sort_backward) == 0) |
279 | + /* No more backward characters to push. */ |
280 | + break; |
281 | + ++idxcnt; |
282 | + } |
283 | + |
284 | + if (backw_stop >= idxcnt) |
285 | + { |
286 | + /* No sequence at all or just one. */ |
287 | + if (idxcnt == idxmax || backw_stop > idxcnt) |
288 | + /* Note that len is still zero. */ |
289 | + break; |
290 | + |
291 | + backw_stop = ~0ul; |
292 | + } |
293 | + else |
294 | + { |
295 | + /* We pushed backward sequences. If the stream ended with the |
296 | + backward sequence, then we process the last sequence we |
297 | + found. Otherwise we process the sequence before the last |
298 | + one since the last one was a forward sequence. */ |
299 | + seq->back_us = seq->us; |
300 | + seq->us = us; |
301 | + backw = idxcnt; |
302 | + if (idxmax > idxcnt) |
303 | + { |
304 | + backw--; |
305 | + seq->save_idx = idx; |
306 | + idx = prev_idx; |
307 | + } |
308 | + if (backw > backw_stop) |
309 | + backw--; |
310 | + } |
311 | + } |
312 | + |
313 | + len = weights[idx++]; |
314 | + /* Skip over indices of previous levels. */ |
315 | + for (int i = 0; i < pass; i++) |
316 | + { |
317 | + idx += len; |
318 | + len = weights[idx]; |
319 | + idx++; |
320 | + } |
321 | + } |
322 | + |
323 | + /* Write the local working copies back to the structure. */ |
324 | + seq->val = val; |
325 | + seq->len = len; |
326 | + seq->backw_stop = backw_stop; |
327 | + seq->backw = backw; |
328 | + seq->idxcnt = idxcnt; |
329 | + seq->idxmax = idxmax; |
330 | + seq->us = us; |
331 | + seq->idx = idx; |
332 | +} |
333 | + |
334 | +/* Compare two sequences. This version does not use the index and rules |
335 | + cache. Returns zero when no difference was found, nonzero otherwise. */ |
336 | +static int |
337 | +do_compare_nocache (coll_seq *seq1, coll_seq *seq2, int position, |
338 | + const USTRING_TYPE *weights) |
339 | +{ |
340 | + int seq1len = seq1->len; |
341 | + int seq2len = seq2->len; |
342 | + size_t val1 = seq1->val; |
343 | + size_t val2 = seq2->val; |
344 | + int idx1 = seq1->idx; |
345 | + int idx2 = seq2->idx; |
346 | + int result = 0; |
347 | + |
348 | + /* Test for position if necessary. */ |
349 | + if (position && val1 != val2) |
350 | + { |
351 | + result = val1 > val2 ? 1 : -1; |
352 | + goto out; |
353 | + } |
354 | + |
355 | + /* Compare the two sequences weight by weight. */ |
356 | + do |
357 | + { |
358 | + if (weights[idx1] != weights[idx2]) |
359 | + { |
360 | + /* The sequences differ. */ |
361 | + result = weights[idx1] - weights[idx2]; |
362 | + goto out; |
363 | + } |
364 | + |
365 | + /* Increment the offsets. */ |
366 | + ++idx1; |
367 | + ++idx2; |
368 | + |
369 | + --seq1len; |
370 | + --seq2len; |
371 | + } |
372 | + while (seq1len > 0 && seq2len > 0); |
373 | + |
374 | + if (position && seq1len != seq2len) |
375 | + result = seq1len - seq2len; |
376 | + |
377 | +out: |
378 | + seq1->len = seq1len; |
379 | + seq2->len = seq2len; |
380 | + seq1->idx = idx1; |
381 | + seq2->idx = idx2; |
382 | + return result; |
383 | +} |
384 | + |
385 | +/* Compare two sequences using the index cache. Returns zero when no difference was found, nonzero otherwise. */ |
386 | +static int |
387 | +do_compare (coll_seq *seq1, coll_seq *seq2, int position, |
388 | + const USTRING_TYPE *weights) |
389 | +{ |
390 | + int seq1len = seq1->len; |
391 | + int seq2len = seq2->len; |
392 | + size_t val1 = seq1->val; |
393 | + size_t val2 = seq2->val; |
394 | + int32_t *idx1arr = seq1->idxarr; |
395 | + int32_t *idx2arr = seq2->idxarr; |
396 | + int idx1now = seq1->idxnow; /* NOTE(review): size_t narrowed to int. */ |
397 | + int idx2now = seq2->idxnow; /* NOTE(review): size_t narrowed to int. */ |
398 | + int result = 0; |
399 | + |
400 | + /* Test for position if necessary. */ |
401 | + if (position && val1 != val2) |
402 | + { |
403 | + result = val1 > val2 ? 1 : -1; |
404 | + goto out; |
405 | + } |
406 | + |
407 | + /* Compare the two sequences weight by weight. */ |
408 | + do |
409 | + { |
410 | + if (weights[idx1arr[idx1now]] != weights[idx2arr[idx2now]]) |
411 | + { |
412 | + /* The sequences differ. */ |
413 | + result = weights[idx1arr[idx1now]] - weights[idx2arr[idx2now]]; |
414 | + goto out; |
415 | + } |
416 | + |
417 | + /* Increment the offsets stored in the cache arrays. */ |
418 | + ++idx1arr[idx1now]; |
419 | + ++idx2arr[idx2now]; |
420 | + |
421 | + --seq1len; |
422 | + --seq2len; |
423 | + } |
424 | + while (seq1len > 0 && seq2len > 0); |
425 | + |
426 | + if (position && seq1len != seq2len) |
427 | + result = seq1len - seq2len; |
428 | + |
429 | +out: |
430 | + seq1->len = seq1len; |
431 | + seq2->len = seq2len; |
432 | + return result; |
433 | +} |
434 | + |
435 | int |
436 | -STRCOLL (s1, s2, l) |
437 | - const STRING_TYPE *s1; |
438 | - const STRING_TYPE *s2; |
439 | - __locale_t l; |
440 | +STRCOLL (const STRING_TYPE *s1, const STRING_TYPE *s2, __locale_t l) |
441 | { |
442 | struct __locale_data *current = l->__locales[LC_COLLATE]; |
443 | uint_fast32_t nrules = current->values[_NL_ITEM_INDEX (_NL_COLLATE_NRULES)].word; |
444 | @@ -56,34 +479,6 @@ STRCOLL (s1, s2, l) |
445 | const USTRING_TYPE *weights; |
446 | const USTRING_TYPE *extra; |
447 | const int32_t *indirect; |
448 | - uint_fast32_t pass; |
449 | - int result = 0; |
450 | - const USTRING_TYPE *us1; |
451 | - const USTRING_TYPE *us2; |
452 | - size_t s1len; |
453 | - size_t s2len; |
454 | - int32_t *idx1arr; |
455 | - int32_t *idx2arr; |
456 | - unsigned char *rule1arr; |
457 | - unsigned char *rule2arr; |
458 | - size_t idx1max; |
459 | - size_t idx2max; |
460 | - size_t idx1cnt; |
461 | - size_t idx2cnt; |
462 | - size_t idx1now; |
463 | - size_t idx2now; |
464 | - size_t backw1_stop; |
465 | - size_t backw2_stop; |
466 | - size_t backw1; |
467 | - size_t backw2; |
468 | - int val1; |
469 | - int val2; |
470 | - int position; |
471 | - int seq1len; |
472 | - int seq2len; |
473 | - int use_malloc; |
474 | - |
475 | -#include WEIGHT_H |
476 | |
477 | if (nrules == 0) |
478 | return STRCMP (s1, s2); |
479 | @@ -98,7 +493,6 @@ STRCOLL (s1, s2, l) |
480 | current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_EXTRA,SUFFIX))].string; |
481 | indirect = (const int32_t *) |
482 | current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_INDIRECT,SUFFIX))].string; |
483 | - use_malloc = 0; |
484 | |
485 | assert (((uintptr_t) table) % __alignof__ (table[0]) == 0); |
486 | assert (((uintptr_t) weights) % __alignof__ (weights[0]) == 0); |
487 | @@ -106,18 +500,13 @@ STRCOLL (s1, s2, l) |
488 | assert (((uintptr_t) indirect) % __alignof__ (indirect[0]) == 0); |
489 | |
490 | /* We need this a few times. */ |
491 | - s1len = STRLEN (s1); |
492 | - s2len = STRLEN (s2); |
493 | + size_t s1len = STRLEN (s1); |
494 | + size_t s2len = STRLEN (s2); |
495 | |
496 | /* Catch empty strings. */ |
497 | - if (__builtin_expect (s1len == 0, 0) || __builtin_expect (s2len == 0, 0)) |
498 | + if (__glibc_unlikely (s1len == 0) || __glibc_unlikely (s2len == 0)) |
499 | return (s1len != 0) - (s2len != 0); |
500 | |
501 | - /* We need the elements of the strings as unsigned values since they |
502 | - are used as indeces. */ |
503 | - us1 = (const USTRING_TYPE *) s1; |
504 | - us2 = (const USTRING_TYPE *) s2; |
505 | - |
506 | /* Perform the first pass over the string and while doing this find |
507 | and store the weights for each character. Since we want this to |
508 | be as fast as possible we are using `alloca' to store the temporary |
509 | @@ -127,411 +516,124 @@ STRCOLL (s1, s2, l) |
510 | |
511 | Please note that the localedef programs makes sure that `position' |
512 | is not used at the first level. */ |
513 | - if (! __libc_use_alloca ((s1len + s2len) * (sizeof (int32_t) + 1))) |
514 | - { |
515 | - idx1arr = (int32_t *) malloc ((s1len + s2len) * (sizeof (int32_t) + 1)); |
516 | - idx2arr = &idx1arr[s1len]; |
517 | - rule1arr = (unsigned char *) &idx2arr[s2len]; |
518 | - rule2arr = &rule1arr[s1len]; |
519 | - |
520 | - if (idx1arr == NULL) |
521 | - /* No memory. Well, go with the stack then. |
522 | - |
523 | - XXX Once this implementation is stable we will handle this |
524 | - differently. Instead of precomputing the indeces we will |
525 | - do this in time. This means, though, that this happens for |
526 | - every pass again. */ |
527 | - goto try_stack; |
528 | - use_malloc = 1; |
529 | - } |
530 | - else |
531 | - { |
532 | - try_stack: |
533 | - idx1arr = (int32_t *) alloca (s1len * sizeof (int32_t)); |
534 | - idx2arr = (int32_t *) alloca (s2len * sizeof (int32_t)); |
535 | - rule1arr = (unsigned char *) alloca (s1len); |
536 | - rule2arr = (unsigned char *) alloca (s2len); |
537 | - } |
538 | |
539 | - idx1cnt = 0; |
540 | - idx2cnt = 0; |
541 | - idx1max = 0; |
542 | - idx2max = 0; |
543 | - idx1now = 0; |
544 | - idx2now = 0; |
545 | - backw1_stop = ~0ul; |
546 | - backw2_stop = ~0ul; |
547 | - backw1 = ~0ul; |
548 | - backw2 = ~0ul; |
549 | - seq1len = 0; |
550 | - seq2len = 0; |
551 | - position = rulesets[0] & sort_position; |
552 | - while (1) |
553 | - { |
554 | - val1 = 0; |
555 | - val2 = 0; |
556 | - |
557 | - /* Get the next non-IGNOREd element for string `s1'. */ |
558 | - if (seq1len == 0) |
559 | - do |
560 | - { |
561 | - ++val1; |
562 | - |
563 | - if (backw1_stop != ~0ul) |
564 | - { |
565 | - /* The is something pushed. */ |
566 | - if (backw1 == backw1_stop) |
567 | - { |
568 | - /* The last pushed character was handled. Continue |
569 | - with forward characters. */ |
570 | - if (idx1cnt < idx1max) |
571 | - { |
572 | - idx1now = idx1cnt; |
573 | - backw1_stop = ~0ul; |
574 | - } |
575 | - else |
576 | - /* Nothing anymore. The backward sequence ended with |
577 | - the last sequence in the string. Note that seq1len |
578 | - is still zero. */ |
579 | - break; |
580 | - } |
581 | - else |
582 | - idx1now = --backw1; |
583 | - } |
584 | - else |
585 | - { |
586 | - backw1_stop = idx1max; |
587 | - |
588 | - while (*us1 != L('\0')) |
589 | - { |
590 | - int32_t tmp = findidx (&us1, -1); |
591 | - rule1arr[idx1max] = tmp >> 24; |
592 | - idx1arr[idx1max] = tmp & 0xffffff; |
593 | - idx1cnt = idx1max++; |
594 | - |
595 | - if ((rulesets[rule1arr[idx1cnt] * nrules] |
596 | - & sort_backward) == 0) |
597 | - /* No more backward characters to push. */ |
598 | - break; |
599 | - ++idx1cnt; |
600 | - } |
601 | - |
602 | - if (backw1_stop >= idx1cnt) |
603 | - { |
604 | - /* No sequence at all or just one. */ |
605 | - if (idx1cnt == idx1max || backw1_stop > idx1cnt) |
606 | - /* Note that seq1len is still zero. */ |
607 | - break; |
608 | - |
609 | - backw1_stop = ~0ul; |
610 | - idx1now = idx1cnt; |
611 | - } |
612 | - else |
613 | - /* We pushed backward sequences. */ |
614 | - idx1now = backw1 = idx1cnt - 1; |
615 | - } |
616 | - } |
617 | - while ((seq1len = weights[idx1arr[idx1now]++]) == 0); |
618 | - |
619 | - /* And the same for string `s2'. */ |
620 | - if (seq2len == 0) |
621 | - do |
622 | - { |
623 | - ++val2; |
624 | - |
625 | - if (backw2_stop != ~0ul) |
626 | - { |
627 | - /* The is something pushed. */ |
628 | - if (backw2 == backw2_stop) |
629 | - { |
630 | - /* The last pushed character was handled. Continue |
631 | - with forward characters. */ |
632 | - if (idx2cnt < idx2max) |
633 | - { |
634 | - idx2now = idx2cnt; |
635 | - backw2_stop = ~0ul; |
636 | - } |
637 | - else |
638 | - /* Nothing anymore. The backward sequence ended with |
639 | - the last sequence in the string. Note that seq2len |
640 | - is still zero. */ |
641 | - break; |
642 | - } |
643 | - else |
644 | - idx2now = --backw2; |
645 | - } |
646 | - else |
647 | - { |
648 | - backw2_stop = idx2max; |
649 | - |
650 | - while (*us2 != L('\0')) |
651 | - { |
652 | - int32_t tmp = findidx (&us2, -1); |
653 | - rule2arr[idx2max] = tmp >> 24; |
654 | - idx2arr[idx2max] = tmp & 0xffffff; |
655 | - idx2cnt = idx2max++; |
656 | - |
657 | - if ((rulesets[rule2arr[idx2cnt] * nrules] |
658 | - & sort_backward) == 0) |
659 | - /* No more backward characters to push. */ |
660 | - break; |
661 | - ++idx2cnt; |
662 | - } |
663 | - |
664 | - if (backw2_stop >= idx2cnt) |
665 | - { |
666 | - /* No sequence at all or just one. */ |
667 | - if (idx2cnt == idx2max || backw2_stop > idx2cnt) |
668 | - /* Note that seq1len is still zero. */ |
669 | - break; |
670 | - |
671 | - backw2_stop = ~0ul; |
672 | - idx2now = idx2cnt; |
673 | - } |
674 | - else |
675 | - /* We pushed backward sequences. */ |
676 | - idx2now = backw2 = idx2cnt - 1; |
677 | - } |
678 | - } |
679 | - while ((seq2len = weights[idx2arr[idx2now]++]) == 0); |
680 | - |
681 | - /* See whether any or both strings are empty. */ |
682 | - if (seq1len == 0 || seq2len == 0) |
683 | - { |
684 | - if (seq1len == seq2len) |
685 | - /* Both ended. So far so good, both strings are equal at the |
686 | - first level. */ |
687 | - break; |
688 | - |
689 | - /* This means one string is shorter than the other. Find out |
690 | - which one and return an appropriate value. */ |
691 | - result = seq1len == 0 ? -1 : 1; |
692 | - goto free_and_return; |
693 | - } |
694 | + coll_seq seq1, seq2; |
695 | + bool use_malloc = false; |
696 | + int result = 0; |
697 | |
698 | - /* Test for position if necessary. */ |
699 | - if (position && val1 != val2) |
700 | - { |
701 | - result = val1 - val2; |
702 | - goto free_and_return; |
703 | - } |
704 | + memset (&seq1, 0, sizeof (seq1)); |
705 | + seq2 = seq1; |
706 | |
707 | - /* Compare the two sequences. */ |
708 | - do |
709 | - { |
710 | - if (weights[idx1arr[idx1now]] != weights[idx2arr[idx2now]]) |
711 | - { |
712 | - /* The sequences differ. */ |
713 | - result = weights[idx1arr[idx1now]] - weights[idx2arr[idx2now]]; |
714 | - goto free_and_return; |
715 | - } |
716 | + size_t size_max = SIZE_MAX / (sizeof (int32_t) + 1); |
717 | |
718 | - /* Increment the offsets. */ |
719 | - ++idx1arr[idx1now]; |
720 | - ++idx2arr[idx2now]; |
721 | + /* If the strings are long enough to cause overflow in the size request, then |
722 | + skip the allocation and proceed with the non-cached routines. */ |
723 | + if (MIN (s1len, s2len) > size_max |
724 | + || MAX (s1len, s2len) > size_max - MIN (s1len, s2len)) |
725 | + goto begin_collate; |
726 | |
727 | - --seq1len; |
728 | - --seq2len; |
729 | - } |
730 | - while (seq1len > 0 && seq2len > 0); |
731 | + if (! __libc_use_alloca ((s1len + s2len) * (sizeof (int32_t) + 1))) |
732 | + { |
733 | + seq1.idxarr = (int32_t *) malloc ((s1len + s2len) * (sizeof (int32_t) + 1)); |
734 | |
735 | - if (position && seq1len != seq2len) |
736 | + /* If we failed to allocate memory, we leave everything as NULL so that |
737 | + we use the nocache version of traversal and comparison functions. */ |
738 | + if (seq1.idxarr != NULL) |
739 | { |
740 | - result = seq1len - seq2len; |
741 | - goto free_and_return; |
742 | + seq2.idxarr = &seq1.idxarr[s1len]; |
743 | + seq1.rulearr = (unsigned char *) &seq2.idxarr[s2len]; |
744 | + seq2.rulearr = &seq1.rulearr[s1len]; |
745 | + use_malloc = true; |
746 | } |
747 | } |
748 | + else |
749 | + { |
750 | + seq1.idxarr = (int32_t *) alloca (s1len * sizeof (int32_t)); |
751 | + seq2.idxarr = (int32_t *) alloca (s2len * sizeof (int32_t)); |
752 | + seq1.rulearr = (unsigned char *) alloca (s1len); |
753 | + seq2.rulearr = (unsigned char *) alloca (s2len); |
754 | + } |
755 | |
756 | - /* Now the remaining passes over the weights. We now use the |
757 | - indeces we found before. */ |
758 | - for (pass = 1; pass < nrules; ++pass) |
759 | + int rule; |
760 | + |
761 | + begin_collate: |
762 | + rule = 0; |
763 | + /* Cache values in the first pass and if needed, use them in subsequent |
764 | + passes. */ |
765 | + for (int pass = 0; pass < nrules; ++pass) |
766 | { |
767 | + seq1.idxcnt = 0; |
768 | + seq1.idx = 0; |
769 | + seq2.idx = 0; |
770 | + seq1.backw_stop = ~0ul; |
771 | + seq1.backw = ~0ul; |
772 | + seq2.idxcnt = 0; |
773 | + seq2.backw_stop = ~0ul; |
774 | + seq2.backw = ~0ul; |
775 | + |
776 | + /* We need the elements of the strings as unsigned values since they |
777 | + are used as indices. */ |
778 | + seq1.us = (const USTRING_TYPE *) s1; |
779 | + seq2.us = (const USTRING_TYPE *) s2; |
780 | + |
781 | /* We assume that if a rule has defined `position' in one section |
782 | this is true for all of them. */ |
783 | - idx1cnt = 0; |
784 | - idx2cnt = 0; |
785 | - backw1_stop = ~0ul; |
786 | - backw2_stop = ~0ul; |
787 | - backw1 = ~0ul; |
788 | - backw2 = ~0ul; |
789 | - position = rulesets[rule1arr[0] * nrules + pass] & sort_position; |
790 | + int position = rulesets[rule * nrules + pass] & sort_position; |
791 | |
792 | while (1) |
793 | { |
794 | - val1 = 0; |
795 | - val2 = 0; |
796 | - |
797 | - /* Get the next non-IGNOREd element for string `s1'. */ |
798 | - if (seq1len == 0) |
799 | - do |
800 | - { |
801 | - ++val1; |
802 | - |
803 | - if (backw1_stop != ~0ul) |
804 | - { |
805 | - /* The is something pushed. */ |
806 | - if (backw1 == backw1_stop) |
807 | - { |
808 | - /* The last pushed character was handled. Continue |
809 | - with forward characters. */ |
810 | - if (idx1cnt < idx1max) |
811 | - { |
812 | - idx1now = idx1cnt; |
813 | - backw1_stop = ~0ul; |
814 | - } |
815 | - else |
816 | - { |
817 | - /* Nothing anymore. The backward sequence |
818 | - ended with the last sequence in the string. */ |
819 | - idx1now = ~0ul; |
820 | - break; |
821 | - } |
822 | - } |
823 | - else |
824 | - idx1now = --backw1; |
825 | - } |
826 | - else |
827 | - { |
828 | - backw1_stop = idx1cnt; |
829 | - |
830 | - while (idx1cnt < idx1max) |
831 | - { |
832 | - if ((rulesets[rule1arr[idx1cnt] * nrules + pass] |
833 | - & sort_backward) == 0) |
834 | - /* No more backward characters to push. */ |
835 | - break; |
836 | - ++idx1cnt; |
837 | - } |
838 | - |
839 | - if (backw1_stop == idx1cnt) |
840 | - { |
841 | - /* No sequence at all or just one. */ |
842 | - if (idx1cnt == idx1max) |
843 | - /* Note that seq1len is still zero. */ |
844 | - break; |
845 | - |
846 | - backw1_stop = ~0ul; |
847 | - idx1now = idx1cnt++; |
848 | - } |
849 | - else |
850 | - /* We pushed backward sequences. */ |
851 | - idx1now = backw1 = idx1cnt - 1; |
852 | - } |
853 | - } |
854 | - while ((seq1len = weights[idx1arr[idx1now]++]) == 0); |
855 | - |
856 | - /* And the same for string `s2'. */ |
857 | - if (seq2len == 0) |
858 | - do |
859 | - { |
860 | - ++val2; |
861 | - |
862 | - if (backw2_stop != ~0ul) |
863 | - { |
864 | - /* The is something pushed. */ |
865 | - if (backw2 == backw2_stop) |
866 | - { |
867 | - /* The last pushed character was handled. Continue |
868 | - with forward characters. */ |
869 | - if (idx2cnt < idx2max) |
870 | - { |
871 | - idx2now = idx2cnt; |
872 | - backw2_stop = ~0ul; |
873 | - } |
874 | - else |
875 | - { |
876 | - /* Nothing anymore. The backward sequence |
877 | - ended with the last sequence in the string. */ |
878 | - idx2now = ~0ul; |
879 | - break; |
880 | - } |
881 | - } |
882 | - else |
883 | - idx2now = --backw2; |
884 | - } |
885 | - else |
886 | - { |
887 | - backw2_stop = idx2cnt; |
888 | - |
889 | - while (idx2cnt < idx2max) |
890 | - { |
891 | - if ((rulesets[rule2arr[idx2cnt] * nrules + pass] |
892 | - & sort_backward) == 0) |
893 | - /* No more backward characters to push. */ |
894 | - break; |
895 | - ++idx2cnt; |
896 | - } |
897 | - |
898 | - if (backw2_stop == idx2cnt) |
899 | - { |
900 | - /* No sequence at all or just one. */ |
901 | - if (idx2cnt == idx2max) |
902 | - /* Note that seq2len is still zero. */ |
903 | - break; |
904 | - |
905 | - backw2_stop = ~0ul; |
906 | - idx2now = idx2cnt++; |
907 | - } |
908 | - else |
909 | - /* We pushed backward sequences. */ |
910 | - idx2now = backw2 = idx2cnt - 1; |
911 | - } |
912 | - } |
913 | - while ((seq2len = weights[idx2arr[idx2now]++]) == 0); |
914 | + if (__glibc_unlikely (seq1.idxarr == NULL)) |
915 | + { |
916 | + get_next_seq_nocache (&seq1, nrules, rulesets, weights, table, |
917 | + extra, indirect, pass); |
918 | + get_next_seq_nocache (&seq2, nrules, rulesets, weights, table, |
919 | + extra, indirect, pass); |
920 | + } |
921 | + else if (pass == 0) |
922 | + { |
923 | + get_next_seq (&seq1, nrules, rulesets, weights, table, extra, |
924 | + indirect); |
925 | + get_next_seq (&seq2, nrules, rulesets, weights, table, extra, |
926 | + indirect); |
927 | + } |
928 | + else |
929 | + { |
930 | + get_next_seq_cached (&seq1, nrules, pass, rulesets, weights); |
931 | + get_next_seq_cached (&seq2, nrules, pass, rulesets, weights); |
932 | + } |
933 | |
934 | /* See whether any or both strings are empty. */ |
935 | - if (seq1len == 0 || seq2len == 0) |
936 | + if (seq1.len == 0 || seq2.len == 0) |
937 | { |
938 | - if (seq1len == seq2len) |
939 | + if (seq1.len == seq2.len) |
940 | /* Both ended. So far so good, both strings are equal |
941 | at this level. */ |
942 | break; |
943 | |
944 | /* This means one string is shorter than the other. Find out |
945 | which one and return an appropriate value. */ |
946 | - result = seq1len == 0 ? -1 : 1; |
947 | + result = seq1.len == 0 ? -1 : 1; |
948 | goto free_and_return; |
949 | } |
950 | |
951 | - /* Test for position if necessary. */ |
952 | - if (position && val1 != val2) |
953 | - { |
954 | - result = val1 - val2; |
955 | - goto free_and_return; |
956 | - } |
957 | - |
958 | - /* Compare the two sequences. */ |
959 | - do |
960 | - { |
961 | - if (weights[idx1arr[idx1now]] != weights[idx2arr[idx2now]]) |
962 | - { |
963 | - /* The sequences differ. */ |
964 | - result = (weights[idx1arr[idx1now]] |
965 | - - weights[idx2arr[idx2now]]); |
966 | - goto free_and_return; |
967 | - } |
968 | - |
969 | - /* Increment the offsets. */ |
970 | - ++idx1arr[idx1now]; |
971 | - ++idx2arr[idx2now]; |
972 | - |
973 | - --seq1len; |
974 | - --seq2len; |
975 | - } |
976 | - while (seq1len > 0 && seq2len > 0); |
977 | - |
978 | - if (position && seq1len != seq2len) |
979 | - { |
980 | - result = seq1len - seq2len; |
981 | - goto free_and_return; |
982 | - } |
983 | + if (__glibc_unlikely (seq1.idxarr == NULL)) |
984 | + result = do_compare_nocache (&seq1, &seq2, position, weights); |
985 | + else |
986 | + result = do_compare (&seq1, &seq2, position, weights); |
987 | + if (result != 0) |
988 | + goto free_and_return; |
989 | } |
990 | + |
991 | + if (__glibc_likely (seq1.rulearr != NULL)) |
992 | + rule = seq1.rulearr[0]; |
993 | + else |
994 | + rule = seq1.rule; |
995 | } |
996 | |
997 | /* Free the memory if needed. */ |
998 | free_and_return: |
999 | if (use_malloc) |
1000 | - free (idx1arr); |
1001 | + free (seq1.idxarr); |
1002 | |
1003 | return result; |
1004 | } |