Annotation of /trunk/glibc/patches/glibc-2.18-strcoll-CVE-2012-4412+4424.patch
Parent Directory | Revision Log
Revision 2283 -
(hide annotations)
(download)
Mon Sep 16 11:57:11 2013 UTC (11 years ago) by niro
File size: 25987 byte(s)
-glibc-2.18 CVEs and fixes
1 | niro | 2283 | diff --git a/string/strcoll_l.c b/string/strcoll_l.c |
2 | index ecda08f..bb34a72 100644 | ||
3 | --- a/string/strcoll_l.c | ||
4 | +++ b/string/strcoll_l.c | ||
5 | @@ -41,11 +41,434 @@ | ||
6 | |||
7 | #include "../locale/localeinfo.h" | ||
8 | |||
9 | +/* Track status while looking for sequences in a string. One instance is kept per string. */ | ||
10 | +typedef struct | ||
11 | +{ | ||
12 | + int len; /* Remaining length of the current sequence; 0 requests a new one. */ | ||
13 | + size_t val; /* Position of the sequence relative to the | ||
14 | + previous non-ignored sequence. */ | ||
15 | + size_t idxnow; /* Index of the current sequence in IDXARR. */ | ||
16 | + size_t idxmax; /* Maximum index in sequences (number looked up so far). */ | ||
17 | + size_t idxcnt; /* Current count of indices. */ | ||
18 | + size_t backw; /* Current backward sequence index. */ | ||
19 | + size_t backw_stop; /* Index where the backward sequences stop; ~0ul if none are pushed. */ | ||
20 | + const USTRING_TYPE *us; /* The string; updated as it is traversed. */ | ||
21 | + int32_t *idxarr; /* Array to cache weight indices; NULL when no cache is used. */ | ||
22 | + unsigned char *rulearr; /* Array to cache rules. */ | ||
23 | + unsigned char rule; /* Saved rule for the first sequence. */ | ||
24 | + int32_t idx; /* Index to weight of the current sequence. */ | ||
25 | + int32_t save_idx; /* Save looked up index of a forward | ||
26 | + sequence after the last backward | ||
27 | + sequence. */ | ||
28 | + const USTRING_TYPE *back_us; /* Beginning of the backward sequence. */ | ||
29 | +} coll_seq; | ||
30 | + | ||
31 | +/* Get the next non-ignored sequence of SEQ for pass PASS. The weight indices are cached, so we | ||
32 | + don't need to traverse the string. On return seq->len is zero if no sequence is left. */ | ||
33 | +static void | ||
34 | +get_next_seq_cached (coll_seq *seq, int nrules, int pass, | ||
35 | + const unsigned char *rulesets, | ||
36 | + const USTRING_TYPE *weights) | ||
37 | +{ | ||
38 | + size_t val = seq->val = 0; | ||
39 | + int len = seq->len; | ||
40 | + size_t backw_stop = seq->backw_stop; | ||
41 | + size_t backw = seq->backw; | ||
42 | + size_t idxcnt = seq->idxcnt; | ||
43 | + size_t idxmax = seq->idxmax; | ||
44 | + size_t idxnow = seq->idxnow; | ||
45 | + unsigned char *rulearr = seq->rulearr; | ||
46 | + int32_t *idxarr = seq->idxarr; | ||
47 | + | ||
48 | + while (len == 0) | ||
49 | + { | ||
50 | + ++val; | ||
51 | + if (backw_stop != ~0ul) | ||
52 | + { | ||
53 | + /* There is something pushed. */ | ||
54 | + if (backw == backw_stop) | ||
55 | + { | ||
56 | + /* The last pushed character was handled. Continue | ||
57 | + with forward characters. */ | ||
58 | + if (idxcnt < idxmax) | ||
59 | + { | ||
60 | + idxnow = idxcnt; | ||
61 | + backw_stop = ~0ul; | ||
62 | + } | ||
63 | + else | ||
64 | + { | ||
65 | + /* Nothing any more. The backward sequence | ||
66 | + ended with the last sequence in the string. */ | ||
67 | + idxnow = ~0ul; | ||
68 | + break; | ||
69 | + } | ||
70 | + } | ||
71 | + else | ||
72 | + idxnow = --backw; | ||
73 | + } | ||
74 | + else | ||
75 | + { | ||
76 | + backw_stop = idxcnt; | ||
77 | + | ||
78 | + while (idxcnt < idxmax) | ||
79 | + { | ||
80 | + if ((rulesets[rulearr[idxcnt] * nrules + pass] | ||
81 | + & sort_backward) == 0) | ||
82 | + /* No more backward characters to push. */ | ||
83 | + break; | ||
84 | + ++idxcnt; | ||
85 | + } | ||
86 | + | ||
87 | + if (backw_stop == idxcnt) | ||
88 | + { | ||
89 | + /* Nothing was pushed: no sequence is left, or the next one is a forward sequence. */ | ||
90 | + if (idxcnt == idxmax) | ||
91 | + /* Note that LEN is still zero. */ | ||
92 | + break; | ||
93 | + | ||
94 | + backw_stop = ~0ul; | ||
95 | + idxnow = idxcnt++; | ||
96 | + } | ||
97 | + else | ||
98 | + /* We pushed backward sequences. Start with the last one pushed. */ | ||
99 | + idxnow = backw = idxcnt - 1; | ||
100 | + } | ||
101 | + len = weights[idxarr[idxnow]++]; /* Read the length and step the cached index past it. */ | ||
102 | + } | ||
103 | + | ||
104 | + /* Update the structure with the local copies. */ | ||
105 | + seq->val = val; | ||
106 | + seq->len = len; | ||
107 | + seq->backw_stop = backw_stop; | ||
108 | + seq->backw = backw; | ||
109 | + seq->idxcnt = idxcnt; | ||
110 | + seq->idxnow = idxnow; | ||
111 | +} | ||
112 | + | ||
113 | +/* Get the next non-ignored sequence of SEQ. Traverse the string as required, filling IDXARR and RULEARR on the way. */ | ||
114 | +static void | ||
115 | +get_next_seq (coll_seq *seq, int nrules, const unsigned char *rulesets, | ||
116 | + const USTRING_TYPE *weights, const int32_t *table, | ||
117 | + const USTRING_TYPE *extra, const int32_t *indirect) | ||
118 | +{ | ||
119 | +#include WEIGHT_H | ||
120 | + size_t val = seq->val = 0; | ||
121 | + int len = seq->len; | ||
122 | + size_t backw_stop = seq->backw_stop; | ||
123 | + size_t backw = seq->backw; | ||
124 | + size_t idxcnt = seq->idxcnt; | ||
125 | + size_t idxmax = seq->idxmax; | ||
126 | + size_t idxnow = seq->idxnow; | ||
127 | + unsigned char *rulearr = seq->rulearr; | ||
128 | + int32_t *idxarr = seq->idxarr; | ||
129 | + const USTRING_TYPE *us = seq->us; | ||
130 | + | ||
131 | + while (len == 0) | ||
132 | + { | ||
133 | + ++val; | ||
134 | + if (backw_stop != ~0ul) | ||
135 | + { | ||
136 | + /* There is something pushed. */ | ||
137 | + if (backw == backw_stop) | ||
138 | + { | ||
139 | + /* The last pushed character was handled. Continue | ||
140 | + with forward characters. */ | ||
141 | + if (idxcnt < idxmax) | ||
142 | + { | ||
143 | + idxnow = idxcnt; | ||
144 | + backw_stop = ~0ul; | ||
145 | + } | ||
146 | + else | ||
147 | + /* Nothing any more. The backward sequence ended with | ||
148 | + the last sequence in the string. Note that LEN | ||
149 | + is still zero. */ | ||
150 | + break; | ||
151 | + } | ||
152 | + else | ||
153 | + idxnow = --backw; | ||
154 | + } | ||
155 | + else | ||
156 | + { | ||
157 | + backw_stop = idxmax; | ||
158 | + | ||
159 | + while (*us != L('\0')) | ||
160 | + { | ||
161 | + int32_t tmp = findidx (&us, -1); | ||
162 | + rulearr[idxmax] = tmp >> 24; /* Upper bits: the rule. */ | ||
163 | + idxarr[idxmax] = tmp & 0xffffff; /* Low 24 bits: index into WEIGHTS. */ | ||
164 | + idxcnt = idxmax++; | ||
165 | + | ||
166 | + if ((rulesets[rulearr[idxcnt] * nrules] | ||
167 | + & sort_backward) == 0) | ||
168 | + /* No more backward characters to push. */ | ||
169 | + break; | ||
170 | + ++idxcnt; | ||
171 | + } | ||
172 | + | ||
173 | + if (backw_stop >= idxcnt) | ||
174 | + { | ||
175 | + /* No sequence at all or just one (nothing was pushed backward). */ | ||
176 | + if (idxcnt == idxmax || backw_stop > idxcnt) | ||
177 | + /* Note that LEN is still zero. */ | ||
178 | + break; | ||
179 | + | ||
180 | + backw_stop = ~0ul; | ||
181 | + idxnow = idxcnt; | ||
182 | + } | ||
183 | + else | ||
184 | + /* We pushed backward sequences. Start with the last one pushed. */ | ||
185 | + idxnow = backw = idxcnt - 1; | ||
186 | + } | ||
187 | + len = weights[idxarr[idxnow]++]; /* Read the length and step the cached index past it. */ | ||
188 | + } | ||
189 | + | ||
190 | + /* Update the structure with the local copies. */ | ||
191 | + seq->val = val; | ||
192 | + seq->len = len; | ||
193 | + seq->backw_stop = backw_stop; | ||
194 | + seq->backw = backw; | ||
195 | + seq->idxcnt = idxcnt; | ||
196 | + seq->idxmax = idxmax; | ||
197 | + seq->idxnow = idxnow; | ||
198 | + seq->us = us; | ||
199 | +} | ||
200 | + | ||
201 | +/* Get the next non-ignored sequence of SEQ for pass PASS. Traverse the string as required. This | ||
202 | + function does not set or use any index or rule cache. On return seq->len is zero if no sequence is left. */ | ||
203 | +static void | ||
204 | +get_next_seq_nocache (coll_seq *seq, int nrules, const unsigned char *rulesets, | ||
205 | + const USTRING_TYPE *weights, const int32_t *table, | ||
206 | + const USTRING_TYPE *extra, const int32_t *indirect, | ||
207 | + int pass) | ||
208 | +{ | ||
209 | +#include WEIGHT_H | ||
210 | + size_t val = seq->val = 0; | ||
211 | + int len = seq->len; | ||
212 | + size_t backw_stop = seq->backw_stop; | ||
213 | + size_t backw = seq->backw; | ||
214 | + size_t idxcnt = seq->idxcnt; | ||
215 | + size_t idxmax = seq->idxmax; | ||
216 | + int32_t idx = seq->idx; | ||
217 | + const USTRING_TYPE *us = seq->us; | ||
218 | + | ||
219 | + while (len == 0) | ||
220 | + { | ||
221 | + ++val; | ||
222 | + if (backw_stop != ~0ul) | ||
223 | + { | ||
224 | + /* There is something pushed. */ | ||
225 | + if (backw == backw_stop) | ||
226 | + { | ||
227 | + /* The last pushed character was handled. Continue | ||
228 | + with forward characters. */ | ||
229 | + if (idxcnt < idxmax) | ||
230 | + { | ||
231 | + idx = seq->save_idx; | ||
232 | + backw_stop = ~0ul; | ||
233 | + } | ||
234 | + else | ||
235 | + { | ||
236 | + /* Nothing anymore. The backward sequence ended with | ||
237 | + the last sequence in the string. Note that len is | ||
238 | + still zero. */ | ||
239 | + idx = 0; | ||
240 | + break; | ||
241 | + } | ||
242 | + } | ||
243 | + else | ||
244 | + { | ||
245 | + /* XXX Re-traverse the backward sequences from their beginning | ||
246 | + (BACK_US, index BACKW_STOP) up to BACKW to get the next sequence. Is there a quicker way | ||
247 | + to do this? */ | ||
248 | + size_t i = backw_stop; | ||
249 | + us = seq->back_us; | ||
250 | + while (i < backw) | ||
251 | + { | ||
252 | + int32_t tmp = findidx (&us, -1); | ||
253 | + idx = tmp & 0xffffff; | ||
254 | + i++; | ||
255 | + } | ||
256 | + --backw; | ||
257 | + us = seq->us; | ||
258 | + } | ||
259 | + } | ||
260 | + else | ||
261 | + { | ||
262 | + backw_stop = idxmax; | ||
263 | + int32_t prev_idx = idx; | ||
264 | + | ||
265 | + while (*us != L('\0')) | ||
266 | + { | ||
267 | + int32_t tmp = findidx (&us, -1); | ||
268 | + unsigned char rule = tmp >> 24; | ||
269 | + prev_idx = idx; | ||
270 | + idx = tmp & 0xffffff; | ||
271 | + idxcnt = idxmax++; | ||
272 | + | ||
273 | + /* Save the rule for the first sequence. */ | ||
274 | + if (__glibc_unlikely (idxcnt == 0)) | ||
275 | + seq->rule = rule; | ||
276 | + | ||
277 | + if ((rulesets[rule * nrules + pass] | ||
278 | + & sort_backward) == 0) | ||
279 | + /* No more backward characters to push. */ | ||
280 | + break; | ||
281 | + ++idxcnt; | ||
282 | + } | ||
283 | + | ||
284 | + if (backw_stop >= idxcnt) | ||
285 | + { | ||
286 | + /* No sequence at all or just one (nothing was pushed backward). */ | ||
287 | + if (idxcnt == idxmax || backw_stop > idxcnt) | ||
288 | + /* Note that len is still zero. */ | ||
289 | + break; | ||
290 | + | ||
291 | + backw_stop = ~0ul; | ||
292 | + } | ||
293 | + else | ||
294 | + { | ||
295 | + /* We pushed backward sequences. If the stream ended with the | ||
296 | + backward sequence, then we process the last sequence we | ||
297 | + found. Otherwise we process the sequence before the last | ||
298 | + one since the last one was a forward sequence. */ | ||
299 | + seq->back_us = seq->us; | ||
300 | + seq->us = us; | ||
301 | + backw = idxcnt; | ||
302 | + if (idxmax > idxcnt) | ||
303 | + { | ||
304 | + backw--; | ||
305 | + seq->save_idx = idx; | ||
306 | + idx = prev_idx; | ||
307 | + } | ||
308 | + if (backw > backw_stop) | ||
309 | + backw--; | ||
310 | + } | ||
311 | + } | ||
312 | + | ||
313 | + len = weights[idx++]; | ||
314 | + /* Skip over indices of previous levels: each level is a length followed by that many weights. */ | ||
315 | + for (int i = 0; i < pass; i++) | ||
316 | + { | ||
317 | + idx += len; | ||
318 | + len = weights[idx]; | ||
319 | + idx++; | ||
320 | + } | ||
321 | + } | ||
322 | + | ||
323 | + /* Update the structure with the local copies. */ | ||
324 | + seq->val = val; | ||
325 | + seq->len = len; | ||
326 | + seq->backw_stop = backw_stop; | ||
327 | + seq->backw = backw; | ||
328 | + seq->idxcnt = idxcnt; | ||
329 | + seq->idxmax = idxmax; | ||
330 | + seq->us = us; | ||
331 | + seq->idx = idx; | ||
332 | +} | ||
333 | + | ||
334 | +/* Compare the current sequences of SEQ1 and SEQ2. This version does not use the index and rules | ||
335 | + cache. Return zero if no difference was found, else a value whose sign gives the order. */ | ||
336 | +static int | ||
337 | +do_compare_nocache (coll_seq *seq1, coll_seq *seq2, int position, | ||
338 | + const USTRING_TYPE *weights) | ||
339 | +{ | ||
340 | + int seq1len = seq1->len; | ||
341 | + int seq2len = seq2->len; | ||
342 | + size_t val1 = seq1->val; | ||
343 | + size_t val2 = seq2->val; | ||
344 | + int idx1 = seq1->idx; | ||
345 | + int idx2 = seq2->idx; | ||
346 | + int result = 0; | ||
347 | + | ||
348 | + /* Test for position if necessary. */ | ||
349 | + if (position && val1 != val2) | ||
350 | + { | ||
351 | + result = val1 > val2 ? 1 : -1; | ||
352 | + goto out; | ||
353 | + } | ||
354 | + | ||
355 | + /* Compare the two sequences weight by weight. */ | ||
356 | + do | ||
357 | + { | ||
358 | + if (weights[idx1] != weights[idx2]) | ||
359 | + { | ||
360 | + /* The sequences differ. */ | ||
361 | + result = weights[idx1] - weights[idx2]; | ||
362 | + goto out; | ||
363 | + } | ||
364 | + | ||
365 | + /* Increment the offsets. */ | ||
366 | + ++idx1; | ||
367 | + ++idx2; | ||
368 | + | ||
369 | + --seq1len; | ||
370 | + --seq2len; | ||
371 | + } | ||
372 | + while (seq1len > 0 && seq2len > 0); | ||
373 | + | ||
374 | + if (position && seq1len != seq2len) | ||
375 | + result = seq1len - seq2len; | ||
376 | + | ||
377 | +out: | ||
378 | + seq1->len = seq1len; | ||
379 | + seq2->len = seq2len; | ||
380 | + seq1->idx = idx1; | ||
381 | + seq2->idx = idx2; | ||
382 | + return result; | ||
383 | +} | ||
384 | + | ||
385 | +/* Compare the current sequences of SEQ1 and SEQ2 using the index cache. Return zero if no difference was found, else a value whose sign gives the order. */ | ||
386 | +static int | ||
387 | +do_compare (coll_seq *seq1, coll_seq *seq2, int position, | ||
388 | + const USTRING_TYPE *weights) | ||
389 | +{ | ||
390 | + int seq1len = seq1->len; | ||
391 | + int seq2len = seq2->len; | ||
392 | + size_t val1 = seq1->val; | ||
393 | + size_t val2 = seq2->val; | ||
394 | + int32_t *idx1arr = seq1->idxarr; | ||
395 | + int32_t *idx2arr = seq2->idxarr; | ||
396 | + int idx1now = seq1->idxnow; | ||
397 | + int idx2now = seq2->idxnow; | ||
398 | + int result = 0; | ||
399 | + | ||
400 | + /* Test for position if necessary. */ | ||
401 | + if (position && val1 != val2) | ||
402 | + { | ||
403 | + result = val1 > val2 ? 1 : -1; | ||
404 | + goto out; | ||
405 | + } | ||
406 | + | ||
407 | + /* Compare the two sequences weight by weight. */ | ||
408 | + do | ||
409 | + { | ||
410 | + if (weights[idx1arr[idx1now]] != weights[idx2arr[idx2now]]) | ||
411 | + { | ||
412 | + /* The sequences differ. */ | ||
413 | + result = weights[idx1arr[idx1now]] - weights[idx2arr[idx2now]]; | ||
414 | + goto out; | ||
415 | + } | ||
416 | + | ||
417 | + /* Increment the offsets. This advances the cached indices in place. */ | ||
418 | + ++idx1arr[idx1now]; | ||
419 | + ++idx2arr[idx2now]; | ||
420 | + | ||
421 | + --seq1len; | ||
422 | + --seq2len; | ||
423 | + } | ||
424 | + while (seq1len > 0 && seq2len > 0); | ||
425 | + | ||
426 | + if (position && seq1len != seq2len) | ||
427 | + result = seq1len - seq2len; | ||
428 | + | ||
429 | +out: | ||
430 | + seq1->len = seq1len; | ||
431 | + seq2->len = seq2len; | ||
432 | + return result; | ||
433 | +} | ||
434 | + | ||
435 | int | ||
436 | -STRCOLL (s1, s2, l) | ||
437 | - const STRING_TYPE *s1; | ||
438 | - const STRING_TYPE *s2; | ||
439 | - __locale_t l; | ||
440 | +STRCOLL (const STRING_TYPE *s1, const STRING_TYPE *s2, __locale_t l) | ||
441 | { | ||
442 | struct __locale_data *current = l->__locales[LC_COLLATE]; | ||
443 | uint_fast32_t nrules = current->values[_NL_ITEM_INDEX (_NL_COLLATE_NRULES)].word; | ||
444 | @@ -56,34 +479,6 @@ STRCOLL (s1, s2, l) | ||
445 | const USTRING_TYPE *weights; | ||
446 | const USTRING_TYPE *extra; | ||
447 | const int32_t *indirect; | ||
448 | - uint_fast32_t pass; | ||
449 | - int result = 0; | ||
450 | - const USTRING_TYPE *us1; | ||
451 | - const USTRING_TYPE *us2; | ||
452 | - size_t s1len; | ||
453 | - size_t s2len; | ||
454 | - int32_t *idx1arr; | ||
455 | - int32_t *idx2arr; | ||
456 | - unsigned char *rule1arr; | ||
457 | - unsigned char *rule2arr; | ||
458 | - size_t idx1max; | ||
459 | - size_t idx2max; | ||
460 | - size_t idx1cnt; | ||
461 | - size_t idx2cnt; | ||
462 | - size_t idx1now; | ||
463 | - size_t idx2now; | ||
464 | - size_t backw1_stop; | ||
465 | - size_t backw2_stop; | ||
466 | - size_t backw1; | ||
467 | - size_t backw2; | ||
468 | - int val1; | ||
469 | - int val2; | ||
470 | - int position; | ||
471 | - int seq1len; | ||
472 | - int seq2len; | ||
473 | - int use_malloc; | ||
474 | - | ||
475 | -#include WEIGHT_H | ||
476 | |||
477 | if (nrules == 0) | ||
478 | return STRCMP (s1, s2); | ||
479 | @@ -98,7 +493,6 @@ STRCOLL (s1, s2, l) | ||
480 | current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_EXTRA,SUFFIX))].string; | ||
481 | indirect = (const int32_t *) | ||
482 | current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_INDIRECT,SUFFIX))].string; | ||
483 | - use_malloc = 0; | ||
484 | |||
485 | assert (((uintptr_t) table) % __alignof__ (table[0]) == 0); | ||
486 | assert (((uintptr_t) weights) % __alignof__ (weights[0]) == 0); | ||
487 | @@ -106,18 +500,13 @@ STRCOLL (s1, s2, l) | ||
488 | assert (((uintptr_t) indirect) % __alignof__ (indirect[0]) == 0); | ||
489 | |||
490 | /* We need this a few times. */ | ||
491 | - s1len = STRLEN (s1); | ||
492 | - s2len = STRLEN (s2); | ||
493 | + size_t s1len = STRLEN (s1); | ||
494 | + size_t s2len = STRLEN (s2); | ||
495 | |||
496 | /* Catch empty strings. */ | ||
497 | - if (__builtin_expect (s1len == 0, 0) || __builtin_expect (s2len == 0, 0)) | ||
498 | + if (__glibc_unlikely (s1len == 0) || __glibc_unlikely (s2len == 0)) | ||
499 | return (s1len != 0) - (s2len != 0); | ||
500 | |||
501 | - /* We need the elements of the strings as unsigned values since they | ||
502 | - are used as indeces. */ | ||
503 | - us1 = (const USTRING_TYPE *) s1; | ||
504 | - us2 = (const USTRING_TYPE *) s2; | ||
505 | - | ||
506 | /* Perform the first pass over the string and while doing this find | ||
507 | and store the weights for each character. Since we want this to | ||
508 | be as fast as possible we are using `alloca' to store the temporary | ||
509 | @@ -127,411 +516,124 @@ STRCOLL (s1, s2, l) | ||
510 | |||
511 | Please note that the localedef programs makes sure that `position' | ||
512 | is not used at the first level. */ | ||
513 | - if (! __libc_use_alloca ((s1len + s2len) * (sizeof (int32_t) + 1))) | ||
514 | - { | ||
515 | - idx1arr = (int32_t *) malloc ((s1len + s2len) * (sizeof (int32_t) + 1)); | ||
516 | - idx2arr = &idx1arr[s1len]; | ||
517 | - rule1arr = (unsigned char *) &idx2arr[s2len]; | ||
518 | - rule2arr = &rule1arr[s1len]; | ||
519 | - | ||
520 | - if (idx1arr == NULL) | ||
521 | - /* No memory. Well, go with the stack then. | ||
522 | - | ||
523 | - XXX Once this implementation is stable we will handle this | ||
524 | - differently. Instead of precomputing the indeces we will | ||
525 | - do this in time. This means, though, that this happens for | ||
526 | - every pass again. */ | ||
527 | - goto try_stack; | ||
528 | - use_malloc = 1; | ||
529 | - } | ||
530 | - else | ||
531 | - { | ||
532 | - try_stack: | ||
533 | - idx1arr = (int32_t *) alloca (s1len * sizeof (int32_t)); | ||
534 | - idx2arr = (int32_t *) alloca (s2len * sizeof (int32_t)); | ||
535 | - rule1arr = (unsigned char *) alloca (s1len); | ||
536 | - rule2arr = (unsigned char *) alloca (s2len); | ||
537 | - } | ||
538 | |||
539 | - idx1cnt = 0; | ||
540 | - idx2cnt = 0; | ||
541 | - idx1max = 0; | ||
542 | - idx2max = 0; | ||
543 | - idx1now = 0; | ||
544 | - idx2now = 0; | ||
545 | - backw1_stop = ~0ul; | ||
546 | - backw2_stop = ~0ul; | ||
547 | - backw1 = ~0ul; | ||
548 | - backw2 = ~0ul; | ||
549 | - seq1len = 0; | ||
550 | - seq2len = 0; | ||
551 | - position = rulesets[0] & sort_position; | ||
552 | - while (1) | ||
553 | - { | ||
554 | - val1 = 0; | ||
555 | - val2 = 0; | ||
556 | - | ||
557 | - /* Get the next non-IGNOREd element for string `s1'. */ | ||
558 | - if (seq1len == 0) | ||
559 | - do | ||
560 | - { | ||
561 | - ++val1; | ||
562 | - | ||
563 | - if (backw1_stop != ~0ul) | ||
564 | - { | ||
565 | - /* The is something pushed. */ | ||
566 | - if (backw1 == backw1_stop) | ||
567 | - { | ||
568 | - /* The last pushed character was handled. Continue | ||
569 | - with forward characters. */ | ||
570 | - if (idx1cnt < idx1max) | ||
571 | - { | ||
572 | - idx1now = idx1cnt; | ||
573 | - backw1_stop = ~0ul; | ||
574 | - } | ||
575 | - else | ||
576 | - /* Nothing anymore. The backward sequence ended with | ||
577 | - the last sequence in the string. Note that seq1len | ||
578 | - is still zero. */ | ||
579 | - break; | ||
580 | - } | ||
581 | - else | ||
582 | - idx1now = --backw1; | ||
583 | - } | ||
584 | - else | ||
585 | - { | ||
586 | - backw1_stop = idx1max; | ||
587 | - | ||
588 | - while (*us1 != L('\0')) | ||
589 | - { | ||
590 | - int32_t tmp = findidx (&us1, -1); | ||
591 | - rule1arr[idx1max] = tmp >> 24; | ||
592 | - idx1arr[idx1max] = tmp & 0xffffff; | ||
593 | - idx1cnt = idx1max++; | ||
594 | - | ||
595 | - if ((rulesets[rule1arr[idx1cnt] * nrules] | ||
596 | - & sort_backward) == 0) | ||
597 | - /* No more backward characters to push. */ | ||
598 | - break; | ||
599 | - ++idx1cnt; | ||
600 | - } | ||
601 | - | ||
602 | - if (backw1_stop >= idx1cnt) | ||
603 | - { | ||
604 | - /* No sequence at all or just one. */ | ||
605 | - if (idx1cnt == idx1max || backw1_stop > idx1cnt) | ||
606 | - /* Note that seq1len is still zero. */ | ||
607 | - break; | ||
608 | - | ||
609 | - backw1_stop = ~0ul; | ||
610 | - idx1now = idx1cnt; | ||
611 | - } | ||
612 | - else | ||
613 | - /* We pushed backward sequences. */ | ||
614 | - idx1now = backw1 = idx1cnt - 1; | ||
615 | - } | ||
616 | - } | ||
617 | - while ((seq1len = weights[idx1arr[idx1now]++]) == 0); | ||
618 | - | ||
619 | - /* And the same for string `s2'. */ | ||
620 | - if (seq2len == 0) | ||
621 | - do | ||
622 | - { | ||
623 | - ++val2; | ||
624 | - | ||
625 | - if (backw2_stop != ~0ul) | ||
626 | - { | ||
627 | - /* The is something pushed. */ | ||
628 | - if (backw2 == backw2_stop) | ||
629 | - { | ||
630 | - /* The last pushed character was handled. Continue | ||
631 | - with forward characters. */ | ||
632 | - if (idx2cnt < idx2max) | ||
633 | - { | ||
634 | - idx2now = idx2cnt; | ||
635 | - backw2_stop = ~0ul; | ||
636 | - } | ||
637 | - else | ||
638 | - /* Nothing anymore. The backward sequence ended with | ||
639 | - the last sequence in the string. Note that seq2len | ||
640 | - is still zero. */ | ||
641 | - break; | ||
642 | - } | ||
643 | - else | ||
644 | - idx2now = --backw2; | ||
645 | - } | ||
646 | - else | ||
647 | - { | ||
648 | - backw2_stop = idx2max; | ||
649 | - | ||
650 | - while (*us2 != L('\0')) | ||
651 | - { | ||
652 | - int32_t tmp = findidx (&us2, -1); | ||
653 | - rule2arr[idx2max] = tmp >> 24; | ||
654 | - idx2arr[idx2max] = tmp & 0xffffff; | ||
655 | - idx2cnt = idx2max++; | ||
656 | - | ||
657 | - if ((rulesets[rule2arr[idx2cnt] * nrules] | ||
658 | - & sort_backward) == 0) | ||
659 | - /* No more backward characters to push. */ | ||
660 | - break; | ||
661 | - ++idx2cnt; | ||
662 | - } | ||
663 | - | ||
664 | - if (backw2_stop >= idx2cnt) | ||
665 | - { | ||
666 | - /* No sequence at all or just one. */ | ||
667 | - if (idx2cnt == idx2max || backw2_stop > idx2cnt) | ||
668 | - /* Note that seq1len is still zero. */ | ||
669 | - break; | ||
670 | - | ||
671 | - backw2_stop = ~0ul; | ||
672 | - idx2now = idx2cnt; | ||
673 | - } | ||
674 | - else | ||
675 | - /* We pushed backward sequences. */ | ||
676 | - idx2now = backw2 = idx2cnt - 1; | ||
677 | - } | ||
678 | - } | ||
679 | - while ((seq2len = weights[idx2arr[idx2now]++]) == 0); | ||
680 | - | ||
681 | - /* See whether any or both strings are empty. */ | ||
682 | - if (seq1len == 0 || seq2len == 0) | ||
683 | - { | ||
684 | - if (seq1len == seq2len) | ||
685 | - /* Both ended. So far so good, both strings are equal at the | ||
686 | - first level. */ | ||
687 | - break; | ||
688 | - | ||
689 | - /* This means one string is shorter than the other. Find out | ||
690 | - which one and return an appropriate value. */ | ||
691 | - result = seq1len == 0 ? -1 : 1; | ||
692 | - goto free_and_return; | ||
693 | - } | ||
694 | + coll_seq seq1, seq2; | ||
695 | + bool use_malloc = false; | ||
696 | + int result = 0; | ||
697 | |||
698 | - /* Test for position if necessary. */ | ||
699 | - if (position && val1 != val2) | ||
700 | - { | ||
701 | - result = val1 - val2; | ||
702 | - goto free_and_return; | ||
703 | - } | ||
704 | + memset (&seq1, 0, sizeof (seq1)); | ||
705 | + seq2 = seq1; | ||
706 | |||
707 | - /* Compare the two sequences. */ | ||
708 | - do | ||
709 | - { | ||
710 | - if (weights[idx1arr[idx1now]] != weights[idx2arr[idx2now]]) | ||
711 | - { | ||
712 | - /* The sequences differ. */ | ||
713 | - result = weights[idx1arr[idx1now]] - weights[idx2arr[idx2now]]; | ||
714 | - goto free_and_return; | ||
715 | - } | ||
716 | + size_t size_max = SIZE_MAX / (sizeof (int32_t) + 1); | ||
717 | |||
718 | - /* Increment the offsets. */ | ||
719 | - ++idx1arr[idx1now]; | ||
720 | - ++idx2arr[idx2now]; | ||
721 | + /* If the strings are long enough to cause overflow in the size request, then | ||
722 | + skip the allocation and proceed with the non-cached routines. */ | ||
723 | + if (MIN (s1len, s2len) > size_max | ||
724 | + || MAX (s1len, s2len) > size_max - MIN (s1len, s2len)) | ||
725 | + goto begin_collate; | ||
726 | |||
727 | - --seq1len; | ||
728 | - --seq2len; | ||
729 | - } | ||
730 | - while (seq1len > 0 && seq2len > 0); | ||
731 | + if (! __libc_use_alloca ((s1len + s2len) * (sizeof (int32_t) + 1))) | ||
732 | + { | ||
733 | + seq1.idxarr = (int32_t *) malloc ((s1len + s2len) * (sizeof (int32_t) + 1)); | ||
734 | |||
735 | - if (position && seq1len != seq2len) | ||
736 | + /* If we failed to allocate memory, we leave everything as NULL so that | ||
737 | + we use the nocache version of traversal and comparison functions. */ | ||
738 | + if (seq1.idxarr != NULL) | ||
739 | { | ||
740 | - result = seq1len - seq2len; | ||
741 | - goto free_and_return; | ||
742 | + seq2.idxarr = &seq1.idxarr[s1len]; | ||
743 | + seq1.rulearr = (unsigned char *) &seq2.idxarr[s2len]; | ||
744 | + seq2.rulearr = &seq1.rulearr[s1len]; | ||
745 | + use_malloc = true; | ||
746 | } | ||
747 | } | ||
748 | + else | ||
749 | + { | ||
750 | + seq1.idxarr = (int32_t *) alloca (s1len * sizeof (int32_t)); | ||
751 | + seq2.idxarr = (int32_t *) alloca (s2len * sizeof (int32_t)); | ||
752 | + seq1.rulearr = (unsigned char *) alloca (s1len); | ||
753 | + seq2.rulearr = (unsigned char *) alloca (s2len); | ||
754 | + } | ||
755 | |||
756 | - /* Now the remaining passes over the weights. We now use the | ||
757 | - indeces we found before. */ | ||
758 | - for (pass = 1; pass < nrules; ++pass) | ||
759 | + int rule; | ||
760 | + | ||
761 | + begin_collate: | ||
762 | + rule = 0; | ||
763 | + /* Cache values in the first pass and if needed, use them in subsequent | ||
764 | + passes. */ | ||
765 | + for (int pass = 0; pass < nrules; ++pass) | ||
766 | { | ||
767 | + seq1.idxcnt = 0; | ||
768 | + seq1.idx = 0; | ||
769 | + seq2.idx = 0; | ||
770 | + seq1.backw_stop = ~0ul; | ||
771 | + seq1.backw = ~0ul; | ||
772 | + seq2.idxcnt = 0; | ||
773 | + seq2.backw_stop = ~0ul; | ||
774 | + seq2.backw = ~0ul; | ||
775 | + | ||
776 | + /* We need the elements of the strings as unsigned values since they | ||
777 | + are used as indices. */ | ||
778 | + seq1.us = (const USTRING_TYPE *) s1; | ||
779 | + seq2.us = (const USTRING_TYPE *) s2; | ||
780 | + | ||
781 | /* We assume that if a rule has defined `position' in one section | ||
782 | this is true for all of them. */ | ||
783 | - idx1cnt = 0; | ||
784 | - idx2cnt = 0; | ||
785 | - backw1_stop = ~0ul; | ||
786 | - backw2_stop = ~0ul; | ||
787 | - backw1 = ~0ul; | ||
788 | - backw2 = ~0ul; | ||
789 | - position = rulesets[rule1arr[0] * nrules + pass] & sort_position; | ||
790 | + int position = rulesets[rule * nrules + pass] & sort_position; | ||
791 | |||
792 | while (1) | ||
793 | { | ||
794 | - val1 = 0; | ||
795 | - val2 = 0; | ||
796 | - | ||
797 | - /* Get the next non-IGNOREd element for string `s1'. */ | ||
798 | - if (seq1len == 0) | ||
799 | - do | ||
800 | - { | ||
801 | - ++val1; | ||
802 | - | ||
803 | - if (backw1_stop != ~0ul) | ||
804 | - { | ||
805 | - /* The is something pushed. */ | ||
806 | - if (backw1 == backw1_stop) | ||
807 | - { | ||
808 | - /* The last pushed character was handled. Continue | ||
809 | - with forward characters. */ | ||
810 | - if (idx1cnt < idx1max) | ||
811 | - { | ||
812 | - idx1now = idx1cnt; | ||
813 | - backw1_stop = ~0ul; | ||
814 | - } | ||
815 | - else | ||
816 | - { | ||
817 | - /* Nothing anymore. The backward sequence | ||
818 | - ended with the last sequence in the string. */ | ||
819 | - idx1now = ~0ul; | ||
820 | - break; | ||
821 | - } | ||
822 | - } | ||
823 | - else | ||
824 | - idx1now = --backw1; | ||
825 | - } | ||
826 | - else | ||
827 | - { | ||
828 | - backw1_stop = idx1cnt; | ||
829 | - | ||
830 | - while (idx1cnt < idx1max) | ||
831 | - { | ||
832 | - if ((rulesets[rule1arr[idx1cnt] * nrules + pass] | ||
833 | - & sort_backward) == 0) | ||
834 | - /* No more backward characters to push. */ | ||
835 | - break; | ||
836 | - ++idx1cnt; | ||
837 | - } | ||
838 | - | ||
839 | - if (backw1_stop == idx1cnt) | ||
840 | - { | ||
841 | - /* No sequence at all or just one. */ | ||
842 | - if (idx1cnt == idx1max) | ||
843 | - /* Note that seq1len is still zero. */ | ||
844 | - break; | ||
845 | - | ||
846 | - backw1_stop = ~0ul; | ||
847 | - idx1now = idx1cnt++; | ||
848 | - } | ||
849 | - else | ||
850 | - /* We pushed backward sequences. */ | ||
851 | - idx1now = backw1 = idx1cnt - 1; | ||
852 | - } | ||
853 | - } | ||
854 | - while ((seq1len = weights[idx1arr[idx1now]++]) == 0); | ||
855 | - | ||
856 | - /* And the same for string `s2'. */ | ||
857 | - if (seq2len == 0) | ||
858 | - do | ||
859 | - { | ||
860 | - ++val2; | ||
861 | - | ||
862 | - if (backw2_stop != ~0ul) | ||
863 | - { | ||
864 | - /* The is something pushed. */ | ||
865 | - if (backw2 == backw2_stop) | ||
866 | - { | ||
867 | - /* The last pushed character was handled. Continue | ||
868 | - with forward characters. */ | ||
869 | - if (idx2cnt < idx2max) | ||
870 | - { | ||
871 | - idx2now = idx2cnt; | ||
872 | - backw2_stop = ~0ul; | ||
873 | - } | ||
874 | - else | ||
875 | - { | ||
876 | - /* Nothing anymore. The backward sequence | ||
877 | - ended with the last sequence in the string. */ | ||
878 | - idx2now = ~0ul; | ||
879 | - break; | ||
880 | - } | ||
881 | - } | ||
882 | - else | ||
883 | - idx2now = --backw2; | ||
884 | - } | ||
885 | - else | ||
886 | - { | ||
887 | - backw2_stop = idx2cnt; | ||
888 | - | ||
889 | - while (idx2cnt < idx2max) | ||
890 | - { | ||
891 | - if ((rulesets[rule2arr[idx2cnt] * nrules + pass] | ||
892 | - & sort_backward) == 0) | ||
893 | - /* No more backward characters to push. */ | ||
894 | - break; | ||
895 | - ++idx2cnt; | ||
896 | - } | ||
897 | - | ||
898 | - if (backw2_stop == idx2cnt) | ||
899 | - { | ||
900 | - /* No sequence at all or just one. */ | ||
901 | - if (idx2cnt == idx2max) | ||
902 | - /* Note that seq2len is still zero. */ | ||
903 | - break; | ||
904 | - | ||
905 | - backw2_stop = ~0ul; | ||
906 | - idx2now = idx2cnt++; | ||
907 | - } | ||
908 | - else | ||
909 | - /* We pushed backward sequences. */ | ||
910 | - idx2now = backw2 = idx2cnt - 1; | ||
911 | - } | ||
912 | - } | ||
913 | - while ((seq2len = weights[idx2arr[idx2now]++]) == 0); | ||
914 | + if (__glibc_unlikely (seq1.idxarr == NULL)) | ||
915 | + { | ||
916 | + get_next_seq_nocache (&seq1, nrules, rulesets, weights, table, | ||
917 | + extra, indirect, pass); | ||
918 | + get_next_seq_nocache (&seq2, nrules, rulesets, weights, table, | ||
919 | + extra, indirect, pass); | ||
920 | + } | ||
921 | + else if (pass == 0) | ||
922 | + { | ||
923 | + get_next_seq (&seq1, nrules, rulesets, weights, table, extra, | ||
924 | + indirect); | ||
925 | + get_next_seq (&seq2, nrules, rulesets, weights, table, extra, | ||
926 | + indirect); | ||
927 | + } | ||
928 | + else | ||
929 | + { | ||
930 | + get_next_seq_cached (&seq1, nrules, pass, rulesets, weights); | ||
931 | + get_next_seq_cached (&seq2, nrules, pass, rulesets, weights); | ||
932 | + } | ||
933 | |||
934 | /* See whether any or both strings are empty. */ | ||
935 | - if (seq1len == 0 || seq2len == 0) | ||
936 | + if (seq1.len == 0 || seq2.len == 0) | ||
937 | { | ||
938 | - if (seq1len == seq2len) | ||
939 | + if (seq1.len == seq2.len) | ||
940 | /* Both ended. So far so good, both strings are equal | ||
941 | at this level. */ | ||
942 | break; | ||
943 | |||
944 | /* This means one string is shorter than the other. Find out | ||
945 | which one and return an appropriate value. */ | ||
946 | - result = seq1len == 0 ? -1 : 1; | ||
947 | + result = seq1.len == 0 ? -1 : 1; | ||
948 | goto free_and_return; | ||
949 | } | ||
950 | |||
951 | - /* Test for position if necessary. */ | ||
952 | - if (position && val1 != val2) | ||
953 | - { | ||
954 | - result = val1 - val2; | ||
955 | - goto free_and_return; | ||
956 | - } | ||
957 | - | ||
958 | - /* Compare the two sequences. */ | ||
959 | - do | ||
960 | - { | ||
961 | - if (weights[idx1arr[idx1now]] != weights[idx2arr[idx2now]]) | ||
962 | - { | ||
963 | - /* The sequences differ. */ | ||
964 | - result = (weights[idx1arr[idx1now]] | ||
965 | - - weights[idx2arr[idx2now]]); | ||
966 | - goto free_and_return; | ||
967 | - } | ||
968 | - | ||
969 | - /* Increment the offsets. */ | ||
970 | - ++idx1arr[idx1now]; | ||
971 | - ++idx2arr[idx2now]; | ||
972 | - | ||
973 | - --seq1len; | ||
974 | - --seq2len; | ||
975 | - } | ||
976 | - while (seq1len > 0 && seq2len > 0); | ||
977 | - | ||
978 | - if (position && seq1len != seq2len) | ||
979 | - { | ||
980 | - result = seq1len - seq2len; | ||
981 | - goto free_and_return; | ||
982 | - } | ||
983 | + if (__glibc_unlikely (seq1.idxarr == NULL)) | ||
984 | + result = do_compare_nocache (&seq1, &seq2, position, weights); | ||
985 | + else | ||
986 | + result = do_compare (&seq1, &seq2, position, weights); | ||
987 | + if (result != 0) | ||
988 | + goto free_and_return; | ||
989 | } | ||
990 | + | ||
991 | + if (__glibc_likely (seq1.rulearr != NULL)) | ||
992 | + rule = seq1.rulearr[0]; | ||
993 | + else | ||
994 | + rule = seq1.rule; | ||
995 | } | ||
996 | |||
997 | /* Free the memory if needed. */ | ||
998 | free_and_return: | ||
999 | if (use_malloc) | ||
1000 | - free (idx1arr); | ||
1001 | + free (seq1.idxarr); | ||
1002 | |||
1003 | return result; | ||
1004 | } |