Magellan Linux

Contents of /trunk/coreutils/patches-5.94/coreutils-5.94-i18n.patch

Parent Directory | Revision Log


Revision 115 - (show annotations) (download)
Sun Mar 18 15:57:37 2007 UTC (17 years, 1 month ago) by niro
File size: 108739 byte(s)
patches for 5.94

1 --- coreutils-5.93/lib/linebuffer.h.i18n 2005-05-14 08:58:06.000000000 +0100
2 +++ coreutils-5.93/lib/linebuffer.h 2005-12-23 08:53:01.000000000 +0000
3 @@ -22,6 +22,11 @@
4
5 # include <stdio.h>
6
7 +/* Get mbstate_t. */
8 +# if HAVE_WCHAR_H
9 +# include <wchar.h>
10 +# endif
11 +
12 /* A `struct linebuffer' holds a line of text. */
13
14 struct linebuffer
15 @@ -29,6 +34,9 @@
16 size_t size; /* Allocated. */
17 size_t length; /* Used. */
18 char *buffer;
19 +# if HAVE_WCHAR_H
20 + mbstate_t state;
21 +# endif
22 };
23
24 /* Initialize linebuffer LINEBUFFER for use. */
25 --- coreutils-5.93/src/cut.c.i18n 2005-08-12 08:16:25.000000000 +0100
26 +++ coreutils-5.93/src/cut.c 2005-12-23 08:53:01.000000000 +0000
27 @@ -29,6 +29,11 @@
28 #include <assert.h>
29 #include <getopt.h>
30 #include <sys/types.h>
31 +
32 +/* Get mbstate_t, mbrtowc(). */
33 +#if HAVE_WCHAR_H
34 +# include <wchar.h>
35 +#endif
36 #include "system.h"
37
38 #include "error.h"
39 @@ -37,6 +42,18 @@
40 #include "quote.h"
41 #include "xstrndup.h"
42
43 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
44 + installation; work around this configuration error. */
45 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
46 +# undef MB_LEN_MAX
47 +# define MB_LEN_MAX 16
48 +#endif
49 +
50 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
51 +#if HAVE_MBRTOWC && defined mbstate_t
52 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
53 +#endif
54 +
55 /* The official name of this program (e.g., no `g' prefix). */
56 #define PROGRAM_NAME "cut"
57
58 @@ -67,6 +84,52 @@
59 } \
60 while (0)
61
62 +/* Refill BUF when fewer than MB_LEN_MAX unread bytes remain and STREAM is neither at EOF nor in error: the BUFLEN unread bytes at BUFPOS are moved to the start of BUF, up to BUFSIZ more bytes are read in after them, and BUFPOS is reset to BUF. */
63 +#define REFILL_BUFFER(BUF, BUFPOS, BUFLEN, STREAM) \
64 + do \
65 + { \
66 + if (BUFLEN < MB_LEN_MAX && !feof (STREAM) && !ferror (STREAM)) \
67 + { \
68 + memmove (BUF, BUFPOS, BUFLEN); /* Keep the unread tail. */ \
69 + BUFLEN += fread (BUF + BUFLEN, sizeof(char), BUFSIZ, STREAM); \
70 + BUFPOS = BUF; \
71 + } \
72 + } \
73 + while (0)
74 +
75 +/* Decode the multibyte character at BUFPOS into WC. BUFPOS is not advanced; MBLENGTH receives the number of bytes the caller should consume.
76 + CONVFAIL is always assigned: 1 if the byte sequence is not valid as a character, otherwise 0. If BUFLEN < 1, WC is set to WEOF and MBLENGTH is left untouched. */
77 +#define GET_NEXT_WC_FROM_BUFFER(WC, BUFPOS, BUFLEN, MBLENGTH, STATE, CONVFAIL) \
78 + do \
79 + { \
80 + mbstate_t state_bak; \
81 + \
82 + /* Assign CONVFAIL before the WEOF exit so callers never read it unset. */ \
83 + CONVFAIL = 0; \
84 + if (BUFLEN < 1) \
85 + { \
86 + WC = WEOF; \
87 + break; \
88 + } \
89 + /* Get a wide character. */ \
90 + state_bak = STATE; \
91 + MBLENGTH = mbrtowc ((wchar_t *)&WC, BUFPOS, BUFLEN, &STATE); \
92 + \
93 + switch (MBLENGTH) \
94 + { \
95 + case (size_t)-1: \
96 + case (size_t)-2: \
97 + CONVFAIL++; \
98 + STATE = state_bak; /* Discard the failed conversion's state. */ \
99 + /* Fall through. */ \
100 + \
101 + case 0: /* mbrtowc returns 0 for the null character. */ \
102 + MBLENGTH = 1; /* Consume exactly one byte in these cases. */ \
103 + break; \
104 + } \
105 + } \
106 + while (0)
107 +
108 struct range_pair
109 {
110 size_t lo;
111 @@ -85,7 +148,7 @@
112 /* The number of bytes allocated for FIELD_1_BUFFER. */
113 static size_t field_1_bufsize;
114
115 -/* The largest field or byte index used as an endpoint of a closed
116 +/* The largest byte, character or field index used as an endpoint of a closed
117 or degenerate range specification; this doesn't include the starting
118 index of right-open-ended ranges. For example, with either range spec
119 `2-5,9-', `2-3,5,9-' this variable would be set to 5. */
120 @@ -97,10 +160,11 @@
121
122 /* This is a bit vector.
123 In byte mode, which bytes to output.
124 + In character mode, which characters to output.
125 In field mode, which DELIM-separated fields to output.
126 - Both bytes and fields are numbered starting with 1,
127 + Bytes, characters and fields are numbered starting with 1,
128 so the zeroth bit of this array is unused.
129 - A field or byte K has been selected if
130 + A byte, character or field K has been selected if
131 (K <= MAX_RANGE_ENDPOINT and is_printable_field(K))
132 || (EOL_RANGE_START > 0 && K >= EOL_RANGE_START). */
133 static unsigned char *printable_field;
134 @@ -109,9 +173,12 @@
135 {
136 undefined_mode,
137
138 - /* Output characters that are in the given bytes. */
139 + /* Output bytes that are at the given positions. */
140 byte_mode,
141
142 + /* Output characters that are at the given positions. */
143 + character_mode,
144 +
145 /* Output the given delimeter-separated fields. */
146 field_mode
147 };
148 @@ -121,6 +188,13 @@
149
150 static enum operating_mode operating_mode;
151
152 +/* If nonzero, when in byte mode, don't split multibyte characters. */
153 +static int byte_mode_character_aware;
154 +
155 +/* If nonzero, use the functions for single byte locale even
156 + if this program runs on multibyte locale. */
157 +static int force_singlebyte_mode;
158 +
159 /* If true do not output lines containing no delimeter characters.
160 Otherwise, all such lines are printed. This option is valid only
161 with field mode. */
162 @@ -132,6 +206,9 @@
163
164 /* The delimeter character for field mode. */
165 static unsigned char delim;
166 +#if HAVE_WCHAR_H
167 +static wchar_t wcdelim;
168 +#endif
169
170 /* True if the --output-delimiter=STRING option was specified. */
171 static bool output_delimiter_specified;
172 @@ -205,7 +282,7 @@
173 -f, --fields=LIST select only these fields; also print any line\n\
174 that contains no delimiter character, unless\n\
175 the -s option is specified\n\
176 - -n (ignored)\n\
177 + -n with -b: don't split multibyte characters\n\
178 "), stdout);
179 fputs (_("\
180 --complement complement the set of selected bytes, characters\n\
181 @@ -360,7 +437,7 @@
182 in_digits = false;
183 /* Starting a range. */
184 if (dash_found)
185 - FATAL_ERROR (_("invalid byte or field list"));
186 + FATAL_ERROR (_("invalid byte, character or field list"));
187 dash_found = true;
188 fieldstr++;
189
190 @@ -385,14 +462,16 @@
191 if (value == 0)
192 {
193 /* `n-'. From `initial' to end of line. */
194 - eol_range_start = initial;
195 + if (eol_range_start == 0 ||
196 + (eol_range_start != 0 && eol_range_start > initial))
197 + eol_range_start = initial;
198 field_found = true;
199 }
200 else
201 {
202 /* `m-n' or `-n' (1-n). */
203 if (value < initial)
204 - FATAL_ERROR (_("invalid byte or field list"));
205 + FATAL_ERROR (_("invalid byte, character or field list"));
206
207 /* Is there already a range going to end of line? */
208 if (eol_range_start != 0)
209 @@ -465,6 +544,9 @@
210 if (operating_mode == byte_mode)
211 error (0, 0,
212 _("byte offset %s is too large"), quote (bad_num));
213 + else if (operating_mode == character_mode)
214 + error (0, 0,
215 + _("character offset %s is too large"), quote (bad_num));
216 else
217 error (0, 0,
218 _("field number %s is too large"), quote (bad_num));
219 @@ -475,7 +557,7 @@
220 fieldstr++;
221 }
222 else
223 - FATAL_ERROR (_("invalid byte or field list"));
224 + FATAL_ERROR (_("invalid byte, character or field list"));
225 }
226
227 max_range_endpoint = 0;
228 @@ -568,6 +650,63 @@
229 }
230 }
231
232 +#if HAVE_MBRTOWC
233 +/* This function is used in the following two cases.
234 +
235 + 1. Read from the stream STREAM, printing to standard output any selected
236 + characters.
237 +
238 + 2. Read from stream STREAM, printing to standard output any selected bytes,
239 + without splitting multibyte characters. */
240 +
241 +static void
242 +cut_characters_or_cut_bytes_no_split (FILE *stream)
243 +{
244 + int idx; /* Number of bytes (byte mode) or characters counted in the line so far. */
245 + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
246 + char *bufpos; /* Next read position of BUF. */
247 + size_t buflen; /* The length of the unread byte sequence in BUF. */
248 + wint_t wc = 0; /* The most recently decoded wide character, or WEOF. */
249 + size_t mblength; /* The number of bytes in BUF occupied by the
250 + multibyte character decoded into WC. */
251 + mbstate_t state; /* State of the stream. */
252 + int convfail; /* 1 when the conversion failed, otherwise 0. */
253 +
254 + idx = 0;
255 + buflen = 0;
256 + bufpos = buf;
257 + memset (&state, '\0', sizeof(mbstate_t));
258 +
259 + while (1)
260 + {
261 + REFILL_BUFFER (buf, bufpos, buflen, stream);
262 +
263 + GET_NEXT_WC_FROM_BUFFER (wc, bufpos, buflen, mblength, state, convfail);
264 +
265 + if (wc == WEOF) /* No bytes left: end a partial last line and stop. */
266 + {
267 + if (idx > 0)
268 + putchar ('\n');
269 + break;
270 + }
271 + else if (wc == L'\n')
272 + {
273 + putchar ('\n');
274 + idx = 0; /* Positions restart on every line. */
275 + }
276 + else
277 + {
278 + idx += (operating_mode == byte_mode) ? mblength : 1; /* Byte mode counts every byte of the character. */
279 + if (print_kth (idx, NULL)) /* In byte mode IDX is the position of the character's last byte. */
280 + fwrite (bufpos, mblength, sizeof(char), stdout);
281 + }
282 +
283 + buflen -= mblength;
284 + bufpos += mblength;
285 + }
286 +}
287 +#endif
288 +
289 /* Read from stream STREAM, printing to standard output any selected fields. */
290
291 static void
292 @@ -689,13 +828,192 @@
293 }
294 }
295
296 +#if HAVE_MBRTOWC /* cut_fields_mb: read STREAM as multibyte text and print the selected WCDELIM-separated fields to standard output. */
297 +static void
298 +cut_fields_mb (FILE *stream)
299 +{
300 + int c; /* First byte of STREAM, peeked to detect empty input. */
301 + unsigned int field_idx; /* 1-based index of the field being read. */
302 + int found_any_selected_field; /* Nonzero once a selected field was output on this line. */
303 + int buffer_first_field; /* Nonzero if field 1 must be buffered (see below). */
304 + int empty_input; /* Nonzero if STREAM contained no data at all. */
305 + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
306 + char *bufpos; /* Next read position of BUF. */
307 + size_t buflen; /* The length of the unread byte sequence in BUF. */
308 + wint_t wc = 0; /* The most recently decoded wide character, or WEOF. */
309 + size_t mblength; /* The number of bytes in BUF occupied by the
310 + multibyte character decoded into WC. */
311 + mbstate_t state; /* State of the stream. */
312 + int convfail = 0; /* 1 when the conversion failed, otherwise 0; initialized because it is tested below even if nothing was decoded. */
313 +
314 + found_any_selected_field = 0;
315 + field_idx = 1;
316 + bufpos = buf;
317 + buflen = 0;
318 + memset (&state, '\0', sizeof(mbstate_t));
319 +
320 + c = getc (stream);
321 + empty_input = (c == EOF);
322 + if (c != EOF)
323 + ungetc (c, stream);
324 + else
325 + wc = WEOF;
326 +
327 + /* To support the semantics of the -s flag, we may have to buffer
328 + all of the first field to determine whether it is `delimited.'
329 + But that is unnecessary if all non-delimited lines must be printed
330 + and the first field has been selected, or if non-delimited lines
331 + must be suppressed and the first field has *not* been selected.
332 + That is because a non-delimited line has exactly one field. */
333 + buffer_first_field = (suppress_non_delimited ^ !print_kth (1, NULL));
334 +
335 + while (1)
336 + {
337 + if (field_idx == 1 && buffer_first_field)
338 + {
339 + int len = 0;
340 +
341 + while (1)
342 + {
343 + REFILL_BUFFER (buf, bufpos, buflen, stream);
344 +
345 + GET_NEXT_WC_FROM_BUFFER
346 + (wc, bufpos, buflen, mblength, state, convfail);
347 +
348 + if (wc == WEOF)
349 + break;
350 +
351 + field_1_buffer = xrealloc (field_1_buffer, len + mblength);
352 + memcpy (field_1_buffer + len, bufpos, mblength);
353 + len += mblength;
354 + buflen -= mblength;
355 + bufpos += mblength;
356 +
357 + if (!convfail && (wc == L'\n' || wc == wcdelim))
358 + break;
359 + }
360 +
361 + if (len <= 0 && wc == WEOF) /* Stop only when nothing is buffered; a final line lacking a newline must still be handled below. */
362 + break;
363 +
364 + /* If the first field extends to the end of line (it is not
365 + delimited) and we are printing all non-delimited lines,
366 + print this one. */
367 + if (convfail || (!convfail && wc != wcdelim))
368 + {
369 + if (suppress_non_delimited)
370 + {
371 + /* Empty. */
372 + }
373 + else
374 + {
375 + fwrite (field_1_buffer, sizeof (char), len, stdout);
376 + /* Make sure the output line is newline terminated. */
377 + if (convfail || (!convfail && wc != L'\n'))
378 + putchar ('\n');
379 + }
380 + continue;
381 + }
382 +
383 + if (print_kth (1, NULL))
384 + {
385 + /* Print the field, but not the trailing delimiter, which occupies MBLENGTH bytes. */
386 + fwrite (field_1_buffer, sizeof (char), len - mblength, stdout);
387 + found_any_selected_field = 1;
388 + }
389 + ++field_idx;
390 + }
391 +
392 + if (wc != WEOF)
393 + {
394 + if (print_kth (field_idx, NULL))
395 + {
396 + if (found_any_selected_field)
397 + {
398 + fwrite (output_delimiter_string, sizeof (char),
399 + output_delimiter_length, stdout);
400 + }
401 + found_any_selected_field = 1;
402 + }
403 +
404 + while (1)
405 + {
406 + REFILL_BUFFER (buf, bufpos, buflen, stream);
407 +
408 + GET_NEXT_WC_FROM_BUFFER
409 + (wc, bufpos, buflen, mblength, state, convfail);
410 +
411 + if (wc == WEOF)
412 + break;
413 + else if (!convfail && (wc == wcdelim || wc == L'\n'))
414 + {
415 + buflen -= mblength; /* Consume the delimiter or newline without printing it. */
416 + bufpos += mblength;
417 + break;
418 + }
419 +
420 + if (print_kth (field_idx, NULL))
421 + fwrite (bufpos, mblength, sizeof(char), stdout);
422 +
423 + buflen -= mblength;
424 + bufpos += mblength;
425 + }
426 + }
427 +
428 + if ((!convfail || wc == L'\n') && buflen < 1)
429 + wc = WEOF;
430 +
431 + if (!convfail && wc == wcdelim)
432 + ++field_idx;
433 + else if (wc == WEOF || (!convfail && wc == L'\n'))
434 + {
435 + if (found_any_selected_field
436 + || (!empty_input && !(suppress_non_delimited && field_idx == 1)))
437 + putchar ('\n');
438 + if (wc == WEOF)
439 + break;
440 + field_idx = 1; /* Start counting fields afresh on the next line. */
441 + found_any_selected_field = 0;
442 + }
443 + }
444 +}
445 +#endif
446 +
447 static void
448 cut_stream (FILE *stream)
449 {
450 - if (operating_mode == byte_mode)
451 - cut_bytes (stream);
452 +#if HAVE_MBRTOWC /* Dispatch STREAM to the cutter for the current locale and operating mode. */
453 + if (MB_CUR_MAX > 1 && !force_singlebyte_mode) /* Multibyte locale, single-byte handling not forced. */
454 + {
455 + switch (operating_mode)
456 + {
457 + case byte_mode:
458 + if (byte_mode_character_aware) /* Set by -n: don't split multibyte characters. */
459 + cut_characters_or_cut_bytes_no_split (stream);
460 + else
461 + cut_bytes (stream);
462 + break;
463 +
464 + case character_mode:
465 + cut_characters_or_cut_bytes_no_split (stream);
466 + break;
467 +
468 + case field_mode:
469 + cut_fields_mb (stream);
470 + break;
471 +
472 + default: /* No other operating mode is handled here. */
473 + abort ();
474 + }
475 + }
476 else
477 - cut_fields (stream);
478 +#endif
479 + { /* Single-byte path: every mode other than field mode goes to cut_bytes. */
480 + if (operating_mode == field_mode)
481 + cut_fields (stream);
482 + else
483 + cut_bytes (stream);
484 + }
485 }
486
487 /* Process file FILE to standard output.
488 @@ -745,6 +1063,8 @@
489 bool ok;
490 bool delim_specified = false;
491 char *spec_list_string IF_LINT(= NULL);
492 + char mbdelim[MB_LEN_MAX + 1];
493 + size_t delimlen = 0;
494
495 initialize_main (&argc, &argv);
496 program_name = argv[0];
497 @@ -767,7 +1087,6 @@
498 switch (optc)
499 {
500 case 'b':
501 - case 'c':
502 /* Build the byte list. */
503 if (operating_mode != undefined_mode)
504 FATAL_ERROR (_("only one type of list may be specified"));
505 @@ -775,6 +1094,14 @@
506 spec_list_string = optarg;
507 break;
508
509 + case 'c':
510 + /* Build the character list. */
511 + if (operating_mode != undefined_mode)
512 + FATAL_ERROR (_("only one type of list may be specified"));
513 + operating_mode = character_mode;
514 + spec_list_string = optarg;
515 + break;
516 +
517 case 'f':
518 /* Build the field list. */
519 if (operating_mode != undefined_mode)
520 @@ -786,10 +1113,35 @@
521 case 'd':
522 /* New delimiter. */
523 /* Interpret -d '' to mean `use the NUL byte as the delimiter.' */
524 - if (optarg[0] != '\0' && optarg[1] != '\0')
525 - FATAL_ERROR (_("the delimiter must be a single character"));
526 - delim = optarg[0];
527 - delim_specified = true;
528 +#if HAVE_MBRTOWC
529 + {
530 + if(MB_CUR_MAX > 1)
531 + {
532 + mbstate_t state;
533 +
534 + memset (&state, '\0', sizeof(mbstate_t));
535 + delimlen = mbrtowc (&wcdelim, optarg, strnlen(optarg, MB_LEN_MAX), &state);
536 +
537 + if (delimlen == (size_t)-1 || delimlen == (size_t)-2)
538 + ++force_singlebyte_mode;
539 + else
540 + {
541 + delimlen = (delimlen < 1) ? 1 : delimlen;
542 + if (wcdelim != L'\0' && *(optarg + delimlen) != '\0')
543 + FATAL_ERROR (_("the delimiter must be a single character"));
544 + memcpy (mbdelim, optarg, delimlen);
545 + }
546 + }
547 +
548 + if (MB_CUR_MAX <= 1 || force_singlebyte_mode)
549 +#endif
550 + {
551 + if (optarg[0] != '\0' && optarg[1] != '\0')
552 + FATAL_ERROR (_("the delimiter must be a single character"));
553 + delim = (unsigned char) optarg[0];
554 + }
555 + delim_specified = true;
556 + }
557 break;
558
559 case OUTPUT_DELIMITER_OPTION:
560 @@ -802,6 +1154,7 @@
561 break;
562
563 case 'n':
564 + byte_mode_character_aware = 1;
565 break;
566
567 case 's':
568 @@ -824,7 +1177,7 @@
569 if (operating_mode == undefined_mode)
570 FATAL_ERROR (_("you must specify a list of bytes, characters, or fields"));
571
572 - if (delim != '\0' && operating_mode != field_mode)
573 + if (delim_specified && operating_mode != field_mode)
574 FATAL_ERROR (_("an input delimiter may be specified only\
575 when operating on fields"));
576
577 @@ -851,15 +1204,34 @@
578 }
579
580 if (!delim_specified)
581 - delim = '\t';
582 + {
583 + delim = '\t';
584 +#ifdef HAVE_MBRTOWC
585 + wcdelim = L'\t';
586 + mbdelim[0] = '\t';
587 + mbdelim[1] = '\0';
588 + delimlen = 1;
589 +#endif
590 + }
591
592 if (output_delimiter_string == NULL)
593 {
594 - static char dummy[2];
595 - dummy[0] = delim;
596 - dummy[1] = '\0';
597 - output_delimiter_string = dummy;
598 - output_delimiter_length = 1;
599 +#ifdef HAVE_MBRTOWC
600 + if (MB_CUR_MAX > 1 && !force_singlebyte_mode)
601 + {
602 + output_delimiter_string = xstrdup(mbdelim);
603 + output_delimiter_length = delimlen;
604 + }
605 +
606 + if (MB_CUR_MAX <= 1 || force_singlebyte_mode)
607 +#endif
608 + {
609 + static char dummy[2];
610 + dummy[0] = delim;
611 + dummy[1] = '\0';
612 + output_delimiter_string = dummy;
613 + output_delimiter_length = 1;
614 + }
615 }
616
617 if (optind == argc)
618 --- coreutils-5.93/src/pr.c.i18n 2005-09-16 08:50:33.000000000 +0100
619 +++ coreutils-5.93/src/pr.c 2005-12-23 08:53:01.000000000 +0000
620 @@ -313,6 +313,32 @@
621
622 #include <getopt.h>
623 #include <sys/types.h>
624 +
625 +/* Get MB_LEN_MAX. */
626 +#include <limits.h>
627 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
628 + installation; work around this configuration error. */
629 +#if !defined MB_LEN_MAX || MB_LEN_MAX == 1
630 +# define MB_LEN_MAX 16
631 +#endif
632 +
633 +/* Get MB_CUR_MAX. */
634 +#include <stdlib.h>
635 +
636 +/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */
637 +/* Get mbstate_t, mbrtowc(), wcwidth(). */
638 +#if HAVE_WCHAR_H
639 +# include <wchar.h>
640 +#endif
641 +
642 +/* Get iswprint(). -- for wcwidth(). */
643 +#if HAVE_WCTYPE_H
644 +# include <wctype.h>
645 +#endif
646 +#if !defined iswprint && !HAVE_ISWPRINT
647 +# define iswprint(wc) 1
648 +#endif
649 +
650 #include "system.h"
651 #include "error.h"
652 #include "hard-locale.h"
653 @@ -324,6 +350,18 @@
654 #include "strftime.h"
655 #include "xstrtol.h"
656
657 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
658 +#if HAVE_MBRTOWC && defined mbstate_t
659 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
660 +#endif
661 +
662 +#ifndef HAVE_DECL_WCWIDTH
663 +"this configure-time declaration test was not run"
664 +#endif
665 +#if !HAVE_DECL_WCWIDTH
666 +extern int wcwidth ();
667 +#endif
668 +
669 /* The official name of this program (e.g., no `g' prefix). */
670 #define PROGRAM_NAME "pr"
671
672 @@ -416,7 +454,20 @@
673
674 #define NULLCOL (COLUMN *)0
675
676 -static int char_to_clump (char c);
677 +/* Function pointers to switch functions for single byte locale or for
678 + multibyte locale. If multibyte functions do not exist in your system,
679 + these pointers always point to the functions for single byte locale. */
680 +static void (*print_char) (char c);
681 +static int (*char_to_clump) (char c);
682 +
683 +/* Functions for single byte locale. */
684 +static void print_char_single (char c);
685 +static int char_to_clump_single (char c);
686 +
687 +/* Functions for multibyte locale. */
688 +static void print_char_multi (char c);
689 +static int char_to_clump_multi (char c);
690 +
691 static bool read_line (COLUMN *p);
692 static bool print_page (void);
693 static bool print_stored (COLUMN *p);
694 @@ -426,6 +477,7 @@
695 static void pad_across_to (int position);
696 static void add_line_number (COLUMN *p);
697 static void getoptarg (char *arg, char switch_char, char *character,
698 + int *character_length, int *character_width,
699 int *number);
700 void usage (int status);
701 static void print_files (int number_of_files, char **av);
702 @@ -440,7 +492,6 @@
703 static void pad_down (int lines);
704 static void read_rest_of_line (COLUMN *p);
705 static void skip_read (COLUMN *p, int column_number);
706 -static void print_char (char c);
707 static void cleanup (void);
708 static void print_sep_string (void);
709 static void separator_string (const char *optarg_S);
710 @@ -455,7 +506,7 @@
711 we store the leftmost columns contiguously in buff.
712 To print a line from buff, get the index of the first character
713 from line_vector[i], and print up to line_vector[i + 1]. */
714 -static char *buff;
715 +static unsigned char *buff;
716
717 /* Index of the position in buff where the next character
718 will be stored. */
719 @@ -559,7 +610,7 @@
720 static bool untabify_input = false;
721
722 /* (-e) The input tab character. */
723 -static char input_tab_char = '\t';
724 +static char input_tab_char[MB_LEN_MAX] = "\t";
725
726 /* (-e) Tabstops are at chars_per_tab, 2*chars_per_tab, 3*chars_per_tab, ...
727 where the leftmost column is 1. */
728 @@ -569,7 +620,10 @@
729 static bool tabify_output = false;
730
731 /* (-i) The output tab character. */
732 -static char output_tab_char = '\t';
733 +static char output_tab_char[MB_LEN_MAX] = "\t";
734 +
735 +/* (-i) The byte length of output tab character. */
736 +static int output_tab_char_length = 1;
737
738 /* (-i) The width of the output tab. */
739 static int chars_per_output_tab = 8;
740 @@ -643,7 +697,13 @@
741 static bool numbered_lines = false;
742
743 /* (-n) Character which follows each line number. */
744 -static char number_separator = '\t';
745 +static char number_separator[MB_LEN_MAX] = "\t";
746 +
747 +/* (-n) The byte length of the character which follows each line number. */
748 +static int number_separator_length = 1;
749 +
750 +/* (-n) The character width of the character which follows each line number. */
751 +static int number_separator_width = 0;
752
753 /* (-n) line counting starts with 1st line of input file (not with 1st
754 line of 1st page printed). */
755 @@ -696,6 +756,7 @@
756 -a|COLUMN|-m is a `space' and with the -J option a `tab'. */
757 static char *col_sep_string = "";
758 static int col_sep_length = 0;
759 +static int col_sep_width = 0;
760 static char *column_separator = " ";
761 static char *line_separator = "\t";
762
763 @@ -852,6 +913,13 @@
764 col_sep_length = (int) strlen (optarg_S);
765 col_sep_string = xmalloc (col_sep_length + 1);
766 strcpy (col_sep_string, optarg_S);
767 +
768 +#if HAVE_MBRTOWC
769 + if (MB_CUR_MAX > 1)
770 + col_sep_width = mbswidth (col_sep_string, 0);
771 + else
772 +#endif
773 + col_sep_width = col_sep_length;
774 }
775
776 int
777 @@ -877,6 +945,21 @@
778
779 atexit (close_stdout);
780
781 +/* Define which functions are used, the ones for single byte locale or the ones
782 + for multibyte locale. */
783 +#if HAVE_MBRTOWC
784 + if (MB_CUR_MAX > 1)
785 + {
786 + print_char = print_char_multi;
787 + char_to_clump = char_to_clump_multi;
788 + }
789 + else
790 +#endif
791 + {
792 + print_char = print_char_single;
793 + char_to_clump = char_to_clump_single;
794 + }
795 +
796 n_files = 0;
797 file_names = (argc > 1
798 ? xmalloc ((argc - 1) * sizeof (char *))
799 @@ -949,8 +1032,12 @@
800 break;
801 case 'e':
802 if (optarg)
803 - getoptarg (optarg, 'e', &input_tab_char,
804 - &chars_per_input_tab);
805 + {
806 + int dummy_length, dummy_width;
807 +
808 + getoptarg (optarg, 'e', input_tab_char, &dummy_length,
809 + &dummy_width, &chars_per_input_tab);
810 + }
811 /* Could check tab width > 0. */
812 untabify_input = true;
813 break;
814 @@ -963,8 +1050,12 @@
815 break;
816 case 'i':
817 if (optarg)
818 - getoptarg (optarg, 'i', &output_tab_char,
819 - &chars_per_output_tab);
820 + {
821 + int dummy_width;
822 +
823 + getoptarg (optarg, 'i', output_tab_char, &output_tab_char_length,
824 + &dummy_width, &chars_per_output_tab);
825 + }
826 /* Could check tab width > 0. */
827 tabify_output = true;
828 break;
829 @@ -991,8 +1082,8 @@
830 case 'n':
831 numbered_lines = true;
832 if (optarg)
833 - getoptarg (optarg, 'n', &number_separator,
834 - &chars_per_number);
835 + getoptarg (optarg, 'n', number_separator, &number_separator_length,
836 + &number_separator_width, &chars_per_number);
837 break;
838 case 'N':
839 skip_count = false;
840 @@ -1031,7 +1122,7 @@
841 old_s = false;
842 /* Reset an additional input of -s, -S dominates -s */
843 col_sep_string = "";
844 - col_sep_length = 0;
845 + col_sep_length = col_sep_width = 0;
846 use_col_separator = true;
847 if (optarg)
848 separator_string (optarg);
849 @@ -1188,10 +1279,45 @@
850 a number. */
851
852 static void
853 -getoptarg (char *arg, char switch_char, char *character, int *number)
854 +getoptarg (char *arg, char switch_char, char *character, int *character_length,
855 + int *character_width, int *number)
856 {
857 if (!ISDIGIT (*arg))
858 - *character = *arg++;
859 + {
860 +#ifdef HAVE_MBRTOWC
861 + if (MB_CUR_MAX > 1) /* for multibyte locale. */
862 + {
863 + wchar_t wc;
864 + size_t mblength;
865 + int width;
866 + mbstate_t state = {'\0'};
867 +
868 + mblength = mbrtowc (&wc, arg, strnlen(arg, MB_LEN_MAX), &state);
869 +
870 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
871 + {
872 + *character_length = 1;
873 + *character_width = 1;
874 + }
875 + else
876 + {
877 + *character_length = (mblength < 1) ? 1 : mblength;
878 + width = wcwidth (wc);
879 + *character_width = (width < 0) ? 0 : width;
880 + }
881 +
882 + strncpy (character, arg, *character_length);
883 + arg += *character_length;
884 + }
885 + else /* for single byte locale. */
886 +#endif
887 + {
888 + *character = *arg++;
889 + *character_length = 1;
890 + *character_width = 1;
891 + }
892 + }
893 +
894 if (*arg)
895 {
896 long int tmp_long;
897 @@ -1256,7 +1382,7 @@
898 else
899 col_sep_string = column_separator;
900
901 - col_sep_length = 1;
902 + col_sep_length = col_sep_width = 1;
903 use_col_separator = true;
904 }
905 /* It's rather pointless to define a TAB separator with column
906 @@ -1288,11 +1414,11 @@
907 TAB_WIDTH (chars_per_input_tab, chars_per_number); */
908
909 /* Estimate chars_per_text without any margin and keep it constant. */
910 - if (number_separator == '\t')
911 + if (number_separator[0] == '\t')
912 number_width = chars_per_number +
913 TAB_WIDTH (chars_per_default_tab, chars_per_number);
914 else
915 - number_width = chars_per_number + 1;
916 + number_width = chars_per_number + number_separator_width;
917
918 /* The number is part of the column width unless we are
919 printing files in parallel. */
920 @@ -1307,7 +1433,7 @@
921 }
922
923 chars_per_column = (chars_per_line - chars_used_by_number -
924 - (columns - 1) * col_sep_length) / columns;
925 + (columns - 1) * col_sep_width) / columns;
926
927 if (chars_per_column < 1)
928 error (EXIT_FAILURE, 0, _("page width too narrow"));
929 @@ -1432,7 +1558,7 @@
930
931 /* Enlarge p->start_position of first column to use the same form of
932 padding_not_printed with all columns. */
933 - h = h + col_sep_length;
934 + h = h + col_sep_width;
935
936 /* This loop takes care of all but the rightmost column. */
937
938 @@ -1466,7 +1592,7 @@
939 }
940 else
941 {
942 - h = h_next + col_sep_length;
943 + h = h_next + col_sep_width;
944 h_next = h + chars_per_column;
945 }
946 }
947 @@ -1756,9 +1882,9 @@
948 align_column (COLUMN *p)
949 {
950 padding_not_printed = p->start_position;
951 - if (padding_not_printed - col_sep_length > 0)
952 + if (padding_not_printed - col_sep_width > 0)
953 {
954 - pad_across_to (padding_not_printed - col_sep_length);
955 + pad_across_to (padding_not_printed - col_sep_width);
956 padding_not_printed = ANYWHERE;
957 }
958
959 @@ -2029,13 +2155,13 @@
960 /* May be too generous. */
961 buff = X2REALLOC (buff, &buff_allocated);
962 }
963 - buff[buff_current++] = c;
964 + buff[buff_current++] = (unsigned char) c;
965 }
966
967 static void
968 add_line_number (COLUMN *p)
969 {
970 - int i;
971 + int i, j;
972 char *s;
973 int left_cut;
974
975 @@ -2058,22 +2184,24 @@
976 /* Tabification is assumed for multiple columns, also for n-separators,
977 but `default n-separator = TAB' hasn't been given priority over
978 equal column_width also specified by POSIX. */
979 - if (number_separator == '\t')
980 + if (number_separator[0] == '\t')
981 {
982 i = number_width - chars_per_number;
983 while (i-- > 0)
984 (p->char_func) (' ');
985 }
986 else
987 - (p->char_func) (number_separator);
988 + for (j = 0; j < number_separator_length; j++)
989 + (p->char_func) (number_separator[j]);
990 }
991 else
992 /* To comply with POSIX, we avoid any expansion of default TAB
993 separator with a single column output. No column_width requirement
994 has to be considered. */
995 {
996 - (p->char_func) (number_separator);
997 - if (number_separator == '\t')
998 + for (j = 0; j < number_separator_length; j++)
999 + (p->char_func) (number_separator[j]);
1000 + if (number_separator[0] == '\t')
1001 output_position = POS_AFTER_TAB (chars_per_output_tab,
1002 output_position);
1003 }
1004 @@ -2234,7 +2362,7 @@
1005 while (goal - h_old > 1
1006 && (h_new = POS_AFTER_TAB (chars_per_output_tab, h_old)) <= goal)
1007 {
1008 - putchar (output_tab_char);
1009 + fwrite (output_tab_char, sizeof(char), output_tab_char_length, stdout);
1010 h_old = h_new;
1011 }
1012 while (++h_old <= goal)
1013 @@ -2254,6 +2382,7 @@
1014 {
1015 char *s;
1016 int l = col_sep_length;
1017 + int not_space_flag;
1018
1019 s = col_sep_string;
1020
1021 @@ -2267,6 +2396,7 @@
1022 {
1023 for (; separators_not_printed > 0; --separators_not_printed)
1024 {
1025 + not_space_flag = 0;
1026 while (l-- > 0)
1027 {
1028 /* 3 types of sep_strings: spaces only, spaces and chars,
1029 @@ -2280,12 +2410,15 @@
1030 }
1031 else
1032 {
1033 + not_space_flag = 1;
1034 if (spaces_not_printed > 0)
1035 print_white_space ();
1036 putchar (*s++);
1037 - ++output_position;
1038 }
1039 }
1040 + if (not_space_flag)
1041 + output_position += col_sep_width;
1042 +
1043 /* sep_string ends with some spaces */
1044 if (spaces_not_printed > 0)
1045 print_white_space ();
1046 @@ -2313,7 +2446,7 @@
1047 required number of tabs and spaces. */
1048
1049 static void
1050 -print_char (char c)
1051 +print_char_single (char c)
1052 {
1053 if (tabify_output)
1054 {
1055 @@ -2337,6 +2470,74 @@
1056 putchar (c);
1057 }
1058
1059 +#ifdef HAVE_MBRTOWC /* print_char_multi: implementation installed in the print_char pointer when MB_CUR_MAX > 1. */
1060 +static void
1061 +print_char_multi (char c)
1062 +{
1063 + static size_t mbc_pos = 0; /* Number of pending bytes held in MBC. */
1064 + static unsigned char mbc[MB_LEN_MAX] = {'\0'}; /* Bytes of a not yet completed multibyte character. */
1065 + static mbstate_t state = {'\0'}; /* Conversion state kept across calls. */
1066 + mbstate_t state_bak;
1067 + wchar_t wc;
1068 + size_t mblength;
1069 + int width;
1070 +
1071 + if (tabify_output)
1072 + {
1073 + state_bak = state;
1074 + mbc[mbc_pos++] = (unsigned char)c;
1075 + mblength = mbrtowc (&wc, (char *) mbc, mbc_pos, &state); /* mbrtowc takes a `const char *'. */
1076 +
1077 + while (mbc_pos > 0)
1078 + {
1079 + switch (mblength)
1080 + {
1081 + case (size_t)-2: /* Incomplete character: wait for more bytes. */
1082 + state = state_bak;
1083 + return;
1084 +
1085 + case (size_t)-1: /* Invalid sequence: emit the first pending byte as one column. */
1086 + state = state_bak;
1087 + ++output_position;
1088 + putchar (mbc[0]);
1089 + memmove (mbc, mbc + 1, MB_CUR_MAX - 1);
1090 + --mbc_pos;
1091 + break;
1092 +
1093 + case 0: /* mbrtowc returns 0 for the null character. */
1094 + mblength = 1;
1095 + /* Fall through. */
1096 + default:
1097 + if (wc == L' ')
1098 + {
1099 + memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength);
1100 + mbc_pos -= mblength; /* Drop exactly the bytes that were shifted out. */
1101 + ++spaces_not_printed;
1102 + return;
1103 + }
1104 + else if (spaces_not_printed > 0)
1105 + print_white_space ();
1106 +
1107 + /* Nonprintables are assumed to have width 0, except L'\b'. */
1108 + if ((width = wcwidth (wc)) < 1)
1109 + {
1110 + if (wc == L'\b')
1111 + --output_position;
1112 + }
1113 + else
1114 + output_position += width;
1115 +
1116 + fwrite (mbc, sizeof(char), mblength, stdout);
1117 + memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength);
1118 + mbc_pos -= mblength;
1119 + }
1120 + }
1121 + return;
1122 + }
1123 + putchar (c); /* Without output tabification the byte is passed through unchanged. */
1124 +}
1125 +#endif
1126 +
1127 /* Skip to page PAGE before printing.
1128 PAGE may be larger than total number of pages. */
1129
1130 @@ -2517,9 +2718,9 @@
1131 align_empty_cols = false;
1132 }
1133
1134 - if (padding_not_printed - col_sep_length > 0)
1135 + if (padding_not_printed - col_sep_width > 0)
1136 {
1137 - pad_across_to (padding_not_printed - col_sep_length);
1138 + pad_across_to (padding_not_printed - col_sep_width);
1139 padding_not_printed = ANYWHERE;
1140 }
1141
1142 @@ -2620,9 +2821,9 @@
1143 }
1144 }
1145
1146 - if (padding_not_printed - col_sep_length > 0)
1147 + if (padding_not_printed - col_sep_width > 0)
1148 {
1149 - pad_across_to (padding_not_printed - col_sep_length);
1150 + pad_across_to (padding_not_printed - col_sep_width);
1151 padding_not_printed = ANYWHERE;
1152 }
1153
1154 @@ -2635,8 +2836,8 @@
1155 if (spaces_not_printed == 0)
1156 {
1157 output_position = p->start_position + end_vector[line];
1158 - if (p->start_position - col_sep_length == chars_per_margin)
1159 - output_position -= col_sep_length;
1160 + if (p->start_position - col_sep_width == chars_per_margin)
1161 + output_position -= col_sep_width;
1162 }
1163
1164 return true;
1165 @@ -2655,7 +2856,7 @@
1166 number of characters is 1.) */
1167
1168 static int
1169 -char_to_clump (char c)
1170 +char_to_clump_single (char c)
1171 {
1172 unsigned char uc = c;
1173 char *s = clump_buff;
1174 @@ -2665,10 +2866,10 @@
1175 int chars;
1176 int chars_per_c = 8;
1177
1178 - if (c == input_tab_char)
1179 + if (c == input_tab_char[0])
1180 chars_per_c = chars_per_input_tab;
1181
1182 - if (c == input_tab_char || c == '\t')
1183 + if (c == input_tab_char[0] || c == '\t')
1184 {
1185 width = TAB_WIDTH (chars_per_c, input_position);
1186
1187 @@ -2739,6 +2940,154 @@
1188 return chars;
1189 }
1190
1191 +#ifdef HAVE_MBRTOWC
1192 +static int
1193 +char_to_clump_multi (char c)
1194 +{
1195 + static size_t mbc_pos = 0;
1196 + static char mbc[MB_LEN_MAX] = {'\0'};
1197 + static mbstate_t state = {'\0'};
1198 + mbstate_t state_bak;
1199 + wchar_t wc;
1200 + size_t mblength;
1201 + int wc_width;
1202 + register int *s = clump_buff;
1203 + register int i, j;
1204 + char esc_buff[4];
1205 + int width;
1206 + int chars;
1207 + int chars_per_c = 8;
1208 + /* Append C to the pending bytes and try to decode one character. */
1209 + state_bak = state;
1210 + mbc[mbc_pos++] = c;
1211 + mblength = mbrtowc (&wc, mbc, mbc_pos, &state);
1212 +
1213 + width = 0;
1214 + chars = 0;
1215 + while (mbc_pos > 0) /* NOTE(review): mblength/wc are not re-decoded between iterations - confirm. */
1216 + {
1217 + switch (mblength)
1218 + {
1219 + case (size_t)-2: /* Incomplete character: nothing to clump yet. */
1220 + state = state_bak;
1221 + return 0;
1222 +
1223 + case (size_t)-1: /* Invalid sequence: handle its first byte alone. */
1224 + state = state_bak;
1225 + mblength = 1;
1226 +
1227 + if (use_esc_sequence || use_cntrl_prefix)
1228 + {
1229 + width += 4;
1230 + chars += 4;
1231 + *s++ = '\\';
1232 + sprintf (esc_buff, "%03o", (unsigned char) mbc[0]); /* 3 digits + NUL fit only for 0..0377. */
1233 + for (i = 0; i <= 2; ++i)
1234 + *s++ = (int) esc_buff[i];
1235 + }
1236 + else
1237 + {
1238 + width += 1;
1239 + chars += 1;
1240 + *s++ = mbc[0];
1241 + }
1242 + break;
1243 +
1244 + case 0:
1245 + mblength = 1;
1246 + /* Fall through */
1247 +
1248 + default:
1249 + if (memcmp (mbc, input_tab_char, mblength) == 0)
1250 + chars_per_c = chars_per_input_tab;
1251 +
1252 + if (memcmp (mbc, input_tab_char, mblength) == 0 || c == '\t')
1253 + {
1254 + int width_inc;
1255 +
1256 + width_inc = TAB_WIDTH (chars_per_c, input_position);
1257 + width += width_inc;
1258 +
1259 + if (untabify_input)
1260 + {
1261 + for (i = width_inc; i; --i)
1262 + *s++ = ' ';
1263 + chars += width_inc;
1264 + }
1265 + else
1266 + {
1267 + for (i = 0; i < mblength; i++)
1268 + *s++ = mbc[i];
1269 + chars += mblength;
1270 + }
1271 + }
1272 + else if ((wc_width = wcwidth (wc)) < 1)
1273 + {
1274 + if (use_esc_sequence)
1275 + {
1276 + for (i = 0; i < mblength; i++)
1277 + {
1278 + width += 4;
1279 + chars += 4;
1280 + *s++ = '\\';
1281 + sprintf (esc_buff, "%03o", (unsigned char) mbc[i]); /* Escape each byte of the character. */
1282 + for (j = 0; j <= 2; ++j)
1283 + *s++ = (int) esc_buff[j];
1284 + }
1285 + }
1286 + else if (use_cntrl_prefix)
1287 + {
1288 + if (wc < 0200)
1289 + {
1290 + width += 2;
1291 + chars += 2;
1292 + *s++ = '^';
1293 + *s++ = wc ^ 0100;
1294 + }
1295 + else
1296 + {
1297 + for (i = 0; i < mblength; i++)
1298 + {
1299 + width += 4;
1300 + chars += 4;
1301 + *s++ = '\\';
1302 + sprintf (esc_buff, "%03o", (unsigned char) mbc[i]); /* Escape each byte of the character. */
1303 + for (j = 0; j <= 2; ++j)
1304 + *s++ = (int) esc_buff[j];
1305 + }
1306 + }
1307 + }
1308 + else if (wc == L'\b')
1309 + {
1310 + width += -1;
1311 + chars += 1;
1312 + *s++ = c;
1313 + }
1314 + else
1315 + {
1316 + width += 0;
1317 + chars += mblength;
1318 + for (i = 0; i < mblength; i++)
1319 + *s++ = mbc[i];
1320 + }
1321 + }
1322 + else
1323 + {
1324 + width += wc_width;
1325 + chars += mblength;
1326 + for (i = 0; i < mblength; i++)
1327 + *s++ = mbc[i];
1328 + }
1329 + }
1330 + memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength);
1331 + mbc_pos -= mblength;
1332 + }
1333 +
1334 + input_position += width;
1335 + return chars;
1336 +}
1337 +#endif
1338 +
1339 /* We've just printed some files and need to clean up things before
1340 looking for more options and printing the next batch of files.
1341
1342 --- coreutils-5.93/src/uniq.c.i18n 2005-07-05 07:32:54.000000000 +0100
1343 +++ coreutils-5.93/src/uniq.c 2005-12-23 08:53:01.000000000 +0000
1344 @@ -23,6 +23,16 @@
1345 #include <getopt.h>
1346 #include <sys/types.h>
1347
1348 +/* Get mbstate_t, mbrtowc(). */
1349 +#if HAVE_WCHAR_H
1350 +# include <wchar.h>
1351 +#endif
1352 +
1353 +/* Get isw* functions. */
1354 +#if HAVE_WCTYPE_H
1355 +# include <wctype.h>
1356 +#endif
1357 +
1358 #include "system.h"
1359 #include "argmatch.h"
1360 #include "linebuffer.h"
1361 @@ -32,7 +42,19 @@
1362 #include "quote.h"
1363 #include "xmemcoll.h"
1364 #include "xstrtol.h"
1365 -#include "memcasecmp.h"
1366 +#include "xmemcoll.h"
1367 +
1368 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
1369 + installation; work around this configuration error. */
1370 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
1371 +# define MB_LEN_MAX 16
1372 +#endif
1373 +
1374 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
1375 +#if HAVE_MBRTOWC && defined mbstate_t
1376 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
1377 +#endif
1378 +
1379
1380 /* The official name of this program (e.g., no `g' prefix). */
1381 #define PROGRAM_NAME "uniq"
1382 @@ -109,6 +131,10 @@
1383 /* Select whether/how to delimit groups of duplicate lines. */
1384 static enum delimit_method delimit_groups;
1385
1386 +/* Function pointers. */
1387 +static char *
1388 +(*find_field) (struct linebuffer *line);
1389 +
1390 static struct option const longopts[] =
1391 {
1392 {"count", no_argument, NULL, 'c'},
1393 @@ -189,7 +215,7 @@
1394 return a pointer to the beginning of the line's field to be compared. */
1395
1396 static char *
1397 -find_field (const struct linebuffer *line)
1398 +find_field_uni (struct linebuffer *line)
1399 {
1400 size_t count;
1401 char *lp = line->buffer;
1402 @@ -210,6 +236,83 @@
1403 return lp + i;
1404 }
1405
1406 +#if HAVE_MBRTOWC
1407 +/* Decode the character at LP + POS into WC; on failure restore *STATEP, set CONVFAIL, and use a length of 1. */
1408 +# define MBCHAR_TO_WCHAR(WC, MBLENGTH, LP, POS, SIZE, STATEP, CONVFAIL) \
1409 + do \
1410 + { \
1411 + mbstate_t state_bak; \
1412 + \
1413 + CONVFAIL = 0; \
1414 + state_bak = *STATEP; \
1415 + \
1416 + MBLENGTH = mbrtowc (&WC, LP + POS, SIZE - POS, STATEP); \
1417 + \
1418 + switch (MBLENGTH) \
1419 + { \
1420 + case (size_t)-2: \
1421 + case (size_t)-1: \
1422 + *STATEP = state_bak; \
1423 + CONVFAIL++; \
1424 + /* Fall through */ \
1425 + case 0: \
1426 + MBLENGTH = 1; \
1427 + } \
1428 + } \
1429 + while (0)
1430 +/* Multibyte find_field: skip skip_fields blank-delimited fields, then skip_chars characters, of LINE. */
1431 +static char *
1432 +find_field_multi (struct linebuffer *line)
1433 +{
1434 + size_t count;
1435 + char *lp = line->buffer;
1436 + size_t size = line->length - 1;
1437 + size_t pos;
1438 + size_t mblength;
1439 + wchar_t wc;
1440 + mbstate_t *statep;
1441 + int convfail;
1442 +
1443 + pos = 0;
1444 + statep = &(line->state);
1445 +
1446 + /* skip fields. */
1447 + for (count = 0; count < skip_fields && pos < size; count++)
1448 + {
1449 + while (pos < size) /* Pass over blanks; stop after the first non-blank character. */
1450 + {
1451 + MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
1452 +
1453 + if (convfail || !iswblank (wc))
1454 + {
1455 + pos += mblength;
1456 + break;
1457 + }
1458 + pos += mblength;
1459 + }
1460 +
1461 + while (pos < size) /* Pass over the rest of the field, up to the next blank. */
1462 + {
1463 + MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
1464 +
1465 + if (!convfail && iswblank (wc))
1466 + break;
1467 +
1468 + pos += mblength;
1469 + }
1470 + }
1471 +
1472 + /* skip chars. */
1473 + for (count = 0; count < skip_chars && pos < size; count++)
1474 + {
1475 + MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
1476 + pos += mblength;
1477 + }
1478 +
1479 + return lp + pos;
1480 +}
1481 +#endif
1482 +
1483 /* Return false if two strings OLD and NEW match, true if not.
1484 OLD and NEW point not to the beginnings of the lines
1485 but rather to the beginnings of the fields to compare.
1486 @@ -218,6 +321,8 @@
1487 static bool
1488 different (char *old, char *new, size_t oldlen, size_t newlen)
1489 {
1490 + char *copy_old, *copy_new;
1491 +
1492 if (check_chars < oldlen)
1493 oldlen = check_chars;
1494 if (check_chars < newlen)
1495 @@ -225,14 +330,92 @@
1496
1497 if (ignore_case)
1498 {
1499 - /* FIXME: This should invoke strcoll somehow. */
1500 - return oldlen != newlen || memcasecmp (old, new, oldlen);
1501 + size_t i;
1502 +
1503 + copy_old = alloca (oldlen + 1);
1504 + copy_new = alloca (newlen + 1);
1505 +
1506 + for (i = 0; i < oldlen; i++)
1507 + copy_old[i] = toupper ((unsigned char) old[i]);
1508 + /* NEW has its own length; never size or index it with OLDLEN. */
1509 + for (i = 0; i < newlen; i++)
1510 + copy_new[i] = toupper ((unsigned char) new[i]);
1511 }
1512 - else if (hard_LC_COLLATE)
1513 - return xmemcoll (old, oldlen, new, newlen) != 0;
1514 else
1515 - return oldlen != newlen || memcmp (old, new, oldlen);
1516 + {
1517 + copy_old = (char *)old;
1518 + copy_new = (char *)new;
1519 + }
1520 +
1521 + return xmemcoll (copy_old, oldlen, copy_new, newlen);
1522 +}
1523 +
1524 +#if HAVE_MBRTOWC
1525 +static int
1526 +different_multi (const char *old, const char *new, size_t oldlen, size_t newlen, mbstate_t oldstate, mbstate_t newstate)
1527 +{
1528 + size_t i, j, chars;
1529 + const char *str[2];
1530 + char *copy[2];
1531 + size_t len[2];
1532 + mbstate_t state[2];
1533 + size_t mblength;
1534 + wchar_t wc, uwc;
1535 + mbstate_t state_bak;
1536 + /* Index 0 is OLD and index 1 is NEW, so one loop handles both strings. */
1537 + str[0] = old;
1538 + str[1] = new;
1539 + len[0] = oldlen;
1540 + len[1] = newlen;
1541 + state[0] = oldstate;
1542 + state[1] = newstate;
1543 +
1544 + for (i = 0; i < 2; i++)
1545 + {
1546 + copy[i] = alloca (len[i] + 1);
1547 + /* Copy at most check_chars characters, upper-casing them when ignore_case is set. */
1548 + for (j = 0, chars = 0; j < len[i] && chars < check_chars; chars++)
1549 + {
1550 + state_bak = state[i];
1551 + mblength = mbrtowc (&wc, str[i] + j, len[i] - j, &(state[i]));
1552 +
1553 + switch (mblength)
1554 + {
1555 + case (size_t)-1:
1556 + case (size_t)-2:
1557 + state[i] = state_bak;
1558 + /* Fall through */
1559 + case 0:
1560 + mblength = 1;
1561 + break;
1562 +
1563 + default:
1564 + if (ignore_case)
1565 + {
1566 + uwc = towupper (wc);
1567 +
1568 + if (uwc != wc)
1569 + {
1570 + mbstate_t state_wc;
1571 +
1572 + memset (&state_wc, '\0', sizeof(mbstate_t));
1573 + wcrtomb (copy[i] + j, uwc, &state_wc); /* NOTE(review): assumes the upper-case form occupies mblength bytes - confirm. */
1574 + }
1575 + else
1576 + memcpy (copy[i] + j, str[i] + j, mblength);
1577 + }
1578 + else
1579 + memcpy (copy[i] + j, str[i] + j, mblength);
1580 + }
1581 + j += mblength;
1582 + }
1583 + copy[i][j] = '\0';
1584 + len[i] = j;
1585 + }
1586 + /* Collate the two prepared copies; callers treat a nonzero result as "different". */
1587 + return xmemcoll (copy[0], len[0], copy[1], len[1]);
1588 }
1589 +#endif
1590
1591 /* Output the line in linebuffer LINE to standard output
1592 provided that the switches say it should be output.
1593 @@ -286,15 +469,43 @@
1594 {
1595 char *prevfield IF_LINT (= NULL);
1596 size_t prevlen IF_LINT (= 0);
1597 +#if HAVE_MBRTOWC
1598 + mbstate_t prevstate;
1599 +
1600 + memset (&prevstate, '\0', sizeof (mbstate_t));
1601 +#endif
1602
1603 while (!feof (stdin))
1604 {
1605 char *thisfield;
1606 size_t thislen;
1607 +#if HAVE_MBRTOWC
1608 + mbstate_t thisstate;
1609 +#endif
1610 +
1611 if (readlinebuffer (thisline, stdin) == 0)
1612 break;
1613 thisfield = find_field (thisline);
1614 thislen = thisline->length - 1 - (thisfield - thisline->buffer);
1615 +#if HAVE_MBRTOWC
1616 + if (MB_CUR_MAX > 1)
1617 + {
1618 + thisstate = thisline->state;
1619 +
1620 + if (prevline->length == 0 || different_multi
1621 + (thisfield, prevfield, thislen, prevlen, thisstate, prevstate))
1622 + {
1623 + fwrite (thisline->buffer, sizeof (char),
1624 + thisline->length, stdout);
1625 +
1626 + SWAP_LINES (prevline, thisline);
1627 + prevfield = thisfield;
1628 + prevlen = thislen;
1629 + prevstate = thisstate;
1630 + }
1631 + }
1632 + else
1633 +#endif
1634 if (prevline->length == 0
1635 || different (thisfield, prevfield, thislen, prevlen))
1636 {
1637 @@ -313,17 +524,26 @@
1638 size_t prevlen;
1639 uintmax_t match_count = 0;
1640 bool first_delimiter = true;
1641 +#if HAVE_MBRTOWC
1642 + mbstate_t prevstate;
1643 +#endif
1644
1645 if (readlinebuffer (prevline, stdin) == 0)
1646 goto closefiles;
1647 prevfield = find_field (prevline);
1648 prevlen = prevline->length - 1 - (prevfield - prevline->buffer);
1649 +#if HAVE_MBRTOWC
1650 + prevstate = prevline->state;
1651 +#endif
1652
1653 while (!feof (stdin))
1654 {
1655 bool match;
1656 char *thisfield;
1657 size_t thislen;
1658 +#if HAVE_MBRTOWC
1659 + mbstate_t thisstate;
1660 +#endif
1661 if (readlinebuffer (thisline, stdin) == 0)
1662 {
1663 if (ferror (stdin))
1664 @@ -332,6 +552,15 @@
1665 }
1666 thisfield = find_field (thisline);
1667 thislen = thisline->length - 1 - (thisfield - thisline->buffer);
1668 +#if HAVE_MBRTOWC
1669 + if (MB_CUR_MAX > 1)
1670 + {
1671 + thisstate = thisline->state;
1672 + match = !different_multi (thisfield, prevfield,
1673 + thislen, prevlen, thisstate, prevstate);
1674 + }
1675 + else
1676 +#endif
1677 match = !different (thisfield, prevfield, thislen, prevlen);
1678 match_count += match;
1679
1680 @@ -364,6 +593,9 @@
1681 SWAP_LINES (prevline, thisline);
1682 prevfield = thisfield;
1683 prevlen = thislen;
1684 +#if HAVE_MBRTOWC
1685 + prevstate = thisstate;
1686 +#endif
1687 if (!match)
1688 match_count = 0;
1689 }
1690 @@ -408,6 +640,19 @@
1691
1692 atexit (close_stdout);
1693
1694 +#if HAVE_MBRTOWC
1695 + if (MB_CUR_MAX > 1)
1696 + {
1697 + find_field = find_field_multi;
1698 + }
1699 + else
1700 +#endif
1701 + {
1702 + find_field = find_field_uni;
1703 + }
1704 +
1705 +
1706 +
1707 skip_chars = 0;
1708 skip_fields = 0;
1709 check_chars = SIZE_MAX;
1710 --- coreutils-5.93/src/expand.c.i18n 2005-08-12 08:16:25.000000000 +0100
1711 +++ coreutils-5.93/src/expand.c 2005-12-23 08:53:01.000000000 +0000
1712 @@ -38,11 +38,28 @@
1713 #include <stdio.h>
1714 #include <getopt.h>
1715 #include <sys/types.h>
1716 +
1717 +/* Get mbstate_t, mbrtowc(), wcwidth(). */
1718 +#if HAVE_WCHAR_H
1719 +# include <wchar.h>
1720 +#endif
1721 +
1722 #include "system.h"
1723 #include "error.h"
1724 #include "quote.h"
1725 #include "xstrndup.h"
1726
1727 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
1728 + installation; work around this configuration error. */
1729 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
1730 +# define MB_LEN_MAX 16
1731 +#endif
1732 +
1733 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
1734 +#if HAVE_MBRTOWC && defined mbstate_t
1735 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
1736 +#endif
1737 +
1738 /* The official name of this program (e.g., no `g' prefix). */
1739 #define PROGRAM_NAME "expand"
1740
1741 @@ -364,6 +382,142 @@
1742 }
1743 }
1744
1745 +#if HAVE_MBRTOWC
1746 +static void
1747 +expand_multibyte (void)
1748 +{
1749 + FILE *fp; /* Input stream. */
1750 + mbstate_t i_state; /* Current shift state of the input stream. */
1751 + mbstate_t i_state_bak; /* Back up the I_STATE. */
1752 + mbstate_t o_state; /* Current shift state of the output stream. */
1753 + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
1754 + char *bufpos = buf; /* Next read position of BUF; set before the first refill reads it. */
1755 + size_t buflen = 0; /* The length of the byte sequence in buf. */
1756 + wchar_t wc; /* A gotten wide character. */
1757 + size_t mblength; /* The byte size of a multibyte character
1758 + which shows as same character as WC. */
1759 + int tab_index = 0; /* Index in `tab_list' of next tabstop. */
1760 + int column = 0; /* Column on screen of the next char. */
1761 + int next_tab_column; /* Column the next tab stop is on. */
1762 + int convert = 1; /* If nonzero, perform translations. */
1763 +
1764 + fp = next_file ((FILE *) NULL);
1765 + if (fp == NULL)
1766 + return;
1767 +
1768 + memset (&o_state, '\0', sizeof(mbstate_t));
1769 + memset (&i_state, '\0', sizeof(mbstate_t));
1770 +
1771 + for (;;)
1772 + {
1773 + /* Refill the buffer BUF. */
1774 + if (buflen < MB_LEN_MAX && !feof(fp) && !ferror(fp))
1775 + {
1776 + memmove (buf, bufpos, buflen);
1777 + buflen += fread (buf + buflen, sizeof(char), BUFSIZ, fp);
1778 + bufpos = buf;
1779 + }
1780 +
1781 + /* No character is left in BUF. */
1782 + if (buflen < 1)
1783 + {
1784 + fp = next_file (fp);
1785 +
1786 + if (fp == NULL)
1787 + break; /* No more files. */
1788 + else
1789 + {
1790 + memset (&i_state, '\0', sizeof(mbstate_t));
1791 + continue;
1792 + }
1793 + }
1794 +
1795 + /* Get a wide character. */
1796 + i_state_bak = i_state;
1797 + mblength = mbrtowc (&wc, bufpos, buflen, &i_state);
1798 +
1799 + switch (mblength)
1800 + {
1801 + case (size_t)-1: /* illegal byte sequence. */
1802 + case (size_t)-2:
1803 + mblength = 1;
1804 + i_state = i_state_bak;
1805 + if (convert)
1806 + {
1807 + ++column;
1808 + if (convert_entire_line == 0)
1809 + convert = 0;
1810 + }
1811 + putchar (*bufpos);
1812 + break;
1813 +
1814 + case 0: /* null. */
1815 + mblength = 1;
1816 + if (convert && convert_entire_line == 0)
1817 + convert = 0;
1818 + putchar ('\0');
1819 + break;
1820 +
1821 + default:
1822 + if (wc == L'\n') /* LF. */
1823 + {
1824 + tab_index = 0;
1825 + column = 0;
1826 + convert = 1;
1827 + putchar ('\n');
1828 + }
1829 + else if (wc == L'\t' && convert) /* Tab. */
1830 + {
1831 + if (tab_size == 0)
1832 + {
1833 + /* Do not let tab_index == first_free_tab;
1834 + stop when it is 1 less. */
1835 + while (tab_index < first_free_tab - 1
1836 + && column >= tab_list[tab_index])
1837 + tab_index++;
1838 + next_tab_column = tab_list[tab_index];
1839 + if (tab_index < first_free_tab - 1)
1840 + tab_index++;
1841 + if (column >= next_tab_column)
1842 + next_tab_column = column + 1;
1843 + }
1844 + else
1845 + next_tab_column = column + tab_size - column % tab_size;
1846 +
1847 + while (column < next_tab_column)
1848 + {
1849 + putchar (' ');
1850 + ++column;
1851 + }
1852 + }
1853 + else /* Others. */
1854 + {
1855 + if (convert)
1856 + {
1857 + if (wc == L'\b')
1858 + {
1859 + if (column > 0)
1860 + --column;
1861 + }
1862 + else
1863 + {
1864 + int width; /* The width of WC. */
1865 +
1866 + width = wcwidth (wc);
1867 + column += (width > 0) ? width : 0;
1868 + if (convert_entire_line == 0)
1869 + convert = 0;
1870 + }
1871 + }
1872 + fwrite (bufpos, sizeof(char), mblength, stdout);
1873 + }
1874 + }
1875 + buflen -= mblength;
1876 + bufpos += mblength;
1877 + }
1878 +}
1879 +#endif
1880 +
1881 int
1882 main (int argc, char **argv)
1883 {
1884 @@ -428,7 +582,12 @@
1885
1886 file_list = (optind < argc ? &argv[optind] : stdin_argv);
1887
1888 - expand ();
1889 +#if HAVE_MBRTOWC
1890 + if (MB_CUR_MAX > 1)
1891 + expand_multibyte ();
1892 + else
1893 +#endif
1894 + expand ();
1895
1896 if (have_read_stdin && fclose (stdin) != 0)
1897 error (EXIT_FAILURE, errno, "-");
1898 --- coreutils-5.93/src/fold.c.i18n 2005-08-12 08:29:38.000000000 +0100
1899 +++ coreutils-5.93/src/fold.c 2005-12-23 08:53:01.000000000 +0000
1900 @@ -23,11 +23,33 @@
1901 #include <getopt.h>
1902 #include <sys/types.h>
1903
1904 +/* Get mbstate_t, mbrtowc(), wcwidth(). */
1905 +#if HAVE_WCHAR_H
1906 +# include <wchar.h>
1907 +#endif
1908 +
1909 +/* Get iswprint(), iswblank(). */
1910 +#if HAVE_WCTYPE_H
1911 +# include <wctype.h>
1912 +#endif
1913 +
1914 #include "system.h"
1915 #include "error.h"
1916 #include "quote.h"
1917 #include "xstrtol.h"
1918
1919 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
1920 + installation; work around this configuration error. */
1921 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
1922 +# undef MB_LEN_MAX
1923 +# define MB_LEN_MAX 16
1924 +#endif
1925 +
1926 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
1927 +#if HAVE_MBRTOWC && defined mbstate_t
1928 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
1929 +#endif
1930 +
1931 #define TAB_WIDTH 8
1932
1933 /* The official name of this program (e.g., no `g' prefix). */
1934 @@ -35,23 +57,44 @@
1935
1936 #define AUTHORS "David MacKenzie"
1937
1938 +#define FATAL_ERROR(Message) \
1939 + do \
1940 + { \
1941 + error (0, 0, "%s", (Message)); /* Message is data, never the format. */ \
1942 + usage (2); \
1943 + } \
1944 + while (0)
1945 +
1946 +enum operating_mode
1947 +{
1948 + /* Fold texts by columns that are at the given positions. */
1949 + column_mode,
1950 +
1951 + /* Fold texts by bytes that are at the given positions. */
1952 + byte_mode,
1953 +
1954 + /* Fold texts by characters that are at the given positions. */
1955 + character_mode,
1956 +};
1957 +
1958 /* The name this program was run with. */
1959 char *program_name;
1960
1961 +/* The argument shows current mode. (Default: column_mode) */
1962 +static enum operating_mode operating_mode;
1963 +
1964 /* If nonzero, try to break on whitespace. */
1965 static bool break_spaces;
1966
1967 -/* If nonzero, count bytes, not column positions. */
1968 -static bool count_bytes;
1969 -
1970 /* If nonzero, at least one of the files we read was standard input. */
1971 static bool have_read_stdin;
1972
1973 -static char const shortopts[] = "bsw:0::1::2::3::4::5::6::7::8::9::";
1974 +static char const shortopts[] = "bcsw:0::1::2::3::4::5::6::7::8::9::";
1975
1976 static struct option const longopts[] =
1977 {
1978 {"bytes", no_argument, NULL, 'b'},
1979 + {"characters", no_argument, NULL, 'c'},
1980 {"spaces", no_argument, NULL, 's'},
1981 {"width", required_argument, NULL, 'w'},
1982 {GETOPT_HELP_OPTION_DECL},
1983 @@ -81,6 +124,7 @@
1984 "), stdout);
1985 fputs (_("\
1986 -b, --bytes count bytes rather than columns\n\
1987 + -c, --characters count characters rather than columns\n\
1988 -s, --spaces break at spaces\n\
1989 -w, --width=WIDTH use WIDTH columns instead of 80\n\
1990 "), stdout);
1991 @@ -98,7 +142,7 @@
1992 static size_t
1993 adjust_column (size_t column, char c)
1994 {
1995 - if (!count_bytes)
1996 + if (operating_mode != byte_mode)
1997 {
1998 if (c == '\b')
1999 {
2000 @@ -117,35 +161,14 @@
2001 return column;
2002 }
2003
2004 -/* Fold file FILENAME, or standard input if FILENAME is "-",
2005 - to stdout, with maximum line length WIDTH.
2006 - Return true if successful. */
2007 -
2008 -static bool
2009 -fold_file (char *filename, size_t width)
2010 +static void
2011 +fold_text (FILE *istream, size_t width, int *saved_errno)
2012 {
2013 - FILE *istream;
2014 int c;
2015 size_t column = 0; /* Screen column where next char will go. */
2016 size_t offset_out = 0; /* Index in `line_out' for next char. */
2017 static char *line_out = NULL;
2018 static size_t allocated_out = 0;
2019 - int saved_errno;
2020 -
2021 - if (STREQ (filename, "-"))
2022 - {
2023 - istream = stdin;
2024 - have_read_stdin = true;
2025 - }
2026 - else
2027 - istream = fopen (filename, "r");
2028 -
2029 - if (istream == NULL)
2030 - {
2031 - error (0, errno, "%s", filename);
2032 - return false;
2033 - }
2034 -
2035 while ((c = getc (istream)) != EOF)
2036 {
2037 if (offset_out + 1 >= allocated_out)
2038 @@ -172,6 +195,15 @@
2039 bool found_blank = false;
2040 size_t logical_end = offset_out;
2041
2042 + /* If LINE_OUT has no wide character,
2043 + put a new wide character in LINE_OUT
2044 + if column is bigger than width. */
2045 + if (offset_out == 0)
2046 + {
2047 + line_out[offset_out++] = c;
2048 + continue;
2049 + }
2050 +
2051 /* Look for the last blank. */
2052 while (logical_end)
2053 {
2054 @@ -218,11 +250,225 @@
2055 line_out[offset_out++] = c;
2056 }
2057
2058 - saved_errno = errno;
2059 + *saved_errno = errno;
2060 +
2061 + if (offset_out)
2062 + fwrite (line_out, sizeof (char), (size_t) offset_out, stdout);
2063 +
2064 + /* line_out is static and reused (with allocated_out) by later calls; it must not be freed here. */
2065 +}
2066 +
2067 +#if HAVE_MBRTOWC
2068 +static void
2069 +fold_multibyte_text (FILE *istream, int width, int *saved_errno)
2070 +{
2071 + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
2072 + size_t buflen = 0; /* The length of the byte sequence in buf. */
2073 + char *bufpos = buf; /* Next read position of BUF; set before the first refill reads it. */
2074 + wint_t wc; /* A gotten wide character. */
2075 + size_t mblength; /* The byte size of a multibyte character which shows
2076 + as same character as WC. */
2077 + mbstate_t state, state_bak; /* State of the stream. */
2078 + int convfail; /* 1, when conversion is failed. Otherwise 0. */
2079 +
2080 + char *line_out = NULL;
2081 + size_t offset_out = 0; /* Index in `line_out' for next char. */
2082 + size_t allocated_out = 0;
2083 +
2084 + int increment;
2085 + size_t column = 0;
2086 +
2087 + size_t last_blank_pos;
2088 + size_t last_blank_column;
2089 + int is_blank_seen;
2090 + int last_blank_increment;
2091 + int is_bs_following_last_blank;
2092 + size_t bs_following_last_blank_num;
2093 + int is_cr_after_last_blank;
2094 +
2095 +#define CLEAR_FLAGS \
2096 + do \
2097 + { \
2098 + last_blank_pos = 0; \
2099 + last_blank_column = 0; \
2100 + is_blank_seen = 0; \
2101 + is_bs_following_last_blank = 0; \
2102 + bs_following_last_blank_num = 0; \
2103 + is_cr_after_last_blank = 0; \
2104 + } \
2105 + while (0)
2106 +
2107 +#define START_NEW_LINE \
2108 + do \
2109 + { \
2110 + putchar ('\n'); \
2111 + column = 0; \
2112 + offset_out = 0; \
2113 + CLEAR_FLAGS; \
2114 + } \
2115 + while (0)
2116 +
2117 + CLEAR_FLAGS;
2118 + memset (&state, '\0', sizeof(mbstate_t));
2119 +
2120 + for (;; bufpos += mblength, buflen -= mblength)
2121 + {
2122 + if (buflen < MB_LEN_MAX && !feof (istream) && !ferror (istream))
2123 + {
2124 + memmove (buf, bufpos, buflen);
2125 + buflen += fread (buf + buflen, sizeof(char), BUFSIZ, istream);
2126 + bufpos = buf;
2127 + }
2128 +
2129 + if (buflen < 1)
2130 + break;
2131 +
2132 + /* Get a wide character. */
2133 + convfail = 0;
2134 + state_bak = state;
2135 + mblength = mbrtowc ((wchar_t *)&wc, bufpos, buflen, &state);
2136 +
2137 + switch (mblength)
2138 + {
2139 + case (size_t)-1:
2140 + case (size_t)-2:
2141 + convfail++;
2142 + state = state_bak;
2143 + /* Fall through. */
2144 +
2145 + case 0:
2146 + mblength = 1;
2147 + break;
2148 + }
2149 +
2150 +rescan:
2151 + if (operating_mode == byte_mode) /* byte mode */
2152 + increment = mblength;
2153 + else if (operating_mode == character_mode) /* character mode */
2154 + increment = 1;
2155 + else /* column mode */
2156 + {
2157 + if (convfail)
2158 + increment = 1;
2159 + else
2160 + {
2161 + switch (wc)
2162 + {
2163 + case L'\n':
2164 + fwrite (line_out, sizeof(char), offset_out, stdout);
2165 + START_NEW_LINE;
2166 + continue;
2167 +
2168 + case L'\b':
2169 + increment = (column > 0) ? -1 : 0;
2170 + break;
2171 +
2172 + case L'\r':
2173 + increment = -1 * column;
2174 + break;
2175 +
2176 + case L'\t':
2177 + increment = 8 - column % 8;
2178 + break;
2179 +
2180 + default:
2181 + increment = wcwidth (wc);
2182 + increment = (increment < 0) ? 0 : increment;
2183 + }
2184 + }
2185 + }
2186 +
2187 + if (column + increment > width && break_spaces && last_blank_pos)
2188 + {
2189 + fwrite (line_out, sizeof(char), last_blank_pos, stdout);
2190 + putchar ('\n');
2191 +
2192 + offset_out = offset_out - last_blank_pos;
2193 + column = column - last_blank_column + ((is_cr_after_last_blank)
2194 + ? last_blank_increment : bs_following_last_blank_num);
2195 + memmove (line_out, line_out + last_blank_pos, offset_out);
2196 + CLEAR_FLAGS;
2197 + goto rescan;
2198 + }
2199 +
2200 + if (column + increment > width && column != 0)
2201 + {
2202 + fwrite (line_out, sizeof(char), offset_out, stdout);
2203 + START_NEW_LINE;
2204 + goto rescan;
2205 + }
2206 +
2207 + if (allocated_out < offset_out + mblength)
2208 + {
2209 + allocated_out += 1024;
2210 + line_out = xrealloc (line_out, allocated_out);
2211 + }
2212 +
2213 + memcpy (line_out + offset_out, bufpos, mblength);
2214 + offset_out += mblength;
2215 + column += increment;
2216 +
2217 + if (is_blank_seen && !convfail && wc == L'\r')
2218 + is_cr_after_last_blank = 1;
2219 +
2220 + if (is_bs_following_last_blank && !convfail && wc == L'\b')
2221 + ++bs_following_last_blank_num;
2222 + else
2223 + is_bs_following_last_blank = 0;
2224 +
2225 + if (break_spaces && !convfail && iswblank (wc))
2226 + {
2227 + last_blank_pos = offset_out;
2228 + last_blank_column = column;
2229 + is_blank_seen = 1;
2230 + last_blank_increment = increment;
2231 + is_bs_following_last_blank = 1;
2232 + bs_following_last_blank_num = 0;
2233 + is_cr_after_last_blank = 0;
2234 + }
2235 + }
2236 +
2237 + *saved_errno = errno;
2238
2239 if (offset_out)
2240 fwrite (line_out, sizeof (char), (size_t) offset_out, stdout);
2241
2242 + free(line_out);
2243 +}
2244 +#endif
2245 +
2246 +/* Fold file FILENAME, or standard input if FILENAME is "-",
2247 + to stdout, with maximum line length WIDTH.
2248 + Return 0 if successful, 1 if an error occurs. */
2249 +
2250 +static int
2251 +fold_file (char *filename, int width)
2252 +{
2253 + FILE *istream;
2254 + int saved_errno;
2255 +
2256 + if (STREQ (filename, "-"))
2257 + {
2258 + istream = stdin;
2259 + have_read_stdin = 1;
2260 + }
2261 + else
2262 + istream = fopen (filename, "r");
2263 +
2264 + if (istream == NULL)
2265 + {
2266 + error (0, errno, "%s", filename);
2267 + return 1;
2268 + }
2269 +
2270 + /* Define how ISTREAM is being folded. */
2271 +#if HAVE_MBRTOWC
2272 + if (MB_CUR_MAX > 1)
2273 + fold_multibyte_text (istream, width, &saved_errno);
2274 + else
2275 +#endif
2276 + fold_text (istream, width, &saved_errno);
2277 +
2278 if (ferror (istream))
2279 {
2280 error (0, saved_errno, "%s", filename);
2281 @@ -255,7 +501,8 @@
2282
2283 atexit (close_stdout);
2284
2285 - break_spaces = count_bytes = have_read_stdin = false;
2286 + operating_mode = column_mode;
2287 + break_spaces = have_read_stdin = false;
2288
2289 while ((optc = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1)
2290 {
2291 @@ -264,7 +511,15 @@
2292 switch (optc)
2293 {
2294 case 'b': /* Count bytes rather than columns. */
2295 - count_bytes = true;
2296 + if (operating_mode != column_mode)
2297 + FATAL_ERROR (_("only one way of folding may be specified"));
2298 + operating_mode = byte_mode;
2299 + break;
2300 +
2301 + case 'c':
2302 + if (operating_mode != column_mode)
2303 + FATAL_ERROR (_("only one way of folding may be specified"));
2304 + operating_mode = character_mode;
2305 break;
2306
2307 case 's': /* Break at word boundaries. */
2308 --- coreutils-5.93/src/join.c.i18n 2005-08-12 08:16:25.000000000 +0100
2309 +++ coreutils-5.93/src/join.c 2005-12-23 08:53:01.000000000 +0000
2310 @@ -23,16 +23,30 @@
2311 #include <sys/types.h>
2312 #include <getopt.h>
2313
2314 +/* Get mbstate_t, mbrtowc(), wcwidth(). */
2315 +#if HAVE_WCHAR_H
2316 +# include <wchar.h>
2317 +#endif
2318 +
2319 +/* Get iswblank(), towupper. */
2320 +#if HAVE_WCTYPE_H
2321 +# include <wctype.h>
2322 +#endif
2323 +
2324 #include "system.h"
2325 #include "error.h"
2326 #include "hard-locale.h"
2327 #include "linebuffer.h"
2328 -#include "memcasecmp.h"
2329 #include "quote.h"
2330 #include "stdio--.h"
2331 #include "xmemcoll.h"
2332 #include "xstrtol.h"
2333
2334 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
2335 +#if HAVE_MBRTOWC && defined mbstate_t
2336 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
2337 +#endif
2338 +
2339 /* The official name of this program (e.g., no `g' prefix). */
2340 #define PROGRAM_NAME "join"
2341
2342 @@ -104,10 +118,12 @@
2343 /* Last element in `outlist', where a new element can be added. */
2344 static struct outlist *outlist_end = &outlist_head;
2345
2346 -/* Tab character separating fields. If negative, fields are separated
2347 - by any nonempty string of blanks, otherwise by exactly one
2348 - tab character whose value (when cast to unsigned char) equals TAB. */
2349 -static int tab = -1;
2350 +/* Tab character separating fields. If NULL, fields are separated
2351 + by any nonempty string of blanks. */
2352 +static char *tab = NULL;
2353 +
2354 +/* The number of bytes used for tab. */
2355 +static size_t tablen = 0;
2356
2357 static struct option const longopts[] =
2358 {
2359 @@ -197,6 +213,8 @@
2360
2361 /* Fill in the `fields' structure in LINE. */
2362
2363 +/* Single-byte variant: split LINE on the byte tab[0], or on blanks when TAB is NULL. */
2364 +
2365 static void
2366 xfields (struct line *line)
2367 {
2368 @@ -206,10 +224,11 @@
2369 if (ptr == lim)
2370 return;
2371
2372 - if (0 <= tab)
2373 + if (tab != NULL)
2374 {
2375 + unsigned char t = tab[0];
2376 char *sep;
2377 - for (; (sep = memchr (ptr, tab, lim - ptr)) != NULL; ptr = sep + 1)
2378 + for (; (sep = memchr (ptr, t, lim - ptr)) != NULL; ptr = sep + 1)
2379 extract_field (line, ptr, sep - ptr);
2380 }
2381 else
2382 @@ -236,6 +255,148 @@
2383 extract_field (line, ptr, lim - ptr);
2384 }
2385
2386 +#if HAVE_MBRTOWC
2387 +static void
2388 +xfields_multibyte (struct line *line)
2389 +{
2390 + char *ptr = line->buf.buffer;
2391 + char const *lim = ptr + line->buf.length - 1;
2392 + wchar_t wc = 0;
2393 + size_t mblength = 1;
2394 + mbstate_t state, state_bak;
2395 +
2396 + memset (&state, 0, sizeof (mbstate_t));
2397 +
2398 + if (ptr == lim)
2399 + return;
2400 +
2401 + if (tab != NULL)
2402 + {
2403 + unsigned char t = tab[0];
2404 + char *sep = ptr;
2405 + for (; ptr < lim; ptr = sep + mblength)
2406 + {
2407 + sep = ptr;
2408 + while (sep < lim)
2409 + {
2410 + state_bak = state;
2411 + mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
2412 +
2413 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
2414 + {
2415 + mblength = 1;
2416 + state = state_bak;
2417 + }
2418 + mblength = (mblength < 1) ? 1 : mblength;
2419 +
2420 + if (mblength == tablen && !memcmp (sep, tab, mblength))
2421 + break;
2422 + else
2423 + {
2424 + sep += mblength;
2425 + continue;
2426 + }
2427 + }
2428 +
2429 + if (sep == lim)
2430 + break;
2431 +
2432 + extract_field (line, ptr, sep - ptr);
2433 + }
2434 + }
2435 + else
2436 + {
2437 + /* Skip leading blanks before the first field. */
2438 + while(ptr < lim)
2439 + {
2440 + state_bak = state;
2441 + mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
2442 +
2443 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
2444 + {
2445 + mblength = 1;
2446 + state = state_bak;
2447 + break;
2448 + }
2449 + mblength = (mblength < 1) ? 1 : mblength;
2450 +
2451 + if (!iswblank(wc))
2452 + break;
2453 + ptr += mblength;
2454 + }
2455 +
2456 + do
2457 + {
2458 + char *sep;
2459 + state_bak = state;
2460 + mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
2461 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
2462 + {
2463 + mblength = 1;
2464 + state = state_bak;
2465 + break;
2466 + }
2467 + mblength = (mblength < 1) ? 1 : mblength;
2468 +
2469 + sep = ptr + mblength;
2470 + while (sep != lim)
2471 + {
2472 + state_bak = state;
2473 + mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
2474 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
2475 + {
2476 + mblength = 1;
2477 + state = state_bak;
2478 + break;
2479 + }
2480 + mblength = (mblength < 1) ? 1 : mblength;
2481 +
2482 + if (iswblank (wc))
2483 + break;
2484 +
2485 + sep += mblength;
2486 + }
2487 +
2488 + extract_field (line, ptr, sep - ptr);
2489 + if (sep == lim)
2490 + return;
2491 +
2492 + state_bak = state;
2493 + mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
2494 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
2495 + {
2496 + mblength = 1;
2497 + state = state_bak;
2498 + break;
2499 + }
2500 + mblength = (mblength < 1) ? 1 : mblength;
2501 +
2502 + ptr = sep + mblength;
2503 + while (ptr != lim)
2504 + {
2505 + state_bak = state;
2506 + mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
2507 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
2508 + {
2509 + mblength = 1;
2510 + state = state_bak;
2511 + break;
2512 + }
2513 + mblength = (mblength < 1) ? 1 : mblength;
2514 +
2515 + if (!iswblank (wc))
2516 + break;
2517 +
2518 + ptr += mblength;
2519 + }
2520 + }
2521 + while (ptr != lim);
2522 + }
2523 +
2524 + extract_field (line, ptr, lim - ptr);
2525 +}
2526 +#endif
2527 +
2528 /* Read a line from FP into LINE and split it into fields.
2529 Return true if successful. */
2530
2531 @@ -256,6 +417,11 @@
2532 line->nfields_allocated = 0;
2533 line->nfields = 0;
2534 line->fields = NULL;
2535 +#if HAVE_MBRTOWC
2536 + if (MB_CUR_MAX > 1)
2537 + xfields_multibyte (line);
2538 + else
2539 +#endif
2540 xfields (line);
2541 return true;
2542 }
2543 @@ -310,56 +476,114 @@
2544 keycmp (struct line const *line1, struct line const *line2)
2545 {
2546 /* Start of field to compare in each file. */
2547 - char *beg1;
2548 - char *beg2;
2549 -
2550 - size_t len1;
2551 - size_t len2; /* Length of fields to compare. */
2552 + char *beg[2];
2553 + char *copy[2];
2554 + size_t len[2]; /* Length of fields to compare. */
2555 int diff;
2556 + int i, j;
2557
2558 if (join_field_1 < line1->nfields)
2559 {
2560 - beg1 = line1->fields[join_field_1].beg;
2561 - len1 = line1->fields[join_field_1].len;
2562 + beg[0] = line1->fields[join_field_1].beg;
2563 + len[0] = line1->fields[join_field_1].len;
2564 }
2565 else
2566 {
2567 - beg1 = NULL;
2568 - len1 = 0;
2569 + beg[0] = NULL;
2570 + len[0] = 0;
2571 }
2572
2573 if (join_field_2 < line2->nfields)
2574 {
2575 - beg2 = line2->fields[join_field_2].beg;
2576 - len2 = line2->fields[join_field_2].len;
2577 + beg[1] = line2->fields[join_field_2].beg;
2578 + len[1] = line2->fields[join_field_2].len;
2579 }
2580 else
2581 {
2582 - beg2 = NULL;
2583 - len2 = 0;
2584 + beg[1] = NULL;
2585 + len[1] = 0;
2586 }
2587
2588 - if (len1 == 0)
2589 - return len2 == 0 ? 0 : -1;
2590 - if (len2 == 0)
2591 + if (len[0] == 0)
2592 + return len[1] == 0 ? 0 : -1;
2593 + if (len[1] == 0)
2594 return 1;
2595
2596 if (ignore_case)
2597 {
2598 - /* FIXME: ignore_case does not work with NLS (in particular,
2599 - with multibyte chars). */
2600 - diff = memcasecmp (beg1, beg2, MIN (len1, len2));
2601 +#if HAVE_MBRTOWC
2602 + if (MB_CUR_MAX > 1)
2603 + {
2604 + size_t mblength;
2605 + wchar_t wc, uwc;
2606 + mbstate_t state, state_bak;
2607 +
2608 + memset (&state, '\0', sizeof (mbstate_t));
2609 +
2610 + for (i = 0; i < 2; i++)
2611 + {
2612 + copy[i] = alloca (len[i] + 1);
2613 +
2614 + for (j = 0; j < len[i];) /* Whole field: xmemcoll reads len[i] bytes. */
2615 + {
2616 + state_bak = state;
2617 + mblength = mbrtowc (&wc, beg[i] + j, len[i] - j, &state);
2618 +
2619 + switch (mblength)
2620 + {
2621 + case (size_t) -1:
2622 + case (size_t) -2:
2623 + state = state_bak;
2624 + /* Fall through */
2625 + case 0:
2626 + mblength = 1;
2627 + copy[i][j] = beg[i][j]; /* Pass the raw byte through unchanged. */
2628 + break;
2629 +
2630 + default:
2631 + {
2632 + char mbc[MB_LEN_MAX];
2633 + mbstate_t state_wc;
2634 + /* Uppercase only when the byte length is unchanged. */
2635 + memset (&state_wc, '\0', sizeof (mbstate_t));
2636 + uwc = towupper (wc);
2637 + if (uwc == wc || wcrtomb (mbc, uwc, &state_wc) != mblength)
2638 + memcpy (mbc, beg[i] + j, mblength);
2639 + memcpy (copy[i] + j, mbc, mblength);
2640 + }
2641 + }
2642 + j += mblength;
2643 + }
2644 + copy[i][j] = '\0';
2645 + }
2646 + }
2647 + else
2648 +#endif
2649 + {
2650 + for (i = 0; i < 2; i++)
2651 + {
2652 + copy[i] = alloca (len[i] + 1);
2653 +
2654 + for (j = 0; j < len[i]; j++) /* Whole field, not just the common prefix. */
2655 + copy[i][j] = toupper ((unsigned char) beg[i][j]);
2656 +
2657 + copy[i][j] = '\0';
2658 + }
2659 + }
2660 }
2661 else
2662 {
2663 - if (hard_LC_COLLATE)
2664 - return xmemcoll (beg1, len1, beg2, len2);
2665 - diff = memcmp (beg1, beg2, MIN (len1, len2));
2666 + copy[0] = beg[0]; /* No folding requested: compare the fields in place. */
2667 + copy[1] = beg[1];
2668 }
2669
2670 + if (HAVE_SETLOCALE && hard_LC_COLLATE)
2671 + return xmemcoll ((char *) copy[0], len[0], (char *) copy[1], len[1]);
2672 + diff = memcmp (copy[0], copy[1], MIN (len[0], len[1]));
2673 +
2674 if (diff)
2675 return diff;
2676 - return len1 < len2 ? -1 : len1 != len2;
2677 + return len[0] < len[1] ? -1 : len[0] != len[1]; /* A size_t difference would wrap. */
2678 }
2679
2680 /* Print field N of LINE if it exists and is nonempty, otherwise
2681 @@ -384,11 +608,18 @@
2682
2683 /* Print the join of LINE1 and LINE2. */
2684
2685 +#define PUT_TAB_CHAR \
2686 + do \
2687 + { \
2688 + (tab != NULL) ? \
2689 + fwrite(tab, sizeof(char), tablen, stdout) : putchar (' '); \
2690 + } \
2691 + while (0)
2692 +
2693 static void
2694 prjoin (struct line const *line1, struct line const *line2)
2695 {
2696 const struct outlist *outlist;
2697 - char output_separator = tab < 0 ? ' ' : tab;
2698
2699 outlist = outlist_head.next;
2700 if (outlist)
2701 @@ -404,12 +635,12 @@
2702 if (o->file == 0)
2703 {
2704 if (line1 == &uni_blank)
2705 - {
2706 + {
2707 line = line2;
2708 field = join_field_2;
2709 }
2710 else
2711 - {
2712 + {
2713 line = line1;
2714 field = join_field_1;
2715 }
2716 @@ -423,7 +654,7 @@
2717 o = o->next;
2718 if (o == NULL)
2719 break;
2720 - putchar (output_separator);
2721 + PUT_TAB_CHAR;
2722 }
2723 putchar ('\n');
2724 }
2725 @@ -441,23 +672,23 @@
2726 prfield (join_field_1, line1);
2727 for (i = 0; i < join_field_1 && i < line1->nfields; ++i)
2728 {
2729 - putchar (output_separator);
2730 + PUT_TAB_CHAR;
2731 prfield (i, line1);
2732 }
2733 for (i = join_field_1 + 1; i < line1->nfields; ++i)
2734 {
2735 - putchar (output_separator);
2736 + PUT_TAB_CHAR;
2737 prfield (i, line1);
2738 }
2739
2740 for (i = 0; i < join_field_2 && i < line2->nfields; ++i)
2741 {
2742 - putchar (output_separator);
2743 + PUT_TAB_CHAR;
2744 prfield (i, line2);
2745 }
2746 for (i = join_field_2 + 1; i < line2->nfields; ++i)
2747 {
2748 - putchar (output_separator);
2749 + PUT_TAB_CHAR;
2750 prfield (i, line2);
2751 }
2752 putchar ('\n');
2753 @@ -869,20 +1100,41 @@
2754
2755 case 't':
2756 {
2757 - unsigned char newtab = optarg[0];
2758 - if (! newtab)
2759 + char *newtab;
2760 + size_t newtablen;
2761 + if (! optarg[0])
2762 error (EXIT_FAILURE, 0, _("empty tab"));
2763 - if (optarg[1])
2764 + newtab = xstrdup (optarg);
2765 +#if HAVE_MBRTOWC
2766 + if (MB_CUR_MAX > 1)
2767 + {
2768 + mbstate_t state;
2769 +
2770 + memset (&state, 0, sizeof (mbstate_t));
2771 + newtablen = mbrtowc (NULL, newtab,
2772 + strnlen (newtab, MB_LEN_MAX),
2773 + &state);
2774 + if (newtablen == (size_t) 0
2775 + || newtablen == (size_t) -1
2776 + || newtablen == (size_t) -2)
2777 + newtablen = 1;
2778 + }
2779 + else
2780 +#endif
2781 + newtablen = 1;
2782 +
2783 + if (newtablen == 1 && newtab[1])
2784 + {
2785 + if (STREQ (newtab, "\\0"))
2786 + newtab[0] = '\0';
2787 + }
2788 + if (tab != NULL && strcmp (tab, newtab))
2789 {
2790 - if (STREQ (optarg, "\\0"))
2791 - newtab = '\0';
2792 - else
2793 - error (EXIT_FAILURE, 0, _("multi-character tab %s"),
2794 - quote (optarg));
2795 + free (newtab);
2796 + error (EXIT_FAILURE, 0, _("incompatible tabs"));
2797 }
2798 - if (0 <= tab && tab != newtab)
2799 - error (EXIT_FAILURE, 0, _("incompatible tabs"));
2800 tab = newtab;
2801 + tablen = newtablen;
2802 }
2803 break;
2804
2805 --- coreutils-5.93/src/unexpand.c.i18n 2005-08-12 08:16:25.000000000 +0100
2806 +++ coreutils-5.93/src/unexpand.c 2005-12-23 08:53:01.000000000 +0000
2807 @@ -39,11 +39,28 @@
2808 #include <stdio.h>
2809 #include <getopt.h>
2810 #include <sys/types.h>
2811 +
2812 +/* Get mbstate_t, mbrtowc(), wcwidth(). */
2813 +#if HAVE_WCHAR_H
2814 +# include <wchar.h>
2815 +#endif
2816 +
2817 #include "system.h"
2818 #include "error.h"
2819 #include "quote.h"
2820 #include "xstrndup.h"
2821
2822 +/* Work around GCC installations that incorrectly define MB_LEN_MAX as 1. */
2823 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
2824 +# undef MB_LEN_MAX
2825 +# define MB_LEN_MAX 16
2826 +#endif
2827 +
2828 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
2829 +#if HAVE_MBRTOWC && defined mbstate_t
2830 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
2831 +#endif
2832 +
2833 /* The official name of this program (e.g., no `g' prefix). */
2834 #define PROGRAM_NAME "unexpand"
2835
2836 @@ -110,6 +127,208 @@
2837 {NULL, 0, NULL, 0}
2838 };
2839
2840 +static FILE *next_file (FILE *fp);
2841 +
2842 +#if HAVE_MBRTOWC
2843 +static void
2844 +unexpand_multibyte (void)
2845 +{
2846 + FILE *fp; /* Input stream. */
2847 + mbstate_t i_state; /* Current shift state of the input stream. */
2848 + mbstate_t i_state_bak; /* Back up the I_STATE. */
2849 + mbstate_t o_state; /* Current shift state of the output stream. */
2850 + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
2851 + char *bufpos = buf; /* Next read position of BUF; set before the first refill reads it. */
2852 + size_t buflen = 0; /* The length of the byte sequence in buf. */
2853 + wint_t wc; /* A gotten wide character. */
2854 + size_t mblength; /* The byte size of a multibyte character
2855 + which shows as same character as WC. */
2856 +
2857 + /* Index in `tab_list' of next tabstop: */
2858 + int tab_index = 0; /* For calculating width of pending tabs. */
2859 + int print_tab_index = 0; /* For printing as many tabs as possible. */
2860 + unsigned int column = 0; /* Column on screen of next char. */
2861 + int next_tab_column; /* Column the next tab stop is on. */
2862 + int convert = 1; /* If nonzero, perform translations. */
2863 + unsigned int pending = 0; /* Pending columns of blanks. */
2864 +
2865 + fp = next_file ((FILE *) NULL);
2866 + if (fp == NULL)
2867 + return;
2868 +
2869 + memset (&o_state, '\0', sizeof(mbstate_t));
2870 + memset (&i_state, '\0', sizeof(mbstate_t));
2871 +
2872 + for (;;)
2873 + {
2874 + if (buflen < MB_LEN_MAX && !feof(fp) && !ferror(fp))
2875 + {
2876 + memmove (buf, bufpos, buflen);
2877 + buflen += fread (buf + buflen, sizeof(char), BUFSIZ, fp);
2878 + bufpos = buf;
2879 + }
2880 +
2881 + /* Get a wide character. */
2882 + if (buflen < 1)
2883 + {
2884 + mblength = 1;
2885 + wc = WEOF;
2886 + }
2887 + else
2888 + {
2889 + i_state_bak = i_state;
2890 + mblength = mbrtowc ((wchar_t *)&wc, bufpos, buflen, &i_state);
2891 + }
2892 +
2893 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
2894 + {
2895 + i_state = i_state_bak;
2896 + wc = L'\0';
2897 + }
2898 +
2899 + if (wc == L' ' && convert && column < INT_MAX)
2900 + {
2901 + ++pending;
2902 + ++column;
2903 + }
2904 + else if (wc == L'\t' && convert)
2905 + {
2906 + if (tab_size == 0)
2907 + {
2908 + /* Do not let tab_index == first_free_tab;
2909 + stop when it is 1 less. */
2910 + while (tab_index < first_free_tab - 1
2911 + && column >= tab_list[tab_index])
2912 + tab_index++;
2913 + next_tab_column = tab_list[tab_index];
2914 + if (tab_index < first_free_tab - 1)
2915 + tab_index++;
2916 + if (column >= next_tab_column)
2917 + {
2918 + convert = 0; /* Ran out of tab stops. */
2919 + goto flush_pend_mb;
2920 + }
2921 + }
2922 + else
2923 + {
2924 + next_tab_column = column + tab_size - column % tab_size;
2925 + }
2926 + pending += next_tab_column - column;
2927 + column = next_tab_column;
2928 + }
2929 + else
2930 + {
2931 +flush_pend_mb:
2932 + /* Flush pending spaces. Print as many tabs as possible,
2933 + then print the rest as spaces. */
2934 + if (pending == 1)
2935 + {
2936 + putchar (' ');
2937 + pending = 0;
2938 + }
2939 + column -= pending;
2940 + while (pending > 0)
2941 + {
2942 + if (tab_size == 0)
2943 + {
2944 + /* Do not let print_tab_index == first_free_tab;
2945 + stop when it is 1 less. */
2946 + while (print_tab_index < first_free_tab - 1
2947 + && column >= tab_list[print_tab_index])
2948 + print_tab_index++;
2949 + next_tab_column = tab_list[print_tab_index];
2950 + if (print_tab_index < first_free_tab - 1)
2951 + print_tab_index++;
2952 + }
2953 + else
2954 + {
2955 + next_tab_column =
2956 + column + tab_size - column % tab_size;
2957 + }
2958 + if (next_tab_column - column <= pending)
2959 + {
2960 + putchar ('\t');
2961 + pending -= next_tab_column - column;
2962 + column = next_tab_column;
2963 + }
2964 + else
2965 + {
2966 + --print_tab_index;
2967 + column += pending;
2968 + while (pending != 0)
2969 + {
2970 + putchar (' ');
2971 + pending--;
2972 + }
2973 + }
2974 + }
2975 +
2976 + if (wc == WEOF)
2977 + {
2978 + fp = next_file (fp);
2979 + if (fp == NULL)
2980 + break; /* No more files. */
2981 + else
2982 + {
2983 + memset (&i_state, '\0', sizeof(mbstate_t));
2984 + continue;
2985 + }
2986 + }
2987 +
2988 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
2989 + {
2990 + if (convert)
2991 + {
2992 + ++column;
2993 + if (convert_entire_line == 0)
2994 + convert = 0;
2995 + }
2996 + mblength = 1;
2997 + putchar (*bufpos); /* Emit the offending byte itself, not the start of BUF. */
2998 + }
2999 + else if (mblength == 0)
3000 + {
3001 + if (convert && convert_entire_line == 0)
3002 + convert = 0;
3003 + mblength = 1;
3004 + putchar ('\0');
3005 + }
3006 + else
3007 + {
3008 + if (convert)
3009 + {
3010 + if (wc == L'\b')
3011 + {
3012 + if (column > 0)
3013 + --column;
3014 + }
3015 + else
3016 + {
3017 + int width; /* The width of WC. */
3018 +
3019 + width = wcwidth (wc);
3020 + column += (width > 0) ? width : 0;
3021 + if (convert_entire_line == 0)
3022 + convert = 0;
3023 + }
3024 + }
3025 +
3026 + if (wc == L'\n')
3027 + {
3028 + tab_index = print_tab_index = 0;
3029 + column = pending = 0;
3030 + convert = 1;
3031 + }
3032 + fwrite (bufpos, sizeof(char), mblength, stdout);
3033 + }
3034 + }
3035 + buflen -= mblength;
3036 + bufpos += mblength;
3037 + }
3038 +}
3039 +#endif
3040 +
3041 +
3042 void
3043 usage (int status)
3044 {
3045 @@ -532,7 +751,12 @@
3046
3047 file_list = (optind < argc ? &argv[optind] : stdin_argv);
3048
3049 - unexpand ();
3050 +#if HAVE_MBRTOWC
3051 + if (MB_CUR_MAX > 1)
3052 + unexpand_multibyte ();
3053 + else
3054 +#endif
3055 + unexpand ();
3056
3057 if (have_read_stdin && fclose (stdin) != 0)
3058 error (EXIT_FAILURE, errno, "-");
3059 --- coreutils-5.93/src/sort.c.i18n 2005-10-07 19:48:28.000000000 +0100
3060 +++ coreutils-5.93/src/sort.c 2005-12-23 10:38:44.000000000 +0000
3061 @@ -23,9 +23,18 @@
3062
3063 #include <config.h>
3064
3065 +#include <assert.h>
3066 #include <getopt.h>
3067 #include <sys/types.h>
3068 #include <signal.h>
3069 +#if HAVE_WCHAR_H
3070 +# include <wchar.h>
3071 +#endif
3072 +/* Get isw* functions. */
3073 +#if HAVE_WCTYPE_H
3074 +# include <wctype.h>
3075 +#endif
3076 +
3077 #include "system.h"
3078 #include "error.h"
3079 #include "hard-locale.h"
3080 @@ -95,14 +104,38 @@
3081 /* Thousands separator; if -1, then there isn't one. */
3082 static int thousands_sep;
3083
3084 +static int force_general_numcompare = 0;
3085 +
3086 /* Nonzero if the corresponding locales are hard. */
3087 static bool hard_LC_COLLATE;
3088 -#if HAVE_NL_LANGINFO
3089 +#if HAVE_LANGINFO_CODESET
3090 static bool hard_LC_TIME;
3091 #endif
3092
3093 #define NONZERO(x) ((x) != 0)
3094
3095 +/* Set MBLENGTH to the byte length of the character at PTR (1 if invalid, incomplete or NUL). */
3096 +#define GET_BYTELEN_OF_CHAR(LIM, PTR, MBLENGTH, STATE) \
3097 + do \
3098 + { \
3099 + wchar_t wc; \
3100 + mbstate_t state_bak; \
3101 + \
3102 + state_bak = (STATE); \
3103 + (MBLENGTH) = mbrtowc (&wc, (PTR), (LIM) - (PTR), &(STATE)); \
3104 + \
3105 + switch (MBLENGTH) \
3106 + { \
3107 + case (size_t)-1: \
3108 + case (size_t)-2: \
3109 + (STATE) = state_bak; \
3110 + /* Fall through. */ \
3111 + case 0: \
3112 + (MBLENGTH) = 1; \
3113 + } \
3114 + } \
3115 + while (0)
3116 +
3117 /* The kind of blanks for '-b' to skip in various options. */
3118 enum blanktype { bl_start, bl_end, bl_both };
3119
3120 @@ -239,13 +272,11 @@
3121 they were read if all keys compare equal. */
3122 static bool stable;
3123
3124 -/* If TAB has this value, blanks separate fields. */
3125 -enum { TAB_DEFAULT = CHAR_MAX + 1 };
3126 -
3127 -/* Tab character separating fields. If TAB_DEFAULT, then fields are
3128 +/* Tab character separating fields. If tab_length is 0, then fields are
3129 separated by the empty string between a non-blank character and a blank
3130 character. */
3131 -static int tab = TAB_DEFAULT;
3132 +static char tab[MB_LEN_MAX + 1];
3133 +static size_t tab_length = 0;
3134
3135 /* Flag to remove consecutive duplicate lines from the output.
3136 Only the last of a sequence of equal lines will be output. */
3137 @@ -392,6 +423,44 @@
3138 static struct tempnode *volatile temphead;
3139 static struct tempnode *volatile *temptail = &temphead;
3140
3141 +/* Function pointers. */
3142 +static void
3143 +(*inittables) (void);
3144 +static char *
3145 +(*begfield) (const struct line*, const struct keyfield *);
3146 +static char *
3147 +(*limfield) (const struct line*, const struct keyfield *);
3148 +static int
3149 +(*getmonth) (char const *, size_t);
3150 +static int
3151 +(*keycompare) (const struct line *, const struct line *);
3152 +static int
3153 +(*numcompare) (const char *, const char *);
3154 +
3155 +/* Test for white space multibyte character.
3156 + Set LENGTH the byte length of investigated multibyte character. */
3157 +#if HAVE_MBRTOWC
3158 +static int
3159 +ismbblank (const char *str, size_t len, size_t *length)
3160 +{
3161 + size_t mblength;
3162 + wchar_t wc;
3163 + mbstate_t state;
3164 +
3165 + memset (&state, '\0', sizeof(mbstate_t));
3166 + mblength = mbrtowc (&wc, str, len, &state);
3167 +
3168 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
3169 + {
3170 + *length = 1;
3171 + return 0;
3172 + }
3173 +
3174 + *length = (mblength < 1) ? 1 : mblength;
3175 + return iswblank (wc);
3176 +}
3177 +#endif
3178 +
3179 /* Clean up any remaining temporary files. */
3180
3181 static void
3182 @@ -545,7 +614,7 @@
3183 free (node);
3184 }
3185
3186 -#if HAVE_NL_LANGINFO
3187 +#if HAVE_LANGINFO_CODESET
3188
3189 static int
3190 struct_month_cmp (const void *m1, const void *m2)
3191 @@ -560,7 +629,7 @@
3192 /* Initialize the character class tables. */
3193
3194 static void
3195 -inittables (void)
3196 +inittables_uni (void)
3197 {
3198 size_t i;
3199
3200 @@ -572,7 +641,7 @@
3201 fold_toupper[i] = (ISLOWER (i) ? toupper (i) : i);
3202 }
3203
3204 -#if HAVE_NL_LANGINFO
3205 +#if HAVE_LANGINFO_CODESET
3206 /* If we're not in the "C" locale, read different names for months. */
3207 if (hard_LC_TIME)
3208 {
3209 @@ -598,6 +667,64 @@
3210 #endif
3211 }
3212
3213 +#if HAVE_MBRTOWC
3214 +static void
3215 +inittables_mb (void)
3216 +{
3217 + int i, j, k, l;
3218 + char *name, *s;
3219 + size_t s_len, mblength;
3220 + char mbc[MB_LEN_MAX];
3221 + wchar_t wc, pwc;
3222 + mbstate_t state_mb, state_wc;
3223 +
3224 + for (i = 0; i < MONTHS_PER_YEAR; i++)
3225 + {
3226 + s = (char *) nl_langinfo (ABMON_1 + i);
3227 + s_len = strlen (s);
3228 + monthtab[i].name = name = (char *) xmalloc (s_len + 1);
3229 + monthtab[i].val = i + 1;
3230 +
3231 + memset (&state_mb, '\0', sizeof (mbstate_t));
3232 + memset (&state_wc, '\0', sizeof (mbstate_t));
3233 +
3234 + for (j = 0; j < s_len;)
3235 + {
3236 + if (!ismbblank (s + j, s_len - j, &mblength))
3237 + break;
3238 + j += mblength;
3239 + }
3240 +
3241 + for (k = 0; j < s_len;)
3242 + {
3243 + mblength = mbrtowc (&wc, (s + j), (s_len - j), &state_mb);
3244 + assert (mblength != (size_t)-1 && mblength != (size_t)-2);
3245 + if (mblength == 0)
3246 + break;
3247 +
3248 + pwc = towupper (wc);
3249 + if (pwc == wc)
3250 + {
3251 + memcpy (mbc, s + j, mblength);
3252 + j += mblength;
3253 + }
3254 + else
3255 + {
3256 + j += mblength;
3257 + mblength = wcrtomb (mbc, pwc, &state_wc);
3258 + assert (mblength != (size_t)0 && mblength != (size_t)-1);
3259 + }
3260 +
3261 + for (l = 0; l < mblength; l++)
3262 + name[k++] = mbc[l];
3263 + }
3264 + name[k] = '\0';
3265 + }
3266 + qsort ((void *) monthtab, MONTHS_PER_YEAR,
3267 + sizeof (struct month), struct_month_cmp);
3268 +}
3269 +#endif
3270 +
3271 /* Specify the amount of main memory to use when sorting. */
3272 static void
3273 specify_sort_size (char const *s)
3274 @@ -808,7 +935,7 @@
3275 by KEY in LINE. */
3276
3277 static char *
3278 -begfield (const struct line *line, const struct keyfield *key)
3279 +begfield_uni (const struct line *line, const struct keyfield *key)
3280 {
3281 char *ptr = line->text, *lim = ptr + line->length - 1;
3282 size_t sword = key->sword;
3283 @@ -818,10 +945,10 @@
3284 /* The leading field separator itself is included in a field when -t
3285 is absent. */
3286
3287 - if (tab != TAB_DEFAULT)
3288 + if (tab_length)
3289 while (ptr < lim && sword--)
3290 {
3291 - while (ptr < lim && *ptr != tab)
3292 + while (ptr < lim && *ptr != tab[0])
3293 ++ptr;
3294 if (ptr < lim)
3295 ++ptr;
3296 @@ -849,11 +976,70 @@
3297 return ptr;
3298 }
3299
3300 +#if HAVE_MBRTOWC
3301 +static char *
3302 +begfield_mb (const struct line *line, const struct keyfield *key)
3303 +{
3304 + int i;
3305 + char *ptr = line->text, *lim = ptr + line->length - 1;
3306 + size_t sword = key->sword;
3307 + size_t schar = key->schar;
3308 + size_t mblength;
3309 + mbstate_t state;
3310 +
3311 + memset (&state, '\0', sizeof(mbstate_t));
3312 +
3313 + if (tab_length)
3314 + while (ptr < lim && sword--)
3315 + {
3316 + while (ptr < lim && memcmp (ptr, tab, tab_length) != 0)
3317 + {
3318 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
3319 + ptr += mblength;
3320 + }
3321 + if (ptr < lim)
3322 + {
3323 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
3324 + ptr += mblength;
3325 + }
3326 + }
3327 + else
3328 + while (ptr < lim && sword--)
3329 + {
3330 + while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
3331 + ptr += mblength;
3332 + if (ptr < lim)
3333 + {
3334 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
3335 + ptr += mblength;
3336 + }
3337 + while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength))
3338 + ptr += mblength;
3339 + }
3340 +
3341 + if (key->skipsblanks)
3342 + while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
3343 + ptr += mblength;
3344 +
3345 + for (i = 0; i < schar; i++)
3346 + {
3347 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
3348 +
3349 + if (ptr + mblength > lim)
3350 + break;
3351 + else
3352 + ptr += mblength;
3353 + }
3354 +
3355 + return ptr;
3356 +}
3357 +#endif
3358 +
3359 /* Return the limit of (a pointer to the first character after) the field
3360 in LINE specified by KEY. */
3361
3362 static char *
3363 -limfield (const struct line *line, const struct keyfield *key)
3364 +limfield_uni (const struct line *line, const struct keyfield *key)
3365 {
3366 char *ptr = line->text, *lim = ptr + line->length - 1;
3367 size_t eword = key->eword, echar = key->echar;
3368 @@ -866,10 +1052,10 @@
3369 `beginning' is the first character following the delimiting TAB.
3370 Otherwise, leave PTR pointing at the first `blank' character after
3371 the preceding field. */
3372 - if (tab != TAB_DEFAULT)
3373 + if (tab_length)
3374 while (ptr < lim && eword--)
3375 {
3376 - while (ptr < lim && *ptr != tab)
3377 + while (ptr < lim && *ptr != tab[0])
3378 ++ptr;
3379 if (ptr < lim && (eword | echar))
3380 ++ptr;
3381 @@ -915,10 +1101,10 @@
3382 */
3383
3384 /* Make LIM point to the end of (one byte past) the current field. */
3385 - if (tab != TAB_DEFAULT)
3386 + if (tab_length)
3387 {
3388 char *newlim;
3389 - newlim = memchr (ptr, tab, lim - ptr);
3390 + newlim = memchr (ptr, tab[0], lim - ptr);
3391 if (newlim)
3392 lim = newlim;
3393 }
3394 @@ -951,6 +1137,107 @@
3395 return ptr;
3396 }
3397
3398 +#if HAVE_MBRTOWC
3399 +static char *
3400 +limfield_mb (const struct line *line, const struct keyfield *key)
3401 +{
3402 + char *ptr = line->text, *lim = ptr + line->length - 1;
3403 + size_t eword = key->eword, echar = key->echar;
3404 + int i;
3405 + size_t mblength;
3406 + mbstate_t state;
3407 +
3408 + memset (&state, '\0', sizeof(mbstate_t));
3409 +
3410 + if (tab_length)
3411 + while (ptr < lim && eword--)
3412 + {
3413 + while (ptr < lim && memcmp (ptr, tab, tab_length) != 0)
3414 + {
3415 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
3416 + ptr += mblength;
3417 + }
3418 + if (ptr < lim && (eword | echar))
3419 + {
3420 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
3421 + ptr += mblength;
3422 + }
3423 + }
3424 + else
3425 + while (ptr < lim && eword--)
3426 + {
3427 + while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
3428 + ptr += mblength;
3429 + if (ptr < lim)
3430 + {
3431 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
3432 + ptr += mblength;
3433 + }
3434 + while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength))
3435 + ptr += mblength;
3436 + }
3437 +
3438 +
3439 +# ifdef POSIX_UNSPECIFIED
3440 + /* Make LIM point to the end of (one byte past) the current field. */
3441 + if (tab_length)
3442 + {
3443 + char *p;
3444 +
3445 + /* Stop at the first delimiter; keep LIM if there is none. */
3446 + for (p = ptr; p < lim;)
3447 + {
3448 + if (memcmp (p, tab, tab_length) == 0)
3449 + {
3450 + lim = p;
3451 + break;
3452 + }
3453 +
3454 + GET_BYTELEN_OF_CHAR (lim, p, mblength, state);
3455 + p += mblength;
3456 + }
3457 + }
3458 + else
3459 + {
3460 + char *newlim;
3461 + newlim = ptr;
3462 +
3463 + while (newlim < lim && ismbblank (newlim, lim - newlim, &mblength))
3464 + newlim += mblength;
3465 + if (newlim < lim) /* Scan with NEWLIM only; PTR must not move here. */
3466 + {
3467 + GET_BYTELEN_OF_CHAR (lim, newlim, mblength, state);
3468 + newlim += mblength;
3469 + }
3470 + while (newlim < lim && !ismbblank (newlim, lim - newlim, &mblength))
3471 + newlim += mblength;
3472 + lim = newlim;
3473 + }
3474 +# endif
3475 +
3476 + /* If we're skipping leading blanks, don't start counting characters
3477 + * until after skipping past any leading blanks. */
3478 + if (key->skipsblanks)
3479 + while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
3480 + ptr += mblength;
3481 +
3482 + memset (&state, '\0', sizeof(mbstate_t));
3483 +
3484 + /* Advance PTR by ECHAR (if possible), but no further than LIM. */
3485 + for (i = 0; i < echar; i++)
3486 + {
3487 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
3488 +
3489 + if (ptr + mblength > lim)
3490 + break;
3491 + else
3492 + ptr += mblength;
3493 + }
3494 +
3495 + return ptr;
3496 +}
3497 +#endif
3498 +
3499 /* Fill BUF reading from FP, moving buf->left bytes from the end
3500 of buf->buf to the beginning first. If EOF is reached and the
3501 file wasn't terminated by a newline, supply one. Set up BUF's line
3502 @@ -1067,7 +1354,7 @@
3503 hideously fast. */
3504
3505 static int
3506 -numcompare (const char *a, const char *b)
3507 +numcompare_uni (const char *a, const char *b)
3508 {
3509 while (blanks[to_uchar (*a)])
3510 a++;
3511 @@ -1077,6 +1364,25 @@
3512 return strnumcmp (a, b, decimal_point, thousands_sep);
3513 }
3514
3515 +#if HAVE_MBRTOWC
3516 +static int
3517 +numcompare_mb (const char *a, const char *b)
3518 +{
3519 + size_t mblength, len;
3520 + len = strlen (a); /* okay for UTF-8 */
3521 + while (*a && ismbblank (a, len > MB_CUR_MAX ? MB_CUR_MAX : len, &mblength))
3522 + {
3523 + a += mblength;
3524 + len -= mblength;
3525 + }
3526 + len = strlen (b); /* okay for UTF-8 */
3527 + while (*b && ismbblank (b, len > MB_CUR_MAX ? MB_CUR_MAX : len, &mblength))
3528 + b += mblength;
3529 +
3530 + return strnumcmp (a, b, decimal_point, thousands_sep);
3531 +}
3532 +#endif /* HAVE_MBRTOWC */
3533 +
3534 static int
3535 general_numcompare (const char *sa, const char *sb)
3536 {
3537 @@ -1110,7 +1416,7 @@
3538 Return 0 if the name in S is not recognized. */
3539
3540 static int
3541 -getmonth (char const *month, size_t len)
3542 +getmonth_uni (char const *month, size_t len)
3543 {
3544 size_t lo = 0;
3545 size_t hi = MONTHS_PER_YEAR;
3546 @@ -1152,11 +1458,79 @@
3547 return 0;
3548 }
3549
3550 +#if HAVE_MBRTOWC
3551 +/* Multibyte getmonth: skip leading blanks in the LEN bytes at S, upper-case
3552 + the rest and return the matching monthtab val, or 0 if there is none. */
3553 +static int
3554 +getmonth_mb (const char *s, size_t len)
3555 +{
3556 + char *month, *tmp;
3557 + size_t i, month_size, wclength, mblength;
3558 + int lo = 0, hi = MONTHS_PER_YEAR;
3559 + const char **pp;
3560 + const wchar_t **wpp;
3561 + wchar_t *month_wcs;
3562 + mbstate_t state;
3563 +
3564 + while (len > 0 && ismbblank (s, len, &mblength))
3565 + {
3566 + s += mblength;
3567 + len -= mblength;
3568 + }
3569 +
3570 + if (len == 0)
3571 + return 0;
3572 +
3573 + tmp = (char *) alloca (len + 1);
3574 + memcpy (tmp, s, len);
3575 + tmp[len] = '\0';
3576 + pp = (const char **)&tmp;
3577 + month_wcs = (wchar_t *) alloca ((len + 1) * sizeof (wchar_t));
3578 + memset (&state, '\0', sizeof(mbstate_t));
3579 +
3580 + /* Undecodable input is bad data, not a program bug: no month, no abort. */
3581 + wclength = mbsrtowcs (month_wcs, pp, len + 1, &state);
3582 + if (wclength == (size_t)-1 || *pp != NULL)
3583 + return 0;
3584 +
3585 + for (i = 0; i < wclength; i++)
3586 + {
3587 + month_wcs[i] = towupper(month_wcs[i]);
3588 + if (iswblank (month_wcs[i]))
3589 + {
3590 + month_wcs[i] = L'\0';
3591 + break;
3592 + }
3593 + }
3594 +
3595 + /* Upper-casing can lengthen an encoding, so size MONTH from WCLENGTH. */
3596 + month_size = wclength * MB_CUR_MAX + 1;
3597 + month = (char *) alloca (month_size);
3598 + wpp = (const wchar_t **)&month_wcs;
3599 + mblength = wcsrtombs (month, wpp, month_size, &state);
3600 + if (mblength == (size_t)-1 || *wpp != NULL)
3601 + return 0;
3602 +
3603 + do
3604 + {
3605 + int ix = (lo + hi) / 2;
3606 + if (strncmp (month, monthtab[ix].name, strlen (monthtab[ix].name)) < 0)
3607 + hi = ix;
3608 + else
3609 + lo = ix;
3610 + }
3611 + while (hi - lo > 1);
3612 +
3613 + return (!strncmp (month, monthtab[lo].name, strlen (monthtab[lo].name))
3614 + ? monthtab[lo].val : 0);
3615 +}
3616 +#endif
3617 +
3618 /* Compare two lines A and B trying every key in sequence until there
3619 are no more keys or a difference is found. */
3620
3621 static int
3622 -keycompare (const struct line *a, const struct line *b)
3623 +keycompare_uni (const struct line *a, const struct line *b)
3624 {
3625 struct keyfield const *key = keylist;
3626
3627 @@ -1326,6 +1700,177 @@
3628 return key->reverse ? -diff : diff;
3629 }
3630
3631 +#if HAVE_MBRTOWC
3632 +/* Multibyte counterpart of keycompare_uni (installed when MB_CUR_MAX > 1):
3633 + compare lines A and B key by key until they differ or no keys remain. */
3634 +static int
3635 +keycompare_mb (const struct line *a, const struct line *b)
3636 +{
3637 + struct keyfield *key = keylist;
3638 +
3639 + /* For the first iteration only, the key positions have been
3640 + precomputed for us. */
3641 + char *texta = a->keybeg;
3642 + char *textb = b->keybeg;
3643 + char *lima = a->keylim;
3644 + char *limb = b->keylim;
3645 + size_t mblength_a, mblength_b;
3646 + wchar_t wc_a, wc_b;
3647 + mbstate_t state_a, state_b;
3648 + int diff;
3649 +
3650 + memset (&state_a, '\0', sizeof(mbstate_t));
3651 + memset (&state_b, '\0', sizeof(mbstate_t));
3652 +
3653 + for (;;)
3654 + {
3655 + unsigned char *translate = (unsigned char *) key->translate;
3656 + bool const *ignore = key->ignore;
3657 +
3658 + /* Find the lengths. */
3659 + size_t lena = lima <= texta ? 0 : lima - texta;
3660 + size_t lenb = limb <= textb ? 0 : limb - textb;
3661 +
3662 + /* Actually compare the fields. */
3663 + if (key->numeric | key->general_numeric)
3664 + {
3665 + char savea = *lima, saveb = *limb;
3666 +
3667 + *lima = *limb = '\0';
3668 + if (force_general_numcompare)
3669 + diff = general_numcompare (texta, textb);
3670 + else
3671 + diff = ((key->numeric ? numcompare : general_numcompare)
3672 + (texta, textb));
3673 + *lima = savea, *limb = saveb;
3674 + }
3675 + else if (key->month)
3676 + diff = getmonth (texta, lena) - getmonth (textb, lenb);
3677 + else
3678 + {
3679 + if (ignore || translate)
3680 + {
3681 + char *copy_a = (char *) alloca (lena + 1 + lenb + 1);
3682 + char *copy_b = copy_a + lena + 1;
3683 + size_t new_len_a, new_len_b;
3684 + size_t i, j;
3685 +
3686 + /* Ignore and/or translate chars before comparing. */
3687 +# define IGNORE_CHARS(NEW_LEN, LEN, TEXT, COPY, WC, MBLENGTH, STATE) \
3688 + do \
3689 + { \
3690 + wchar_t uwc; \
3691 + char mbc[MB_LEN_MAX]; \
3692 + mbstate_t state_wc; \
3693 + \
3694 + for (NEW_LEN = i = 0; i < LEN;) \
3695 + { \
3696 + mbstate_t state_bak; \
3697 + \
3698 + state_bak = STATE; \
3699 + MBLENGTH = mbrtowc (&WC, TEXT + i, LEN - i, &STATE); \
3700 + \
3701 + if (MBLENGTH == (size_t)-2 || MBLENGTH == (size_t)-1 \
3702 + || MBLENGTH == 0) \
3703 + { \
3704 + if (MBLENGTH == (size_t)-2 || MBLENGTH == (size_t)-1) \
3705 + STATE = state_bak; \
3706 + if (!ignore) \
3707 + COPY[NEW_LEN++] = TEXT[i]; \
3708 + i++; /* always consume the byte, or the loop never ends */ \
3709 + continue; \
3710 + } \
3711 + \
3712 + if (ignore) \
3713 + { \
3714 + if ((ignore == nonprinting && !iswprint (WC)) \
3715 + || (ignore == nondictionary \
3716 + && !iswalnum (WC) && !iswblank (WC))) \
3717 + { \
3718 + i += MBLENGTH; \
3719 + continue; \
3720 + } \
3721 + } \
3722 + \
3723 + if (translate) \
3724 + { \
3725 + uwc = towupper(WC); \
3726 + if (WC == uwc) \
3727 + { \
3728 + memcpy (mbc, TEXT + i, MBLENGTH); \
3729 + i += MBLENGTH; \
3730 + } \
3731 + else \
3732 + { \
3733 + i += MBLENGTH; \
3734 + WC = uwc; \
3735 + memset (&state_wc, '\0', sizeof (mbstate_t)); \
3736 + \
3737 + MBLENGTH = wcrtomb (mbc, WC, &state_wc); \
3738 + assert (MBLENGTH != (size_t)-1 && MBLENGTH != 0); \
3739 + } \
3740 + \
3741 + for (j = 0; j < MBLENGTH; j++) \
3742 + COPY[NEW_LEN++] = mbc[j]; \
3743 + } \
3744 + else \
3745 + for (j = 0; j < MBLENGTH; j++) \
3746 + COPY[NEW_LEN++] = TEXT[i++]; \
3747 + } \
3748 + COPY[NEW_LEN] = '\0'; \
3749 + } \
3750 + while (0)
3751 + IGNORE_CHARS (new_len_a, lena, texta, copy_a,
3752 + wc_a, mblength_a, state_a);
3753 + IGNORE_CHARS (new_len_b, lenb, textb, copy_b,
3754 + wc_b, mblength_b, state_b);
3755 + diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b);
3756 + }
3757 + else if (lena == 0)
3758 + diff = - NONZERO (lenb);
3759 + else if (lenb == 0)
3760 + goto greater;
3761 + else
3762 + diff = xmemcoll (texta, lena, textb, lenb);
3763 + }
3764 +
3765 + if (diff)
3766 + goto not_equal;
3767 +
3768 + key = key->next;
3769 + if (! key)
3770 + break;
3771 +
3772 + /* Find the beginning and limit of the next field. */
3773 + if (key->eword != -1)
3774 + lima = limfield (a, key), limb = limfield (b, key);
3775 + else
3776 + lima = a->text + a->length - 1, limb = b->text + b->length - 1;
3777 +
3778 + if (key->sword != -1)
3779 + texta = begfield (a, key), textb = begfield (b, key);
3780 + else
3781 + {
3782 + texta = a->text, textb = b->text;
3783 + if (key->skipsblanks)
3784 + {
3785 + while (texta < lima && ismbblank (texta, lima - texta, &mblength_a))
3786 + texta += mblength_a;
3787 + while (textb < limb && ismbblank (textb, limb - textb, &mblength_b))
3788 + textb += mblength_b;
3789 + }
3790 + }
3791 + }
3792 +
3793 + return 0;
3794 +
3795 +greater:
3796 + diff = 1;
3797 +not_equal:
3798 + return key->reverse ? -diff : diff;
3799 +}
3800 +#endif
3801 +
3802 /* Compare two lines A and B, returning negative, zero, or positive
3803 depending on whether A compares less than, equal to, or greater than B. */
3804
3805 @@ -2127,7 +2672,7 @@
3806 atexit (close_stdout);
3807
3808 hard_LC_COLLATE = hard_locale (LC_COLLATE);
3809 -#if HAVE_NL_LANGINFO
3810 +#if HAVE_LANGINFO_CODESET
3811 hard_LC_TIME = hard_locale (LC_TIME);
3812 #endif
3813
3814 @@ -2148,6 +2693,27 @@
3815 thousands_sep = -1;
3816 }
3817
3818 +#if HAVE_MBRTOWC
3819 + if (MB_CUR_MAX > 1)
3820 + {
3821 + inittables = inittables_mb;
3822 + begfield = begfield_mb;
3823 + limfield = limfield_mb;
3824 + getmonth = getmonth_mb;
3825 + keycompare = keycompare_mb;
3826 + numcompare = numcompare_mb;
3827 + }
3828 + else
3829 +#endif
3830 + {
3831 + inittables = inittables_uni;
3832 + begfield = begfield_uni;
3833 + limfield = limfield_uni;
3834 + getmonth = getmonth_uni;
3835 + keycompare = keycompare_uni;
3836 + numcompare = numcompare_uni;
3837 + }
3838 +
3839 have_read_stdin = false;
3840 inittables ();
3841
3842 @@ -2349,13 +2915,35 @@
3843
3844 case 't':
3845 {
3846 - char newtab = optarg[0];
3847 - if (! newtab)
3848 + char newtab[MB_LEN_MAX + 1];
3849 + size_t newtab_length = 1;
3850 + strncpy (newtab, optarg, MB_LEN_MAX);
3851 + if (! newtab[0])
3852 error (SORT_FAILURE, 0, _("empty tab"));
3853 - if (optarg[1])
3854 +#if HAVE_MBRTOWC
3855 + if (MB_CUR_MAX > 1)
3856 + {
3857 + wchar_t wc;
3858 + mbstate_t state;
3859 + size_t i;
3860 +
3861 + memset (&state, '\0', sizeof (mbstate_t));
3862 + newtab_length = mbrtowc (&wc, newtab, strnlen (newtab,
3863 + MB_LEN_MAX),
3864 + &state);
3865 + switch (newtab_length)
3866 + {
3867 + case (size_t) -1:
3868 + case (size_t) -2:
3869 + case 0:
3870 + newtab_length = 1;
3871 + }
3872 + }
3873 +#endif
3874 + if (newtab_length == 1 && optarg[1])
3875 {
3876 if (STREQ (optarg, "\\0"))
3877 - newtab = '\0';
3878 + newtab[0] = '\0';
3879 else
3880 {
3881 /* Provoke with `sort -txx'. Complain about
3882 @@ -2366,9 +2954,12 @@
3883 quote (optarg));
3884 }
3885 }
3886 - if (tab != TAB_DEFAULT && tab != newtab)
3887 + if (tab_length
3888 + && (tab_length != newtab_length
3889 + || memcmp (tab, newtab, tab_length) != 0))
3890 error (SORT_FAILURE, 0, _("incompatible tabs"));
3891 - tab = newtab;
3892 + memcpy (tab, newtab, newtab_length);
3893 + tab_length = newtab_length;
3894 }
3895 break;
3896
3897 --- /dev/null 2005-10-10 09:36:06.437701000 +0100
3898 +++ coreutils-5.93/tests/sort/sort-mb-tests 2005-12-23 08:53:01.000000000 +0000
3899 @@ -0,0 +1,58 @@
3900 +#! /bin/sh
3901 +# Run sort -t/-k/-n under en_US.UTF-8 and compare the output with mb*.X.
3902 +case $# in
3903 + 0) xx='../../src/sort';;
3904 + *) xx="$1";;
3905 +esac
3906 +test "$VERBOSE" && echo=echo || echo=:
3907 +$echo testing program: $xx
3908 +errors=0
3909 +test "$srcdir" || srcdir=.
3910 +test "$VERBOSE" && $xx --version 2> /dev/null
3911 +
3912 +export LC_ALL=en_US.UTF-8
3913 +locale -k LC_CTYPE 2>&1 | grep -q charmap.*UTF-8 || exit 77
3914 +
3915 +$xx -t @ -k2 -n mb1.I > mb1.O
3916 +code=$?
3917 +if test $code != 0; then
3918 + $echo "Test mb1 failed: $xx return code $code differs from expected value 0" 1>&2
3919 + errors=`expr $errors + 1`
3920 +else
3921 + cmp mb1.O $srcdir/mb1.X > /dev/null 2>&1
3922 + case $? in
3923 + 0) if test "$VERBOSE"; then $echo "passed mb1"; fi;;
3924 + 1) $echo "Test mb1 failed: files mb1.O and $srcdir/mb1.X differ" 1>&2
3925 + (diff -c mb1.O $srcdir/mb1.X) 2> /dev/null
3926 + errors=`expr $errors + 1`;;
3927 + 2) $echo "Test mb1 may have failed." 1>&2
3928 + $echo The command "cmp mb1.O $srcdir/mb1.X" failed. 1>&2
3929 + errors=`expr $errors + 1`;;
3930 + esac
3931 +fi
3932 +
3933 +$xx -t @ -k4 -n mb2.I > mb2.O
3934 +code=$?
3935 +if test $code != 0; then
3936 + $echo "Test mb2 failed: $xx return code $code differs from expected value 0" 1>&2
3937 + errors=`expr $errors + 1`
3938 +else
3939 + cmp mb2.O $srcdir/mb2.X > /dev/null 2>&1
3940 + case $? in
3941 + 0) if test "$VERBOSE"; then $echo "passed mb2"; fi;;
3942 + 1) $echo "Test mb2 failed: files mb2.O and $srcdir/mb2.X differ" 1>&2
3943 + (diff -c mb2.O $srcdir/mb2.X) 2> /dev/null
3944 + errors=`expr $errors + 1`;;
3945 + 2) $echo "Test mb2 may have failed." 1>&2
3946 + $echo The command "cmp mb2.O $srcdir/mb2.X" failed. 1>&2
3947 + errors=`expr $errors + 1`;;
3948 + esac
3949 +fi
3950 +
3951 +if test $errors = 0; then
3952 + $echo Passed all 2 tests. 1>&2
3953 +else
3954 + $echo Failed $errors tests. 1>&2
3955 +fi
3956 +test $errors = 0 || errors=1
3957 +exit $errors
3958 --- /dev/null 2005-10-10 09:36:06.437701000 +0100
3959 +++ coreutils-5.93/tests/sort/mb1.I 2005-12-23 08:53:01.000000000 +0000
3960 @@ -0,0 +1,4 @@
3961 +Apple@10
3962 +Banana@5
3963 +Citrus@20
3964 +Cherry@30
3965 --- /dev/null 2005-10-10 09:36:06.437701000 +0100
3966 +++ coreutils-5.93/tests/sort/mb2.I 2005-12-23 08:53:01.000000000 +0000
3967 @@ -0,0 +1,4 @@
3968 +Apple@AA10@@20
3969 +Banana@AA5@@30
3970 +Citrus@AA20@@5
3971 +Cherry@AA30@@10
3972 --- /dev/null 2005-10-10 09:36:06.437701000 +0100
3973 +++ coreutils-5.93/tests/sort/mb1.X 2005-12-23 08:53:01.000000000 +0000
3974 @@ -0,0 +1,4 @@
3975 +Banana@5
3976 +Apple@10
3977 +Citrus@20
3978 +Cherry@30
3979 --- /dev/null 2005-10-10 09:36:06.437701000 +0100
3980 +++ coreutils-5.93/tests/sort/mb2.X 2005-12-23 08:53:01.000000000 +0000
3981 @@ -0,0 +1,4 @@
3982 +Citrus@AA20@@5
3983 +Cherry@AA30@@10
3984 +Apple@AA10@@20
3985 +Banana@AA5@@30
3986 --- coreutils-5.93/tests/sort/Makefile.am.i18n 2005-10-24 22:02:25.000000000 +0100
3987 +++ coreutils-5.93/tests/sort/Makefile.am 2005-12-23 08:53:01.000000000 +0000
3988 @@ -43,14 +43,16 @@
3989 nul-nls.E use-nl.O use-nl.E o2.O o2.E nul-tab.O nul-tab.E
3990 ##test-files-end
3991
3992 -EXTRA_DIST = Test.pm $x-tests $(explicit) $(maint_gen)
3993 -noinst_SCRIPTS = $x-tests
3994 +run_gen += mb1.O mb2.O
3995 +
3996 +EXTRA_DIST = Test.pm $x-tests $(explicit) $(maint_gen) mb1.I mb1.X mb2.I mb2.X
3997 +noinst_SCRIPTS = $x-tests # $x-mb-tests
3998 TESTS_ENVIRONMENT = \
3999 PATH="`pwd`/../../src$(PATH_SEPARATOR)$$PATH"
4000
4001 editpl = sed -e 's,@''PERL''@,$(PERL),g' -e 's,@''srcdir''@,$(srcdir),g'
4002
4003 -TESTS = $x-tests
4004 +TESTS = $x-tests $x-mb-tests
4005
4006 mk_script = $(srcdir)/../mk-script
4007 $(srcdir)/$x-tests: $(mk_script) Test.pm Makefile.am
4008 --- coreutils-5.93/tests/sort/Makefile.in.i18n 2005-11-05 22:12:25.000000000 +0000
4009 +++ coreutils-5.93/tests/sort/Makefile.in 2005-12-23 09:00:37.000000000 +0000
4010 @@ -107,25 +107,25 @@
4011 $(top_srcdir)/m4/onceonly_2_57.m4 $(top_srcdir)/m4/openat.m4 \
4012 $(top_srcdir)/m4/pathmax.m4 $(top_srcdir)/m4/perl.m4 \
4013 $(top_srcdir)/m4/physmem.m4 $(top_srcdir)/m4/po.m4 \
4014 - $(top_srcdir)/m4/posixtm.m4 $(top_srcdir)/m4/posixver.m4 \
4015 - $(top_srcdir)/m4/prereq.m4 $(top_srcdir)/m4/progtest.m4 \
4016 - $(top_srcdir)/m4/putenv.m4 $(top_srcdir)/m4/quote.m4 \
4017 - $(top_srcdir)/m4/quotearg.m4 $(top_srcdir)/m4/readlink.m4 \
4018 - $(top_srcdir)/m4/readtokens.m4 $(top_srcdir)/m4/readutmp.m4 \
4019 - $(top_srcdir)/m4/regex.m4 $(top_srcdir)/m4/rename.m4 \
4020 - $(top_srcdir)/m4/restrict.m4 $(top_srcdir)/m4/rmdir-errno.m4 \
4021 - $(top_srcdir)/m4/rmdir.m4 $(top_srcdir)/m4/root-dev-ino.m4 \
4022 - $(top_srcdir)/m4/rpmatch.m4 $(top_srcdir)/m4/safe-read.m4 \
4023 - $(top_srcdir)/m4/safe-write.m4 $(top_srcdir)/m4/same.m4 \
4024 - $(top_srcdir)/m4/save-cwd.m4 $(top_srcdir)/m4/savedir.m4 \
4025 - $(top_srcdir)/m4/setenv.m4 $(top_srcdir)/m4/settime.m4 \
4026 - $(top_srcdir)/m4/sha1.m4 $(top_srcdir)/m4/sig2str.m4 \
4027 - $(top_srcdir)/m4/signed.m4 $(top_srcdir)/m4/socklen.m4 \
4028 - $(top_srcdir)/m4/sockpfaf.m4 $(top_srcdir)/m4/ssize_t.m4 \
4029 - $(top_srcdir)/m4/st_dm_mode.m4 $(top_srcdir)/m4/stat-macros.m4 \
4030 - $(top_srcdir)/m4/stat-prog.m4 $(top_srcdir)/m4/stat-time.m4 \
4031 - $(top_srcdir)/m4/stdbool.m4 $(top_srcdir)/m4/stdint_h.m4 \
4032 - $(top_srcdir)/m4/stdio-safer.m4 \
4033 + $(top_srcdir)/m4/posix_acl.m4 $(top_srcdir)/m4/posixtm.m4 \
4034 + $(top_srcdir)/m4/posixver.m4 $(top_srcdir)/m4/prereq.m4 \
4035 + $(top_srcdir)/m4/progtest.m4 $(top_srcdir)/m4/putenv.m4 \
4036 + $(top_srcdir)/m4/quote.m4 $(top_srcdir)/m4/quotearg.m4 \
4037 + $(top_srcdir)/m4/readlink.m4 $(top_srcdir)/m4/readtokens.m4 \
4038 + $(top_srcdir)/m4/readutmp.m4 $(top_srcdir)/m4/regex.m4 \
4039 + $(top_srcdir)/m4/rename.m4 $(top_srcdir)/m4/restrict.m4 \
4040 + $(top_srcdir)/m4/rmdir-errno.m4 $(top_srcdir)/m4/rmdir.m4 \
4041 + $(top_srcdir)/m4/root-dev-ino.m4 $(top_srcdir)/m4/rpmatch.m4 \
4042 + $(top_srcdir)/m4/safe-read.m4 $(top_srcdir)/m4/safe-write.m4 \
4043 + $(top_srcdir)/m4/same.m4 $(top_srcdir)/m4/save-cwd.m4 \
4044 + $(top_srcdir)/m4/savedir.m4 $(top_srcdir)/m4/setenv.m4 \
4045 + $(top_srcdir)/m4/settime.m4 $(top_srcdir)/m4/sha1.m4 \
4046 + $(top_srcdir)/m4/sig2str.m4 $(top_srcdir)/m4/signed.m4 \
4047 + $(top_srcdir)/m4/socklen.m4 $(top_srcdir)/m4/sockpfaf.m4 \
4048 + $(top_srcdir)/m4/ssize_t.m4 $(top_srcdir)/m4/st_dm_mode.m4 \
4049 + $(top_srcdir)/m4/stat-macros.m4 $(top_srcdir)/m4/stat-prog.m4 \
4050 + $(top_srcdir)/m4/stat-time.m4 $(top_srcdir)/m4/stdbool.m4 \
4051 + $(top_srcdir)/m4/stdint_h.m4 $(top_srcdir)/m4/stdio-safer.m4 \
4052 $(top_srcdir)/m4/stdlib-safer.m4 $(top_srcdir)/m4/stpcpy.m4 \
4053 $(top_srcdir)/m4/strcase.m4 $(top_srcdir)/m4/strcspn.m4 \
4054 $(top_srcdir)/m4/strdup.m4 $(top_srcdir)/m4/strftime.m4 \
4055 @@ -196,7 +196,6 @@
4056 GLIBC21 = @GLIBC21@
4057 GMSGFMT = @GMSGFMT@
4058 GNU_PACKAGE = @GNU_PACKAGE@
4059 -GREP = @GREP@
4060 HAVE__BOOL = @HAVE__BOOL@
4061 HELP2MAN = @HELP2MAN@
4062 INSTALL_DATA = @INSTALL_DATA@
4063 @@ -207,6 +206,7 @@
4064 INTL_MACOSX_LIBS = @INTL_MACOSX_LIBS@
4065 KMEM_GROUP = @KMEM_GROUP@
4066 LDFLAGS = @LDFLAGS@
4067 +LIBACL = @LIBACL@
4068 LIBICONV = @LIBICONV@
4069 LIBINTL = @LIBINTL@
4070 LIBOBJS = @LIBOBJS@
4071 @@ -217,6 +217,8 @@
4072 LIB_FDATASYNC = @LIB_FDATASYNC@
4073 LIB_GETHRXTIME = @LIB_GETHRXTIME@
4074 LIB_NANOSLEEP = @LIB_NANOSLEEP@
4075 +LIB_PAM = @LIB_PAM@
4076 +LIB_SELINUX = @LIB_SELINUX@
4077 LN_S = @LN_S@
4078 LTLIBICONV = @LTLIBICONV@
4079 LTLIBINTL = @LTLIBINTL@
4080 @@ -268,30 +270,23 @@
4081 build_os = @build_os@
4082 build_vendor = @build_vendor@
4083 datadir = @datadir@
4084 -datarootdir = @datarootdir@
4085 -docdir = @docdir@
4086 -dvidir = @dvidir@
4087 exec_prefix = @exec_prefix@
4088 host = @host@
4089 host_alias = @host_alias@
4090 host_cpu = @host_cpu@
4091 host_os = @host_os@
4092 host_vendor = @host_vendor@
4093 -htmldir = @htmldir@
4094 includedir = @includedir@
4095 infodir = @infodir@
4096 install_sh = @install_sh@
4097 libdir = @libdir@
4098 libexecdir = @libexecdir@
4099 -localedir = @localedir@
4100 localstatedir = @localstatedir@
4101 mandir = @mandir@
4102 mkdir_p = @mkdir_p@
4103 oldincludedir = @oldincludedir@
4104 -pdfdir = @pdfdir@
4105 prefix = @prefix@
4106 program_transform_name = @program_transform_name@
4107 -psdir = @psdir@
4108 sbindir = @sbindir@
4109 sharedstatedir = @sharedstatedir@
4110 sysconfdir = @sysconfdir@
4111 @@ -318,33 +313,37 @@
4112 neg-nls.I neg-nls.X nul-nls.I nul-nls.X use-nl.I use-nl.X o2.I o2.X nul-tab.I \
4113 nul-tab.X
4114
4115 -run_gen = n1.O n1.E n2.O n2.E n3.O n3.E n4.O n4.E n5.O n5.E n6.O n6.E n7.O \
4116 -n7.E n8a.O n8a.E n8b.O n8b.E n9a.O n9a.E n9b.O n9b.E n10a.O n10a.E n10b.O \
4117 -n10b.E n11a.O n11a.E n11b.O n11b.E 01a.O 01a.E 02a.O 02a.E 02b.O 02b.E 02c.O \
4118 -02c.E 02m.O 02m.E 02n.O 02n.E 02o.O 02o.E 02p.O 02p.E 03a.O 03a.E 03b.O 03b.E \
4119 -03c.O 03c.E 03d.O 03d.E 03e.O 03e.E 03f.O 03f.E 03g.O 03g.E 03h.O 03h.E 03i.O \
4120 -03i.E 04a.O 04a.E 04b.O 04b.E 04c.O 04c.E 04d.O 04d.E 04e.O 04e.E 05a.O 05a.E \
4121 -05b.O 05b.E 05c.O 05c.E 05d.O 05d.E 05e.O 05e.E 05f.O 05f.E 06a.O 06a.E 06b.O \
4122 -06b.E 06c.O 06c.E 06d.O 06d.E 06e.O 06e.E 06f.O 06f.E 07a.O 07a.E 07b.O 07b.E \
4123 -07c.O 07c.E 07d.O 07d.E 08a.O 08a.E 08b.O 08b.E 09a.O 09a.E 09b.O 09b.E 09c.O \
4124 -09c.E 09d.O 09d.E 10a.O 10a.E 10b.O 10b.E 10c.O 10c.E 10d.O 10d.E 10a0.O \
4125 -10a0.E 10a1.O 10a1.E 10a2.O 10a2.E 10e.O 10e.E 10f.O 10f.E 10g.O 10g.E 11a.O \
4126 -11a.E 11b.O 11b.E 11c.O 11c.E 11d.O 11d.E 12a.O 12a.E 12b.O 12b.E 12c.O 12c.E \
4127 -12d.O 12d.E 13a.O 13a.E 13b.O 13b.E 14a.O 14a.E 14b.O 14b.E 15a.O 15a.E 15b.O \
4128 -15b.E 15c.O 15c.E 15d.O 15d.E 15e.O 15e.E 16a.O 16a.E 17.O 17.E 18a.O 18a.E \
4129 -18b.O 18b.E 18c.O 18c.E 18d.O 18d.E 18e.O 18e.E 19a.O 19a.E 19b.O 19b.E 20a.O \
4130 -20a.E 21a.O 21a.E 21b.O 21b.E 21c.O 21c.E 21d.O 21d.E 21e.O 21e.E 21f.O 21f.E \
4131 -21g.O 21g.E 22a.O 22a.E 22b.O 22b.E no-file1.O no-file1.E o-no-file1.O \
4132 -o-no-file1.E create-empty.O create-empty.E neg-nls.O neg-nls.E nul-nls.O \
4133 -nul-nls.E use-nl.O use-nl.E o2.O o2.E nul-tab.O nul-tab.E
4134 -
4135 -EXTRA_DIST = Test.pm $x-tests $(explicit) $(maint_gen)
4136 -noinst_SCRIPTS = $x-tests
4137 +run_gen = n1.O n1.E n2.O n2.E n3.O n3.E n4.O n4.E n5.O n5.E n6.O n6.E \
4138 + n7.O n7.E n8a.O n8a.E n8b.O n8b.E n9a.O n9a.E n9b.O n9b.E \
4139 + n10a.O n10a.E n10b.O n10b.E n11a.O n11a.E n11b.O n11b.E 01a.O \
4140 + 01a.E 02a.O 02a.E 02b.O 02b.E 02c.O 02c.E 02m.O 02m.E 02n.O \
4141 + 02n.E 02o.O 02o.E 02p.O 02p.E 03a.O 03a.E 03b.O 03b.E 03c.O \
4142 + 03c.E 03d.O 03d.E 03e.O 03e.E 03f.O 03f.E 03g.O 03g.E 03h.O \
4143 + 03h.E 03i.O 03i.E 04a.O 04a.E 04b.O 04b.E 04c.O 04c.E 04d.O \
4144 + 04d.E 04e.O 04e.E 05a.O 05a.E 05b.O 05b.E 05c.O 05c.E 05d.O \
4145 + 05d.E 05e.O 05e.E 05f.O 05f.E 06a.O 06a.E 06b.O 06b.E 06c.O \
4146 + 06c.E 06d.O 06d.E 06e.O 06e.E 06f.O 06f.E 07a.O 07a.E 07b.O \
4147 + 07b.E 07c.O 07c.E 07d.O 07d.E 08a.O 08a.E 08b.O 08b.E 09a.O \
4148 + 09a.E 09b.O 09b.E 09c.O 09c.E 09d.O 09d.E 10a.O 10a.E 10b.O \
4149 + 10b.E 10c.O 10c.E 10d.O 10d.E 10a0.O 10a0.E 10a1.O 10a1.E \
4150 + 10a2.O 10a2.E 10e.O 10e.E 10f.O 10f.E 10g.O 10g.E 11a.O 11a.E \
4151 + 11b.O 11b.E 11c.O 11c.E 11d.O 11d.E 12a.O 12a.E 12b.O 12b.E \
4152 + 12c.O 12c.E 12d.O 12d.E 13a.O 13a.E 13b.O 13b.E 14a.O 14a.E \
4153 + 14b.O 14b.E 15a.O 15a.E 15b.O 15b.E 15c.O 15c.E 15d.O 15d.E \
4154 + 15e.O 15e.E 16a.O 16a.E 17.O 17.E 18a.O 18a.E 18b.O 18b.E \
4155 + 18c.O 18c.E 18d.O 18d.E 18e.O 18e.E 19a.O 19a.E 19b.O 19b.E \
4156 + 20a.O 20a.E 21a.O 21a.E 21b.O 21b.E 21c.O 21c.E 21d.O 21d.E \
4157 + 21e.O 21e.E 21f.O 21f.E 21g.O 21g.E 22a.O 22a.E 22b.O 22b.E \
4158 + no-file1.O no-file1.E o-no-file1.O o-no-file1.E create-empty.O \
4159 + create-empty.E neg-nls.O neg-nls.E nul-nls.O nul-nls.E \
4160 + use-nl.O use-nl.E o2.O o2.E nul-tab.O nul-tab.E mb1.O mb2.O
4161 +EXTRA_DIST = Test.pm $x-tests $(explicit) $(maint_gen) mb1.I mb1.X mb2.I mb2.X
4162 +noinst_SCRIPTS = $x-tests # $x-mb-tests
4163 TESTS_ENVIRONMENT = \
4164 PATH="`pwd`/../../src$(PATH_SEPARATOR)$$PATH"
4165
4166 editpl = sed -e 's,@''PERL''@,$(PERL),g' -e 's,@''srcdir''@,$(srcdir),g'
4167 -TESTS = $x-tests
4168 +TESTS = $x-tests $x-mb-tests
4169 mk_script = $(srcdir)/../mk-script
4170 MAINTAINERCLEANFILES = $x-tests $(maint_gen)
4171 CLEANFILES = $(run_gen)