Magellan Linux

Annotation of /trunk/coreutils/patches-5.94/coreutils-5.94-i18n.patch

Parent Directory Parent Directory | Revision Log Revision Log


Revision 115 - (hide annotations) (download)
Sun Mar 18 15:57:37 2007 UTC (17 years, 2 months ago) by niro
File size: 108739 byte(s)
patches for 5.94

1 niro 115 --- coreutils-5.93/lib/linebuffer.h.i18n 2005-05-14 08:58:06.000000000 +0100
2     +++ coreutils-5.93/lib/linebuffer.h 2005-12-23 08:53:01.000000000 +0000
3     @@ -22,6 +22,11 @@
4    
5     # include <stdio.h>
6    
7     +/* Get mbstate_t. */
8     +# if HAVE_WCHAR_H
9     +# include <wchar.h>
10     +# endif
11     +
12     /* A `struct linebuffer' holds a line of text. */
13    
14     struct linebuffer
15     @@ -29,6 +34,9 @@
16     size_t size; /* Allocated. */
17     size_t length; /* Used. */
18     char *buffer;
19     +# if HAVE_WCHAR_H
20     + mbstate_t state;
21     +# endif
22     };
23    
24     /* Initialize linebuffer LINEBUFFER for use. */
25     --- coreutils-5.93/src/cut.c.i18n 2005-08-12 08:16:25.000000000 +0100
26     +++ coreutils-5.93/src/cut.c 2005-12-23 08:53:01.000000000 +0000
27     @@ -29,6 +29,11 @@
28     #include <assert.h>
29     #include <getopt.h>
30     #include <sys/types.h>
31     +
32     +/* Get mbstate_t, mbrtowc(). */
33     +#if HAVE_WCHAR_H
34     +# include <wchar.h>
35     +#endif
36     #include "system.h"
37    
38     #include "error.h"
39     @@ -37,6 +42,18 @@
40     #include "quote.h"
41     #include "xstrndup.h"
42    
43     +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
44     + installation; work around this configuration error. */
45     +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
46     +# undef MB_LEN_MAX
47     +# define MB_LEN_MAX 16
48     +#endif
49     +
50     +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
51     +#if HAVE_MBRTOWC && defined mbstate_t
52     +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
53     +#endif
54     +
55     /* The official name of this program (e.g., no `g' prefix). */
56     #define PROGRAM_NAME "cut"
57    
58     @@ -67,6 +84,52 @@
59     } \
60     while (0)
61    
62     +/* Refill the buffer BUF to get a multibyte character. */
63     +#define REFILL_BUFFER(BUF, BUFPOS, BUFLEN, STREAM) \
64     + do \
65     + { \
66     + if (BUFLEN < MB_LEN_MAX && !feof (STREAM) && !ferror (STREAM)) \
67     + { \
68     + memmove (BUF, BUFPOS, BUFLEN); \
69     + BUFLEN += fread (BUF + BUFLEN, sizeof(char), BUFSIZ, STREAM); \
70     + BUFPOS = BUF; \
71     + } \
72     + } \
73     + while (0)
74     +
75     +/* Get the wide character at BUFPOS into WC. BUFPOS itself is not advanced.
76     + CONVFAIL is set to 1 if the byte sequence is not a valid character, else 0. */
77     +#define GET_NEXT_WC_FROM_BUFFER(WC, BUFPOS, BUFLEN, MBLENGTH, STATE, CONVFAIL) \
78     + do \
79     + { \
80     + mbstate_t state_bak; \
81     + \
82     + if (BUFLEN < 1) \
83     + { \
84     + WC = WEOF; \
85     + break; \
86     + } \
87     + \
88     + /* Get a wide character. */ \
89     + CONVFAIL = 0; \
90     + state_bak = STATE; \
91     + MBLENGTH = mbrtowc ((wchar_t *)&WC, BUFPOS, BUFLEN, &STATE); \
92     + \
93     + switch (MBLENGTH) \
94     + { \
95     + case (size_t)-1: \
96     + case (size_t)-2: \
97     + CONVFAIL++; \
98     + STATE = state_bak; \
99     + /* Fall through. */ \
100     + \
101     + case 0: \
102     + MBLENGTH = 1; \
103     + break; \
104     + } \
105     + } \
106     + while (0)
107     +
108     struct range_pair
109     {
110     size_t lo;
111     @@ -85,7 +148,7 @@
112     /* The number of bytes allocated for FIELD_1_BUFFER. */
113     static size_t field_1_bufsize;
114    
115     -/* The largest field or byte index used as an endpoint of a closed
116     +/* The largest byte, character or field index used as an endpoint of a closed
117     or degenerate range specification; this doesn't include the starting
118     index of right-open-ended ranges. For example, with either range spec
119     `2-5,9-', `2-3,5,9-' this variable would be set to 5. */
120     @@ -97,10 +160,11 @@
121    
122     /* This is a bit vector.
123     In byte mode, which bytes to output.
124     + In character mode, which characters to output.
125     In field mode, which DELIM-separated fields to output.
126     - Both bytes and fields are numbered starting with 1,
127     + Bytes, characters and fields are numbered starting with 1,
128     so the zeroth bit of this array is unused.
129     - A field or byte K has been selected if
130     + A byte, character or field K has been selected if
131     (K <= MAX_RANGE_ENDPOINT and is_printable_field(K))
132     || (EOL_RANGE_START > 0 && K >= EOL_RANGE_START). */
133     static unsigned char *printable_field;
134     @@ -109,9 +173,12 @@
135     {
136     undefined_mode,
137    
138     - /* Output characters that are in the given bytes. */
139     + /* Output bytes that are at the given positions. */
140     byte_mode,
141    
142     + /* Output characters that are at the given positions. */
143     + character_mode,
144     +
145     /* Output the given delimeter-separated fields. */
146     field_mode
147     };
148     @@ -121,6 +188,13 @@
149    
150     static enum operating_mode operating_mode;
151    
152     +/* If nonzero, when in byte mode, don't split multibyte characters. */
153     +static int byte_mode_character_aware;
154     +
155     +/* If nonzero, use the functions for single byte locale even
156     + if this program runs in a multibyte locale. */
157     +static int force_singlebyte_mode;
158     +
159     /* If true do not output lines containing no delimeter characters.
160     Otherwise, all such lines are printed. This option is valid only
161     with field mode. */
162     @@ -132,6 +206,9 @@
163    
164     /* The delimeter character for field mode. */
165     static unsigned char delim;
166     +#if HAVE_WCHAR_H
167     +static wchar_t wcdelim;
168     +#endif
169    
170     /* True if the --output-delimiter=STRING option was specified. */
171     static bool output_delimiter_specified;
172     @@ -205,7 +282,7 @@
173     -f, --fields=LIST select only these fields; also print any line\n\
174     that contains no delimiter character, unless\n\
175     the -s option is specified\n\
176     - -n (ignored)\n\
177     + -n with -b: don't split multibyte characters\n\
178     "), stdout);
179     fputs (_("\
180     --complement complement the set of selected bytes, characters\n\
181     @@ -360,7 +437,7 @@
182     in_digits = false;
183     /* Starting a range. */
184     if (dash_found)
185     - FATAL_ERROR (_("invalid byte or field list"));
186     + FATAL_ERROR (_("invalid byte, character or field list"));
187     dash_found = true;
188     fieldstr++;
189    
190     @@ -385,14 +462,16 @@
191     if (value == 0)
192     {
193     /* `n-'. From `initial' to end of line. */
194     - eol_range_start = initial;
195     + if (eol_range_start == 0 ||
196     + (eol_range_start != 0 && eol_range_start > initial))
197     + eol_range_start = initial;
198     field_found = true;
199     }
200     else
201     {
202     /* `m-n' or `-n' (1-n). */
203     if (value < initial)
204     - FATAL_ERROR (_("invalid byte or field list"));
205     + FATAL_ERROR (_("invalid byte, character or field list"));
206    
207     /* Is there already a range going to end of line? */
208     if (eol_range_start != 0)
209     @@ -465,6 +544,9 @@
210     if (operating_mode == byte_mode)
211     error (0, 0,
212     _("byte offset %s is too large"), quote (bad_num));
213     + else if (operating_mode == character_mode)
214     + error (0, 0,
215     + _("character offset %s is too large"), quote (bad_num));
216     else
217     error (0, 0,
218     _("field number %s is too large"), quote (bad_num));
219     @@ -475,7 +557,7 @@
220     fieldstr++;
221     }
222     else
223     - FATAL_ERROR (_("invalid byte or field list"));
224     + FATAL_ERROR (_("invalid byte, character or field list"));
225     }
226    
227     max_range_endpoint = 0;
228     @@ -568,6 +650,63 @@
229     }
230     }
231    
232     +#if HAVE_MBRTOWC
233     +/* This function is used in the following cases.
234     +
235     + 1. Read from the stream STREAM, printing to standard output any selected
236     + characters.
237     +
238     + 2. Read from stream STREAM, printing to standard output any selected bytes,
239     + without splitting multibyte characters. */
240     +
241     +static void
242     +cut_characters_or_cut_bytes_no_split (FILE *stream)
243     +{
244     + int idx; /* number of bytes or characters in the line so far. */
245     + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
246     + char *bufpos; /* Next read position of BUF. */
247     + size_t buflen; /* The length of the byte sequence in buf. */
248     + wint_t wc = 0; /* A gotten wide character. */
249     + size_t mblength; /* The byte size of a multibyte character which shows
250     + as same character as WC. */
251     + mbstate_t state; /* State of the stream. */
252     + int convfail; /* 1, when conversion is failed. Otherwise 0. */
253     +
254     + idx = 0;
255     + buflen = 0;
256     + bufpos = buf;
257     + memset (&state, '\0', sizeof(mbstate_t));
258     +
259     + while (1)
260     + {
261     + REFILL_BUFFER (buf, bufpos, buflen, stream);
262     +
263     + GET_NEXT_WC_FROM_BUFFER (wc, bufpos, buflen, mblength, state, convfail);
264     +
265     + if (wc == WEOF)
266     + {
267     + if (idx > 0)
268     + putchar ('\n');
269     + break;
270     + }
271     + else if (wc == L'\n')
272     + {
273     + putchar ('\n');
274     + idx = 0;
275     + }
276     + else
277     + {
278     + idx += (operating_mode == byte_mode) ? mblength : 1;
279     + if (print_kth (idx, NULL))
280     + fwrite (bufpos, mblength, sizeof(char), stdout);
281     + }
282     +
283     + buflen -= mblength;
284     + bufpos += mblength;
285     + }
286     +}
287     +#endif
288     +
289     /* Read from stream STREAM, printing to standard output any selected fields. */
290    
291     static void
292     @@ -689,13 +828,192 @@
293     }
294     }
295    
296     +#if HAVE_MBRTOWC
297     +static void
298     +cut_fields_mb (FILE *stream)
299     +{
300     + int c;
301     + unsigned int field_idx;
302     + int found_any_selected_field;
303     + int buffer_first_field;
304     + int empty_input;
305     + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
306     + char *bufpos; /* Next read position of BUF. */
307     + size_t buflen; /* The length of the byte sequence in buf. */
308     + wint_t wc = 0; /* A gotten wide character. */
309     + size_t mblength; /* The byte size of a multibyte character which shows
310     + as same character as WC. */
311     + mbstate_t state; /* State of the stream. */
312     + int convfail; /* 1, when conversion is failed. Otherwise 0. */
313     +
314     + found_any_selected_field = 0;
315     + field_idx = 1;
316     + bufpos = buf;
317     + buflen = 0;
318     + memset (&state, '\0', sizeof(mbstate_t));
319     +
320     + c = getc (stream);
321     + empty_input = (c == EOF);
322     + if (c != EOF)
323     + ungetc (c, stream);
324     + else
325     + wc = WEOF;
326     +
327     + /* To support the semantics of the -s flag, we may have to buffer
328     + all of the first field to determine whether it is `delimited.'
329     + But that is unnecessary if all non-delimited lines must be printed
330     + and the first field has been selected, or if non-delimited lines
331     + must be suppressed and the first field has *not* been selected.
332     + That is because a non-delimited line has exactly one field. */
333     + buffer_first_field = (suppress_non_delimited ^ !print_kth (1, NULL));
334     +
335     + while (1)
336     + {
337     + if (field_idx == 1 && buffer_first_field)
338     + {
339     + int len = 0;
340     +
341     + while (1)
342     + {
343     + REFILL_BUFFER (buf, bufpos, buflen, stream);
344     +
345     + GET_NEXT_WC_FROM_BUFFER
346     + (wc, bufpos, buflen, mblength, state, convfail);
347     +
348     + if (wc == WEOF)
349     + break;
350     +
351     + field_1_buffer = xrealloc (field_1_buffer, len + mblength);
352     + memcpy (field_1_buffer + len, bufpos, mblength);
353     + len += mblength;
354     + buflen -= mblength;
355     + bufpos += mblength;
356     +
357     + if (!convfail && (wc == L'\n' || wc == wcdelim))
358     + break;
359     + }
360     +
361     + if (wc == WEOF)
362     + break;
363     +
364     + /* If the first field extends to the end of line (it is not
365     + delimited) and we are printing all non-delimited lines,
366     + print this one. */
367     + if (convfail || (!convfail && wc != wcdelim))
368     + {
369     + if (suppress_non_delimited)
370     + {
371     + /* Empty. */
372     + }
373     + else
374     + {
375     + fwrite (field_1_buffer, sizeof (char), len, stdout);
376     + /* Make sure the output line is newline terminated. */
377     + if (convfail || (!convfail && wc != L'\n'))
378     + putchar ('\n');
379     + }
380     + continue;
381     + }
382     +
383     + if (print_kth (1, NULL))
384     + {
385     + /* Print the field, but not the trailing delimiter. */
386     + fwrite (field_1_buffer, sizeof (char), len - 1, stdout);
387     + found_any_selected_field = 1;
388     + }
389     + ++field_idx;
390     + }
391     +
392     + if (wc != WEOF)
393     + {
394     + if (print_kth (field_idx, NULL))
395     + {
396     + if (found_any_selected_field)
397     + {
398     + fwrite (output_delimiter_string, sizeof (char),
399     + output_delimiter_length, stdout);
400     + }
401     + found_any_selected_field = 1;
402     + }
403     +
404     + while (1)
405     + {
406     + REFILL_BUFFER (buf, bufpos, buflen, stream);
407     +
408     + GET_NEXT_WC_FROM_BUFFER
409     + (wc, bufpos, buflen, mblength, state, convfail);
410     +
411     + if (wc == WEOF)
412     + break;
413     + else if (!convfail && (wc == wcdelim || wc == L'\n'))
414     + {
415     + buflen -= mblength;
416     + bufpos += mblength;
417     + break;
418     + }
419     +
420     + if (print_kth (field_idx, NULL))
421     + fwrite (bufpos, mblength, sizeof(char), stdout);
422     +
423     + buflen -= mblength;
424     + bufpos += mblength;
425     + }
426     + }
427     +
428     + if ((!convfail || wc == L'\n') && buflen < 1)
429     + wc = WEOF;
430     +
431     + if (!convfail && wc == wcdelim)
432     + ++field_idx;
433     + else if (wc == WEOF || (!convfail && wc == L'\n'))
434     + {
435     + if (found_any_selected_field
436     + || (!empty_input && !(suppress_non_delimited && field_idx == 1)))
437     + putchar ('\n');
438     + if (wc == WEOF)
439     + break;
440     + field_idx = 1;
441     + found_any_selected_field = 0;
442     + }
443     + }
444     +}
445     +#endif
446     +
447     static void
448     cut_stream (FILE *stream)
449     {
450     - if (operating_mode == byte_mode)
451     - cut_bytes (stream);
452     +#if HAVE_MBRTOWC
453     + if (MB_CUR_MAX > 1 && !force_singlebyte_mode)
454     + {
455     + switch (operating_mode)
456     + {
457     + case byte_mode:
458     + if (byte_mode_character_aware)
459     + cut_characters_or_cut_bytes_no_split (stream);
460     + else
461     + cut_bytes (stream);
462     + break;
463     +
464     + case character_mode:
465     + cut_characters_or_cut_bytes_no_split (stream);
466     + break;
467     +
468     + case field_mode:
469     + cut_fields_mb (stream);
470     + break;
471     +
472     + default:
473     + abort ();
474     + }
475     + }
476     else
477     - cut_fields (stream);
478     +#endif
479     + {
480     + if (operating_mode == field_mode)
481     + cut_fields (stream);
482     + else
483     + cut_bytes (stream);
484     + }
485     }
486    
487     /* Process file FILE to standard output.
488     @@ -745,6 +1063,8 @@
489     bool ok;
490     bool delim_specified = false;
491     char *spec_list_string IF_LINT(= NULL);
492     + char mbdelim[MB_LEN_MAX + 1];
493     + size_t delimlen = 0;
494    
495     initialize_main (&argc, &argv);
496     program_name = argv[0];
497     @@ -767,7 +1087,6 @@
498     switch (optc)
499     {
500     case 'b':
501     - case 'c':
502     /* Build the byte list. */
503     if (operating_mode != undefined_mode)
504     FATAL_ERROR (_("only one type of list may be specified"));
505     @@ -775,6 +1094,14 @@
506     spec_list_string = optarg;
507     break;
508    
509     + case 'c':
510     + /* Build the character list. */
511     + if (operating_mode != undefined_mode)
512     + FATAL_ERROR (_("only one type of list may be specified"));
513     + operating_mode = character_mode;
514     + spec_list_string = optarg;
515     + break;
516     +
517     case 'f':
518     /* Build the field list. */
519     if (operating_mode != undefined_mode)
520     @@ -786,10 +1113,35 @@
521     case 'd':
522     /* New delimiter. */
523     /* Interpret -d '' to mean `use the NUL byte as the delimiter.' */
524     - if (optarg[0] != '\0' && optarg[1] != '\0')
525     - FATAL_ERROR (_("the delimiter must be a single character"));
526     - delim = optarg[0];
527     - delim_specified = true;
528     +#if HAVE_MBRTOWC
529     + {
530     + if(MB_CUR_MAX > 1)
531     + {
532     + mbstate_t state;
533     +
534     + memset (&state, '\0', sizeof(mbstate_t));
535     + delimlen = mbrtowc (&wcdelim, optarg, strnlen(optarg, MB_LEN_MAX), &state);
536     +
537     + if (delimlen == (size_t)-1 || delimlen == (size_t)-2)
538     + ++force_singlebyte_mode;
539     + else
540     + {
541     + delimlen = (delimlen < 1) ? 1 : delimlen;
542     + if (wcdelim != L'\0' && *(optarg + delimlen) != '\0')
543     + FATAL_ERROR (_("the delimiter must be a single character"));
544     + memcpy (mbdelim, optarg, delimlen);
545     + }
546     + }
547     +
548     + if (MB_CUR_MAX <= 1 || force_singlebyte_mode)
549     +#endif
550     + {
551     + if (optarg[0] != '\0' && optarg[1] != '\0')
552     + FATAL_ERROR (_("the delimiter must be a single character"));
553     + delim = (unsigned char) optarg[0];
554     + }
555     + delim_specified = true;
556     + }
557     break;
558    
559     case OUTPUT_DELIMITER_OPTION:
560     @@ -802,6 +1154,7 @@
561     break;
562    
563     case 'n':
564     + byte_mode_character_aware = 1;
565     break;
566    
567     case 's':
568     @@ -824,7 +1177,7 @@
569     if (operating_mode == undefined_mode)
570     FATAL_ERROR (_("you must specify a list of bytes, characters, or fields"));
571    
572     - if (delim != '\0' && operating_mode != field_mode)
573     + if (delim_specified && operating_mode != field_mode)
574     FATAL_ERROR (_("an input delimiter may be specified only\
575     when operating on fields"));
576    
577     @@ -851,15 +1204,34 @@
578     }
579    
580     if (!delim_specified)
581     - delim = '\t';
582     + {
583     + delim = '\t';
584     +#ifdef HAVE_MBRTOWC
585     + wcdelim = L'\t';
586     + mbdelim[0] = '\t';
587     + mbdelim[1] = '\0';
588     + delimlen = 1;
589     +#endif
590     + }
591    
592     if (output_delimiter_string == NULL)
593     {
594     - static char dummy[2];
595     - dummy[0] = delim;
596     - dummy[1] = '\0';
597     - output_delimiter_string = dummy;
598     - output_delimiter_length = 1;
599     +#ifdef HAVE_MBRTOWC
600     + if (MB_CUR_MAX > 1 && !force_singlebyte_mode)
601     + {
602     + output_delimiter_string = xstrdup(mbdelim);
603     + output_delimiter_length = delimlen;
604     + }
605     +
606     + if (MB_CUR_MAX <= 1 || force_singlebyte_mode)
607     +#endif
608     + {
609     + static char dummy[2];
610     + dummy[0] = delim;
611     + dummy[1] = '\0';
612     + output_delimiter_string = dummy;
613     + output_delimiter_length = 1;
614     + }
615     }
616    
617     if (optind == argc)
618     --- coreutils-5.93/src/pr.c.i18n 2005-09-16 08:50:33.000000000 +0100
619     +++ coreutils-5.93/src/pr.c 2005-12-23 08:53:01.000000000 +0000
620     @@ -313,6 +313,32 @@
621    
622     #include <getopt.h>
623     #include <sys/types.h>
624     +
625     +/* Get MB_LEN_MAX. */
626     +#include <limits.h>
627     +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
628     + installation; work around this configuration error. */
629     +#if !defined MB_LEN_MAX || MB_LEN_MAX == 1
630     +# define MB_LEN_MAX 16
631     +#endif
632     +
633     +/* Get MB_CUR_MAX. */
634     +#include <stdlib.h>
635     +
636     +/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */
637     +/* Get mbstate_t, mbrtowc(), wcwidth(). */
638     +#if HAVE_WCHAR_H
639     +# include <wchar.h>
640     +#endif
641     +
642     +/* Get iswprint(). -- for wcwidth(). */
643     +#if HAVE_WCTYPE_H
644     +# include <wctype.h>
645     +#endif
646     +#if !defined iswprint && !HAVE_ISWPRINT
647     +# define iswprint(wc) 1
648     +#endif
649     +
650     #include "system.h"
651     #include "error.h"
652     #include "hard-locale.h"
653     @@ -324,6 +350,18 @@
654     #include "strftime.h"
655     #include "xstrtol.h"
656    
657     +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
658     +#if HAVE_MBRTOWC && defined mbstate_t
659     +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
660     +#endif
661     +
662     +#ifndef HAVE_DECL_WCWIDTH
663     +"this configure-time declaration test was not run"
664     +#endif
665     +#if !HAVE_DECL_WCWIDTH
666     +extern int wcwidth ();
667     +#endif
668     +
669     /* The official name of this program (e.g., no `g' prefix). */
670     #define PROGRAM_NAME "pr"
671    
672     @@ -416,7 +454,20 @@
673    
674     #define NULLCOL (COLUMN *)0
675    
676     -static int char_to_clump (char c);
677     +/* Function pointers to switch functions for single byte locale or for
678     + multibyte locale. If multibyte functions do not exist in your system,
679     + these pointers always point to the functions for single byte locale. */
680     +static void (*print_char) (char c);
681     +static int (*char_to_clump) (char c);
682     +
683     +/* Functions for single byte locale. */
684     +static void print_char_single (char c);
685     +static int char_to_clump_single (char c);
686     +
687     +/* Functions for multibyte locale. */
688     +static void print_char_multi (char c);
689     +static int char_to_clump_multi (char c);
690     +
691     static bool read_line (COLUMN *p);
692     static bool print_page (void);
693     static bool print_stored (COLUMN *p);
694     @@ -426,6 +477,7 @@
695     static void pad_across_to (int position);
696     static void add_line_number (COLUMN *p);
697     static void getoptarg (char *arg, char switch_char, char *character,
698     + int *character_length, int *character_width,
699     int *number);
700     void usage (int status);
701     static void print_files (int number_of_files, char **av);
702     @@ -440,7 +492,6 @@
703     static void pad_down (int lines);
704     static void read_rest_of_line (COLUMN *p);
705     static void skip_read (COLUMN *p, int column_number);
706     -static void print_char (char c);
707     static void cleanup (void);
708     static void print_sep_string (void);
709     static void separator_string (const char *optarg_S);
710     @@ -455,7 +506,7 @@
711     we store the leftmost columns contiguously in buff.
712     To print a line from buff, get the index of the first character
713     from line_vector[i], and print up to line_vector[i + 1]. */
714     -static char *buff;
715     +static unsigned char *buff;
716    
717     /* Index of the position in buff where the next character
718     will be stored. */
719     @@ -559,7 +610,7 @@
720     static bool untabify_input = false;
721    
722     /* (-e) The input tab character. */
723     -static char input_tab_char = '\t';
724     +static char input_tab_char[MB_LEN_MAX] = "\t";
725    
726     /* (-e) Tabstops are at chars_per_tab, 2*chars_per_tab, 3*chars_per_tab, ...
727     where the leftmost column is 1. */
728     @@ -569,7 +620,10 @@
729     static bool tabify_output = false;
730    
731     /* (-i) The output tab character. */
732     -static char output_tab_char = '\t';
733     +static char output_tab_char[MB_LEN_MAX] = "\t";
734     +
735     +/* (-i) The byte length of output tab character. */
736     +static int output_tab_char_length = 1;
737    
738     /* (-i) The width of the output tab. */
739     static int chars_per_output_tab = 8;
740     @@ -643,7 +697,13 @@
741     static bool numbered_lines = false;
742    
743     /* (-n) Character which follows each line number. */
744     -static char number_separator = '\t';
745     +static char number_separator[MB_LEN_MAX] = "\t";
746     +
747     +/* (-n) The byte length of the character which follows each line number. */
748     +static int number_separator_length = 1;
749     +
750     +/* (-n) The character width of the character which follows each line number. */
751     +static int number_separator_width = 0;
752    
753     /* (-n) line counting starts with 1st line of input file (not with 1st
754     line of 1st page printed). */
755     @@ -696,6 +756,7 @@
756     -a|COLUMN|-m is a `space' and with the -J option a `tab'. */
757     static char *col_sep_string = "";
758     static int col_sep_length = 0;
759     +static int col_sep_width = 0;
760     static char *column_separator = " ";
761     static char *line_separator = "\t";
762    
763     @@ -852,6 +913,13 @@
764     col_sep_length = (int) strlen (optarg_S);
765     col_sep_string = xmalloc (col_sep_length + 1);
766     strcpy (col_sep_string, optarg_S);
767     +
768     +#if HAVE_MBRTOWC
769     + if (MB_CUR_MAX > 1)
770     + col_sep_width = mbswidth (col_sep_string, 0);
771     + else
772     +#endif
773     + col_sep_width = col_sep_length;
774     }
775    
776     int
777     @@ -877,6 +945,21 @@
778    
779     atexit (close_stdout);
780    
781     +/* Define which functions are used, the ones for single byte locale or the ones
782     + for multibyte locale. */
783     +#if HAVE_MBRTOWC
784     + if (MB_CUR_MAX > 1)
785     + {
786     + print_char = print_char_multi;
787     + char_to_clump = char_to_clump_multi;
788     + }
789     + else
790     +#endif
791     + {
792     + print_char = print_char_single;
793     + char_to_clump = char_to_clump_single;
794     + }
795     +
796     n_files = 0;
797     file_names = (argc > 1
798     ? xmalloc ((argc - 1) * sizeof (char *))
799     @@ -949,8 +1032,12 @@
800     break;
801     case 'e':
802     if (optarg)
803     - getoptarg (optarg, 'e', &input_tab_char,
804     - &chars_per_input_tab);
805     + {
806     + int dummy_length, dummy_width;
807     +
808     + getoptarg (optarg, 'e', input_tab_char, &dummy_length,
809     + &dummy_width, &chars_per_input_tab);
810     + }
811     /* Could check tab width > 0. */
812     untabify_input = true;
813     break;
814     @@ -963,8 +1050,12 @@
815     break;
816     case 'i':
817     if (optarg)
818     - getoptarg (optarg, 'i', &output_tab_char,
819     - &chars_per_output_tab);
820     + {
821     + int dummy_width;
822     +
823     + getoptarg (optarg, 'i', output_tab_char, &output_tab_char_length,
824     + &dummy_width, &chars_per_output_tab);
825     + }
826     /* Could check tab width > 0. */
827     tabify_output = true;
828     break;
829     @@ -991,8 +1082,8 @@
830     case 'n':
831     numbered_lines = true;
832     if (optarg)
833     - getoptarg (optarg, 'n', &number_separator,
834     - &chars_per_number);
835     + getoptarg (optarg, 'n', number_separator, &number_separator_length,
836     + &number_separator_width, &chars_per_number);
837     break;
838     case 'N':
839     skip_count = false;
840     @@ -1031,7 +1122,7 @@
841     old_s = false;
842     /* Reset an additional input of -s, -S dominates -s */
843     col_sep_string = "";
844     - col_sep_length = 0;
845     + col_sep_length = col_sep_width = 0;
846     use_col_separator = true;
847     if (optarg)
848     separator_string (optarg);
849     @@ -1188,10 +1279,45 @@
850     a number. */
851    
852     static void
853     -getoptarg (char *arg, char switch_char, char *character, int *number)
854     +getoptarg (char *arg, char switch_char, char *character, int *character_length,
855     + int *character_width, int *number)
856     {
857     if (!ISDIGIT (*arg))
858     - *character = *arg++;
859     + {
860     +#ifdef HAVE_MBRTOWC
861     + if (MB_CUR_MAX > 1) /* for multibyte locale. */
862     + {
863     + wchar_t wc;
864     + size_t mblength;
865     + int width;
866     + mbstate_t state = {'\0'};
867     +
868     + mblength = mbrtowc (&wc, arg, strnlen(arg, MB_LEN_MAX), &state);
869     +
870     + if (mblength == (size_t)-1 || mblength == (size_t)-2)
871     + {
872     + *character_length = 1;
873     + *character_width = 1;
874     + }
875     + else
876     + {
877     + *character_length = (mblength < 1) ? 1 : mblength;
878     + width = wcwidth (wc);
879     + *character_width = (width < 0) ? 0 : width;
880     + }
881     +
882     + strncpy (character, arg, *character_length);
883     + arg += *character_length;
884     + }
885     + else /* for single byte locale. */
886     +#endif
887     + {
888     + *character = *arg++;
889     + *character_length = 1;
890     + *character_width = 1;
891     + }
892     + }
893     +
894     if (*arg)
895     {
896     long int tmp_long;
897     @@ -1256,7 +1382,7 @@
898     else
899     col_sep_string = column_separator;
900    
901     - col_sep_length = 1;
902     + col_sep_length = col_sep_width = 1;
903     use_col_separator = true;
904     }
905     /* It's rather pointless to define a TAB separator with column
906     @@ -1288,11 +1414,11 @@
907     TAB_WIDTH (chars_per_input_tab, chars_per_number); */
908    
909     /* Estimate chars_per_text without any margin and keep it constant. */
910     - if (number_separator == '\t')
911     + if (number_separator[0] == '\t')
912     number_width = chars_per_number +
913     TAB_WIDTH (chars_per_default_tab, chars_per_number);
914     else
915     - number_width = chars_per_number + 1;
916     + number_width = chars_per_number + number_separator_width;
917    
918     /* The number is part of the column width unless we are
919     printing files in parallel. */
920     @@ -1307,7 +1433,7 @@
921     }
922    
923     chars_per_column = (chars_per_line - chars_used_by_number -
924     - (columns - 1) * col_sep_length) / columns;
925     + (columns - 1) * col_sep_width) / columns;
926    
927     if (chars_per_column < 1)
928     error (EXIT_FAILURE, 0, _("page width too narrow"));
929     @@ -1432,7 +1558,7 @@
930    
931     /* Enlarge p->start_position of first column to use the same form of
932     padding_not_printed with all columns. */
933     - h = h + col_sep_length;
934     + h = h + col_sep_width;
935    
936     /* This loop takes care of all but the rightmost column. */
937    
938     @@ -1466,7 +1592,7 @@
939     }
940     else
941     {
942     - h = h_next + col_sep_length;
943     + h = h_next + col_sep_width;
944     h_next = h + chars_per_column;
945     }
946     }
947     @@ -1756,9 +1882,9 @@
948     align_column (COLUMN *p)
949     {
950     padding_not_printed = p->start_position;
951     - if (padding_not_printed - col_sep_length > 0)
952     + if (padding_not_printed - col_sep_width > 0)
953     {
954     - pad_across_to (padding_not_printed - col_sep_length);
955     + pad_across_to (padding_not_printed - col_sep_width);
956     padding_not_printed = ANYWHERE;
957     }
958    
959     @@ -2029,13 +2155,13 @@
960     /* May be too generous. */
961     buff = X2REALLOC (buff, &buff_allocated);
962     }
963     - buff[buff_current++] = c;
964     + buff[buff_current++] = (unsigned char) c;
965     }
966    
967     static void
968     add_line_number (COLUMN *p)
969     {
970     - int i;
971     + int i, j;
972     char *s;
973     int left_cut;
974    
975     @@ -2058,22 +2184,24 @@
976     /* Tabification is assumed for multiple columns, also for n-separators,
977     but `default n-separator = TAB' hasn't been given priority over
978     equal column_width also specified by POSIX. */
979     - if (number_separator == '\t')
980     + if (number_separator[0] == '\t')
981     {
982     i = number_width - chars_per_number;
983     while (i-- > 0)
984     (p->char_func) (' ');
985     }
986     else
987     - (p->char_func) (number_separator);
988     + for (j = 0; j < number_separator_length; j++)
989     + (p->char_func) (number_separator[j]);
990     }
991     else
992     /* To comply with POSIX, we avoid any expansion of default TAB
993     separator with a single column output. No column_width requirement
994     has to be considered. */
995     {
996     - (p->char_func) (number_separator);
997     - if (number_separator == '\t')
998     + for (j = 0; j < number_separator_length; j++)
999     + (p->char_func) (number_separator[j]);
1000     + if (number_separator[0] == '\t')
1001     output_position = POS_AFTER_TAB (chars_per_output_tab,
1002     output_position);
1003     }
1004     @@ -2234,7 +2362,7 @@
1005     while (goal - h_old > 1
1006     && (h_new = POS_AFTER_TAB (chars_per_output_tab, h_old)) <= goal)
1007     {
1008     - putchar (output_tab_char);
1009     + fwrite (output_tab_char, sizeof(char), output_tab_char_length, stdout);
1010     h_old = h_new;
1011     }
1012     while (++h_old <= goal)
1013     @@ -2254,6 +2382,7 @@
1014     {
1015     char *s;
1016     int l = col_sep_length;
1017     + int not_space_flag;
1018    
1019     s = col_sep_string;
1020    
1021     @@ -2267,6 +2396,7 @@
1022     {
1023     for (; separators_not_printed > 0; --separators_not_printed)
1024     {
1025     + not_space_flag = 0;
1026     while (l-- > 0)
1027     {
1028     /* 3 types of sep_strings: spaces only, spaces and chars,
1029     @@ -2280,12 +2410,15 @@
1030     }
1031     else
1032     {
1033     + not_space_flag = 1;
1034     if (spaces_not_printed > 0)
1035     print_white_space ();
1036     putchar (*s++);
1037     - ++output_position;
1038     }
1039     }
1040     + if (not_space_flag)
1041     + output_position += col_sep_width;
1042     +
1043     /* sep_string ends with some spaces */
1044     if (spaces_not_printed > 0)
1045     print_white_space ();
1046     @@ -2313,7 +2446,7 @@
1047     required number of tabs and spaces. */
1048    
1049     static void
1050     -print_char (char c)
1051     +print_char_single (char c)
1052     {
1053     if (tabify_output)
1054     {
1055     @@ -2337,6 +2470,74 @@
1056     putchar (c);
1057     }
1058    
1059     +#ifdef HAVE_MBRTOWC
1060     +static void
1061     +print_char_multi (char c)
1062     +{
1063     + static size_t mbc_pos = 0;
1064     + static unsigned char mbc[MB_LEN_MAX] = {'\0'};
1065     + static mbstate_t state = {'\0'};
1066     + mbstate_t state_bak;
1067     + wchar_t wc;
1068     + size_t mblength;
1069     + int width;
1070     +
1071     + if (tabify_output)
1072     + {
1073     + state_bak = state;
1074     + mbc[mbc_pos++] = (unsigned char)c;
1075     + mblength = mbrtowc (&wc, mbc, mbc_pos, &state);
1076     +
1077     + while (mbc_pos > 0)
1078     + {
1079     + switch (mblength)
1080     + {
1081     + case (size_t)-2:
1082     + state = state_bak;
1083     + return;
1084     +
1085     + case (size_t)-1:
1086     + state = state_bak;
1087     + ++output_position;
1088     + putchar (mbc[0]);
1089     + memmove (mbc, mbc + 1, MB_CUR_MAX - 1);
1090     + --mbc_pos;
1091     + break;
1092     +
1093     + case 0:
1094     + mblength = 1;
1095     +
1096     + default:
1097     + if (wc == L' ')
1098     + {
1099     + memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength);
1100     + --mbc_pos;
1101     + ++spaces_not_printed;
1102     + return;
1103     + }
1104     + else if (spaces_not_printed > 0)
1105     + print_white_space ();
1106     +
1107     + /* Nonprintables are assumed to have width 0, except L'\b'. */
1108     + if ((width = wcwidth (wc)) < 1)
1109     + {
1110     + if (wc == L'\b')
1111     + --output_position;
1112     + }
1113     + else
1114     + output_position += width;
1115     +
1116     + fwrite (mbc, sizeof(char), mblength, stdout);
1117     + memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength);
1118     + mbc_pos -= mblength;
1119     + }
1120     + }
1121     + return;
1122     + }
1123     + putchar (c);
1124     +}
1125     +#endif
1126     +
1127     /* Skip to page PAGE before printing.
1128     PAGE may be larger than total number of pages. */
1129    
1130     @@ -2517,9 +2718,9 @@
1131     align_empty_cols = false;
1132     }
1133    
1134     - if (padding_not_printed - col_sep_length > 0)
1135     + if (padding_not_printed - col_sep_width > 0)
1136     {
1137     - pad_across_to (padding_not_printed - col_sep_length);
1138     + pad_across_to (padding_not_printed - col_sep_width);
1139     padding_not_printed = ANYWHERE;
1140     }
1141    
1142     @@ -2620,9 +2821,9 @@
1143     }
1144     }
1145    
1146     - if (padding_not_printed - col_sep_length > 0)
1147     + if (padding_not_printed - col_sep_width > 0)
1148     {
1149     - pad_across_to (padding_not_printed - col_sep_length);
1150     + pad_across_to (padding_not_printed - col_sep_width);
1151     padding_not_printed = ANYWHERE;
1152     }
1153    
1154     @@ -2635,8 +2836,8 @@
1155     if (spaces_not_printed == 0)
1156     {
1157     output_position = p->start_position + end_vector[line];
1158     - if (p->start_position - col_sep_length == chars_per_margin)
1159     - output_position -= col_sep_length;
1160     + if (p->start_position - col_sep_width == chars_per_margin)
1161     + output_position -= col_sep_width;
1162     }
1163    
1164     return true;
1165     @@ -2655,7 +2856,7 @@
1166     number of characters is 1.) */
1167    
1168     static int
1169     -char_to_clump (char c)
1170     +char_to_clump_single (char c)
1171     {
1172     unsigned char uc = c;
1173     char *s = clump_buff;
1174     @@ -2665,10 +2866,10 @@
1175     int chars;
1176     int chars_per_c = 8;
1177    
1178     - if (c == input_tab_char)
1179     + if (c == input_tab_char[0])
1180     chars_per_c = chars_per_input_tab;
1181    
1182     - if (c == input_tab_char || c == '\t')
1183     + if (c == input_tab_char[0] || c == '\t')
1184     {
1185     width = TAB_WIDTH (chars_per_c, input_position);
1186    
1187     @@ -2739,6 +2940,154 @@
1188     return chars;
1189     }
1190    
1191     +#ifdef HAVE_MBRTOWC
1192     +static int
1193     +char_to_clump_multi (char c)
1194     +{
1195     + static size_t mbc_pos = 0;
1196     + static char mbc[MB_LEN_MAX] = {'\0'};
1197     + static mbstate_t state = {'\0'};
1198     + mbstate_t state_bak;
1199     + wchar_t wc;
1200     + size_t mblength;
1201     + int wc_width;
1202     + register int *s = clump_buff;
1203     + register int i, j;
1204     + char esc_buff[4];
1205     + int width;
1206     + int chars;
1207     + int chars_per_c = 8;
1208     +
1209     + state_bak = state;
1210     + mbc[mbc_pos++] = c;
1211     + mblength = mbrtowc (&wc, mbc, mbc_pos, &state);
1212     +
1213     + width = 0;
1214     + chars = 0;
1215     + while (mbc_pos > 0)
1216     + {
1217     + switch (mblength)
1218     + {
1219     + case (size_t)-2:
1220     + state = state_bak;
1221     + return 0;
1222     +
1223     + case (size_t)-1:
1224     + state = state_bak;
1225     + mblength = 1;
1226     +
1227     + if (use_esc_sequence || use_cntrl_prefix)
1228     + {
1229     + width = +4;
1230     + chars = +4;
1231     + *s++ = '\\';
1232     + sprintf (esc_buff, "%03o", mbc[0]);
1233     + for (i = 0; i <= 2; ++i)
1234     + *s++ = (int) esc_buff[i];
1235     + }
1236     + else
1237     + {
1238     + width += 1;
1239     + chars += 1;
1240     + *s++ = mbc[0];
1241     + }
1242     + break;
1243     +
1244     + case 0:
1245     + mblength = 1;
1246     + /* Fall through */
1247     +
1248     + default:
1249     + if (memcmp (mbc, input_tab_char, mblength) == 0)
1250     + chars_per_c = chars_per_input_tab;
1251     +
1252     + if (memcmp (mbc, input_tab_char, mblength) == 0 || c == '\t')
1253     + {
1254     + int width_inc;
1255     +
1256     + width_inc = TAB_WIDTH (chars_per_c, input_position);
1257     + width += width_inc;
1258     +
1259     + if (untabify_input)
1260     + {
1261     + for (i = width_inc; i; --i)
1262     + *s++ = ' ';
1263     + chars += width_inc;
1264     + }
1265     + else
1266     + {
1267     + for (i = 0; i < mblength; i++)
1268     + *s++ = mbc[i];
1269     + chars += mblength;
1270     + }
1271     + }
1272     + else if ((wc_width = wcwidth (wc)) < 1)
1273     + {
1274     + if (use_esc_sequence)
1275     + {
1276     + for (i = 0; i < mblength; i++)
1277     + {
1278     + width += 4;
1279     + chars += 4;
1280     + *s++ = '\\';
1281     + sprintf (esc_buff, "%03o", c);
1282     + for (j = 0; j <= 2; ++j)
1283     + *s++ = (int) esc_buff[j];
1284     + }
1285     + }
1286     + else if (use_cntrl_prefix)
1287     + {
1288     + if (wc < 0200)
1289     + {
1290     + width += 2;
1291     + chars += 2;
1292     + *s++ = '^';
1293     + *s++ = wc ^ 0100;
1294     + }
1295     + else
1296     + {
1297     + for (i = 0; i < mblength; i++)
1298     + {
1299     + width += 4;
1300     + chars += 4;
1301     + *s++ = '\\';
1302     + sprintf (esc_buff, "%03o", c);
1303     + for (j = 0; j <= 2; ++j)
1304     + *s++ = (int) esc_buff[j];
1305     + }
1306     + }
1307     + }
1308     + else if (wc == L'\b')
1309     + {
1310     + width += -1;
1311     + chars += 1;
1312     + *s++ = c;
1313     + }
1314     + else
1315     + {
1316     + width += 0;
1317     + chars += mblength;
1318     + for (i = 0; i < mblength; i++)
1319     + *s++ = mbc[i];
1320     + }
1321     + }
1322     + else
1323     + {
1324     + width += wc_width;
1325     + chars += mblength;
1326     + for (i = 0; i < mblength; i++)
1327     + *s++ = mbc[i];
1328     + }
1329     + }
1330     + memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength);
1331     + mbc_pos -= mblength;
1332     + }
1333     +
1334     + input_position += width;
1335     + return chars;
1336     +}
1337     +#endif
1338     +
1339     /* We've just printed some files and need to clean up things before
1340     looking for more options and printing the next batch of files.
1341    
1342     --- coreutils-5.93/src/uniq.c.i18n 2005-07-05 07:32:54.000000000 +0100
1343     +++ coreutils-5.93/src/uniq.c 2005-12-23 08:53:01.000000000 +0000
1344     @@ -23,6 +23,16 @@
1345     #include <getopt.h>
1346     #include <sys/types.h>
1347    
1348     +/* Get mbstate_t, mbrtowc(). */
1349     +#if HAVE_WCHAR_H
1350     +# include <wchar.h>
1351     +#endif
1352     +
1353     +/* Get isw* functions. */
1354     +#if HAVE_WCTYPE_H
1355     +# include <wctype.h>
1356     +#endif
1357     +
1358     #include "system.h"
1359     #include "argmatch.h"
1360     #include "linebuffer.h"
1361     @@ -32,7 +42,19 @@
1362     #include "quote.h"
1363     #include "xmemcoll.h"
1364     #include "xstrtol.h"
1365     -#include "memcasecmp.h"
1366     +#include "xmemcoll.h"
1367     +
1368     +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
1369     + installation; work around this configuration error. */
1370     +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
1371     +# define MB_LEN_MAX 16
1372     +#endif
1373     +
1374     +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
1375     +#if HAVE_MBRTOWC && defined mbstate_t
1376     +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
1377     +#endif
1378     +
1379    
1380     /* The official name of this program (e.g., no `g' prefix). */
1381     #define PROGRAM_NAME "uniq"
1382     @@ -109,6 +131,10 @@
1383     /* Select whether/how to delimit groups of duplicate lines. */
1384     static enum delimit_method delimit_groups;
1385    
1386     +/* Function pointers. */
1387     +static char *
1388     +(*find_field) (struct linebuffer *line);
1389     +
1390     static struct option const longopts[] =
1391     {
1392     {"count", no_argument, NULL, 'c'},
1393     @@ -189,7 +215,7 @@
1394     return a pointer to the beginning of the line's field to be compared. */
1395    
1396     static char *
1397     -find_field (const struct linebuffer *line)
1398     +find_field_uni (struct linebuffer *line)
1399     {
1400     size_t count;
1401     char *lp = line->buffer;
1402     @@ -210,6 +236,83 @@
1403     return lp + i;
1404     }
1405    
1406     +#if HAVE_MBRTOWC
1407     +
1408     +# define MBCHAR_TO_WCHAR(WC, MBLENGTH, LP, POS, SIZE, STATEP, CONVFAIL) \
1409     + do \
1410     + { \
1411     + mbstate_t state_bak; \
1412     + \
1413     + CONVFAIL = 0; \
1414     + state_bak = *STATEP; \
1415     + \
1416     + MBLENGTH = mbrtowc (&WC, LP + POS, SIZE - POS, STATEP); \
1417     + \
1418     + switch (MBLENGTH) \
1419     + { \
1420     + case (size_t)-2: \
1421     + case (size_t)-1: \
1422     + *STATEP = state_bak; \
1423     + CONVFAIL++; \
1424     + /* Fall through */ \
1425     + case 0: \
1426     + MBLENGTH = 1; \
1427     + } \
1428     + } \
1429     + while (0)
1430     +
1431     +static char *
1432     +find_field_multi (struct linebuffer *line)
1433     +{
1434     + size_t count;
1435     + char *lp = line->buffer;
1436     + size_t size = line->length - 1;
1437     + size_t pos;
1438     + size_t mblength;
1439     + wchar_t wc;
1440     + mbstate_t *statep;
1441     + int convfail;
1442     +
1443     + pos = 0;
1444     + statep = &(line->state);
1445     +
1446     + /* skip fields. */
1447     + for (count = 0; count < skip_fields && pos < size; count++)
1448     + {
1449     + while (pos < size)
1450     + {
1451     + MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
1452     +
1453     + if (convfail || !iswblank (wc))
1454     + {
1455     + pos += mblength;
1456     + break;
1457     + }
1458     + pos += mblength;
1459     + }
1460     +
1461     + while (pos < size)
1462     + {
1463     + MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
1464     +
1465     + if (!convfail && iswblank (wc))
1466     + break;
1467     +
1468     + pos += mblength;
1469     + }
1470     + }
1471     +
1472     + /* skip chars. */
1473     + for (count = 0; count < skip_chars && pos < size; count++)
1474     + {
1475     + MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
1476     + pos += mblength;
1477     + }
1478     +
1479     + return lp + pos;
1480     +}
1481     +#endif
1482     +
1483     /* Return false if two strings OLD and NEW match, true if not.
1484     OLD and NEW point not to the beginnings of the lines
1485     but rather to the beginnings of the fields to compare.
1486     @@ -218,6 +321,8 @@
1487     static bool
1488     different (char *old, char *new, size_t oldlen, size_t newlen)
1489     {
1490     + char *copy_old, *copy_new;
1491     +
1492     if (check_chars < oldlen)
1493     oldlen = check_chars;
1494     if (check_chars < newlen)
1495     @@ -225,14 +330,92 @@
1496    
1497     if (ignore_case)
1498     {
1499     - /* FIXME: This should invoke strcoll somehow. */
1500     - return oldlen != newlen || memcasecmp (old, new, oldlen);
1501     + size_t i;
1502     +
1503     + copy_old = alloca (oldlen + 1);
1504     + copy_new = alloca (oldlen + 1);
1505     +
1506     + for (i = 0; i < oldlen; i++)
1507     + {
1508     + copy_old[i] = toupper (old[i]);
1509     + copy_new[i] = toupper (new[i]);
1510     + }
1511     }
1512     - else if (hard_LC_COLLATE)
1513     - return xmemcoll (old, oldlen, new, newlen) != 0;
1514     else
1515     - return oldlen != newlen || memcmp (old, new, oldlen);
1516     + {
1517     + copy_old = (char *)old;
1518     + copy_new = (char *)new;
1519     + }
1520     +
1521     + return xmemcoll (copy_old, oldlen, copy_new, newlen);
1522     +}
1523     +
1524     +#if HAVE_MBRTOWC
1525     +static int
1526     +different_multi (const char *old, const char *new, size_t oldlen, size_t newlen, mbstate_t oldstate, mbstate_t newstate)
1527     +{
1528     + size_t i, j, chars;
1529     + const char *str[2];
1530     + char *copy[2];
1531     + size_t len[2];
1532     + mbstate_t state[2];
1533     + size_t mblength;
1534     + wchar_t wc, uwc;
1535     + mbstate_t state_bak;
1536     +
1537     + str[0] = old;
1538     + str[1] = new;
1539     + len[0] = oldlen;
1540     + len[1] = newlen;
1541     + state[0] = oldstate;
1542     + state[1] = newstate;
1543     +
1544     + for (i = 0; i < 2; i++)
1545     + {
1546     + copy[i] = alloca (len[i] + 1);
1547     +
1548     + for (j = 0, chars = 0; j < len[i] && chars < check_chars; chars++)
1549     + {
1550     + state_bak = state[i];
1551     + mblength = mbrtowc (&wc, str[i] + j, len[i] - j, &(state[i]));
1552     +
1553     + switch (mblength)
1554     + {
1555     + case (size_t)-1:
1556     + case (size_t)-2:
1557     + state[i] = state_bak;
1558     + /* Fall through */
1559     + case 0:
1560     + mblength = 1;
1561     + break;
1562     +
1563     + default:
1564     + if (ignore_case)
1565     + {
1566     + uwc = towupper (wc);
1567     +
1568     + if (uwc != wc)
1569     + {
1570     + mbstate_t state_wc;
1571     +
1572     + memset (&state_wc, '\0', sizeof(mbstate_t));
1573     + wcrtomb (copy[i] + j, uwc, &state_wc);
1574     + }
1575     + else
1576     + memcpy (copy[i] + j, str[i] + j, mblength);
1577     + }
1578     + else
1579     + memcpy (copy[i] + j, str[i] + j, mblength);
1580     + }
1581     + j += mblength;
1582     + }
1583     + copy[i][j] = '\0';
1584     + len[i] = j;
1585     + }
1586     +
1587     + return xmemcoll (copy[0], len[0], copy[1], len[1]);
1588     }
1589     +#endif
1590    
1591     /* Output the line in linebuffer LINE to standard output
1592     provided that the switches say it should be output.
1593     @@ -286,15 +469,43 @@
1594     {
1595     char *prevfield IF_LINT (= NULL);
1596     size_t prevlen IF_LINT (= 0);
1597     +#if HAVE_MBRTOWC
1598     + mbstate_t prevstate;
1599     +
1600     + memset (&prevstate, '\0', sizeof (mbstate_t));
1601     +#endif
1602    
1603     while (!feof (stdin))
1604     {
1605     char *thisfield;
1606     size_t thislen;
1607     +#if HAVE_MBRTOWC
1608     + mbstate_t thisstate;
1609     +#endif
1610     +
1611     if (readlinebuffer (thisline, stdin) == 0)
1612     break;
1613     thisfield = find_field (thisline);
1614     thislen = thisline->length - 1 - (thisfield - thisline->buffer);
1615     +#if HAVE_MBRTOWC
1616     + if (MB_CUR_MAX > 1)
1617     + {
1618     + thisstate = thisline->state;
1619     +
1620     + if (prevline->length == 0 || different_multi
1621     + (thisfield, prevfield, thislen, prevlen, thisstate, prevstate))
1622     + {
1623     + fwrite (thisline->buffer, sizeof (char),
1624     + thisline->length, stdout);
1625     +
1626     + SWAP_LINES (prevline, thisline);
1627     + prevfield = thisfield;
1628     + prevlen = thislen;
1629     + prevstate = thisstate;
1630     + }
1631     + }
1632     + else
1633     +#endif
1634     if (prevline->length == 0
1635     || different (thisfield, prevfield, thislen, prevlen))
1636     {
1637     @@ -313,17 +524,26 @@
1638     size_t prevlen;
1639     uintmax_t match_count = 0;
1640     bool first_delimiter = true;
1641     +#if HAVE_MBRTOWC
1642     + mbstate_t prevstate;
1643     +#endif
1644    
1645     if (readlinebuffer (prevline, stdin) == 0)
1646     goto closefiles;
1647     prevfield = find_field (prevline);
1648     prevlen = prevline->length - 1 - (prevfield - prevline->buffer);
1649     +#if HAVE_MBRTOWC
1650     + prevstate = prevline->state;
1651     +#endif
1652    
1653     while (!feof (stdin))
1654     {
1655     bool match;
1656     char *thisfield;
1657     size_t thislen;
1658     +#if HAVE_MBRTOWC
1659     + mbstate_t thisstate;
1660     +#endif
1661     if (readlinebuffer (thisline, stdin) == 0)
1662     {
1663     if (ferror (stdin))
1664     @@ -332,6 +552,15 @@
1665     }
1666     thisfield = find_field (thisline);
1667     thislen = thisline->length - 1 - (thisfield - thisline->buffer);
1668     +#if HAVE_MBRTOWC
1669     + if (MB_CUR_MAX > 1)
1670     + {
1671     + thisstate = thisline->state;
1672     + match = !different_multi (thisfield, prevfield,
1673     + thislen, prevlen, thisstate, prevstate);
1674     + }
1675     + else
1676     +#endif
1677     match = !different (thisfield, prevfield, thislen, prevlen);
1678     match_count += match;
1679    
1680     @@ -364,6 +593,9 @@
1681     SWAP_LINES (prevline, thisline);
1682     prevfield = thisfield;
1683     prevlen = thislen;
1684     +#if HAVE_MBRTOWC
1685     + prevstate = thisstate;
1686     +#endif
1687     if (!match)
1688     match_count = 0;
1689     }
1690     @@ -408,6 +640,19 @@
1691    
1692     atexit (close_stdout);
1693    
1694     +#if HAVE_MBRTOWC
1695     + if (MB_CUR_MAX > 1)
1696     + {
1697     + find_field = find_field_multi;
1698     + }
1699     + else
1700     +#endif
1701     + {
1702     + find_field = find_field_uni;
1703     + }
1704     +
1705     +
1706     +
1707     skip_chars = 0;
1708     skip_fields = 0;
1709     check_chars = SIZE_MAX;
1710     --- coreutils-5.93/src/expand.c.i18n 2005-08-12 08:16:25.000000000 +0100
1711     +++ coreutils-5.93/src/expand.c 2005-12-23 08:53:01.000000000 +0000
1712     @@ -38,11 +38,28 @@
1713     #include <stdio.h>
1714     #include <getopt.h>
1715     #include <sys/types.h>
1716     +
1717     +/* Get mbstate_t, mbrtowc(), wcwidth(). */
1718     +#if HAVE_WCHAR_H
1719     +# include <wchar.h>
1720     +#endif
1721     +
1722     #include "system.h"
1723     #include "error.h"
1724     #include "quote.h"
1725     #include "xstrndup.h"
1726    
1727     +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
1728     + installation; work around this configuration error. */
1729     +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
1730     +# define MB_LEN_MAX 16
1731     +#endif
1732     +
1733     +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
1734     +#if HAVE_MBRTOWC && defined mbstate_t
1735     +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
1736     +#endif
1737     +
1738     /* The official name of this program (e.g., no `g' prefix). */
1739     #define PROGRAM_NAME "expand"
1740    
1741     @@ -364,6 +382,142 @@
1742     }
1743     }
1744    
1745     +#if HAVE_MBRTOWC
1746     +static void
1747     +expand_multibyte (void)
1748     +{
1749     + FILE *fp; /* Input stream. */
1750     + mbstate_t i_state; /* Current shift state of the input stream. */
1751     + mbstate_t i_state_bak; /* Back up the I_STATE. */
1752     + mbstate_t o_state; /* Current shift state of the output stream. */
1753     + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
1754     + char *bufpos; /* Next read position of BUF. */
1755     + size_t buflen = 0; /* The length of the byte sequence in buf. */
1756     + wchar_t wc; /* A gotten wide character. */
1757     + size_t mblength; /* The byte length of the multibyte character
1758     + that was converted to WC. */
1759     + int tab_index = 0; /* Index in `tab_list' of next tabstop. */
1760     + int column = 0; /* Column on screen of the next char. */
1761     + int next_tab_column; /* Column the next tab stop is on. */
1762     + int convert = 1; /* If nonzero, perform translations. */
1763     +
1764     + fp = next_file ((FILE *) NULL);
1765     + if (fp == NULL)
1766     + return;
1767     +
1768     + memset (&o_state, '\0', sizeof(mbstate_t));
1769     + memset (&i_state, '\0', sizeof(mbstate_t));
1770     +
1771     + for (;;)
1772     + {
1773     + /* Refill the buffer BUF. */
1774     + if (buflen < MB_LEN_MAX && !feof(fp) && !ferror(fp))
1775     + {
1776     + memmove (buf, bufpos, buflen);
1777     + buflen += fread (buf + buflen, sizeof(char), BUFSIZ, fp);
1778     + bufpos = buf;
1779     + }
1780     +
1781     + /* No character is left in BUF. */
1782     + if (buflen < 1)
1783     + {
1784     + fp = next_file (fp);
1785     +
1786     + if (fp == NULL)
1787     + break; /* No more files. */
1788     + else
1789     + {
1790     + memset (&i_state, '\0', sizeof(mbstate_t));
1791     + continue;
1792     + }
1793     + }
1794     +
1795     + /* Get a wide character. */
1796     + i_state_bak = i_state;
1797     + mblength = mbrtowc (&wc, bufpos, buflen, &i_state);
1798     +
1799     + switch (mblength)
1800     + {
1801     + case (size_t)-1: /* illegal byte sequence. */
1802     + case (size_t)-2:
1803     + mblength = 1;
1804     + i_state = i_state_bak;
1805     + if (convert)
1806     + {
1807     + ++column;
1808     + if (convert_entire_line == 0)
1809     + convert = 0;
1810     + }
1811     + putchar (*bufpos);
1812     + break;
1813     +
1814     + case 0: /* null. */
1815     + mblength = 1;
1816     + if (convert && convert_entire_line == 0)
1817     + convert = 0;
1818     + putchar ('\0');
1819     + break;
1820     +
1821     + default:
1822     + if (wc == L'\n') /* LF. */
1823     + {
1824     + tab_index = 0;
1825     + column = 0;
1826     + convert = 1;
1827     + putchar ('\n');
1828     + }
1829     + else if (wc == L'\t' && convert) /* Tab. */
1830     + {
1831     + if (tab_size == 0)
1832     + {
1833     + /* Do not let tab_index == first_free_tab;
1834     + stop when it is 1 less. */
1835     + while (tab_index < first_free_tab - 1
1836     + && column >= tab_list[tab_index])
1837     + tab_index++;
1838     + next_tab_column = tab_list[tab_index];
1839     + if (tab_index < first_free_tab - 1)
1840     + tab_index++;
1841     + if (column >= next_tab_column)
1842     + next_tab_column = column + 1;
1843     + }
1844     + else
1845     + next_tab_column = column + tab_size - column % tab_size;
1846     +
1847     + while (column < next_tab_column)
1848     + {
1849     + putchar (' ');
1850     + ++column;
1851     + }
1852     + }
1853     + else /* Others. */
1854     + {
1855     + if (convert)
1856     + {
1857     + if (wc == L'\b')
1858     + {
1859     + if (column > 0)
1860     + --column;
1861     + }
1862     + else
1863     + {
1864     + int width; /* The width of WC. */
1865     +
1866     + width = wcwidth (wc);
1867     + column += (width > 0) ? width : 0;
1868     + if (convert_entire_line == 0)
1869     + convert = 0;
1870     + }
1871     + }
1872     + fwrite (bufpos, sizeof(char), mblength, stdout);
1873     + }
1874     + }
1875     + buflen -= mblength;
1876     + bufpos += mblength;
1877     + }
1878     +}
1879     +#endif
1880     +
1881     int
1882     main (int argc, char **argv)
1883     {
1884     @@ -428,7 +582,12 @@
1885    
1886     file_list = (optind < argc ? &argv[optind] : stdin_argv);
1887    
1888     - expand ();
1889     +#if HAVE_MBRTOWC
1890     + if (MB_CUR_MAX > 1)
1891     + expand_multibyte ();
1892     + else
1893     +#endif
1894     + expand ();
1895    
1896     if (have_read_stdin && fclose (stdin) != 0)
1897     error (EXIT_FAILURE, errno, "-");
1898     --- coreutils-5.93/src/fold.c.i18n 2005-08-12 08:29:38.000000000 +0100
1899     +++ coreutils-5.93/src/fold.c 2005-12-23 08:53:01.000000000 +0000
1900     @@ -23,11 +23,33 @@
1901     #include <getopt.h>
1902     #include <sys/types.h>
1903    
1904     +/* Get mbstate_t, mbrtowc(), wcwidth(). */
1905     +#if HAVE_WCHAR_H
1906     +# include <wchar.h>
1907     +#endif
1908     +
1909     +/* Get iswprint(), iswblank(), wcwidth(). */
1910     +#if HAVE_WCTYPE_H
1911     +# include <wctype.h>
1912     +#endif
1913     +
1914     #include "system.h"
1915     #include "error.h"
1916     #include "quote.h"
1917     #include "xstrtol.h"
1918    
1919     +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
1920     + installation; work around this configuration error. */
1921     +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
1922     +# undef MB_LEN_MAX
1923     +# define MB_LEN_MAX 16
1924     +#endif
1925     +
1926     +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
1927     +#if HAVE_MBRTOWC && defined mbstate_t
1928     +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
1929     +#endif
1930     +
1931     #define TAB_WIDTH 8
1932    
1933     /* The official name of this program (e.g., no `g' prefix). */
1934     @@ -35,23 +57,44 @@
1935    
1936     #define AUTHORS "David MacKenzie"
1937    
1938     +#define FATAL_ERROR(Message) \
1939     + do \
1940     + { \
1941     + error (0, 0, (Message)); \
1942     + usage (2); \
1943     + } \
1944     + while (0)
1945     +
1946     +enum operating_mode
1947     +{
1948     + /* Fold texts by columns that are at the given positions. */
1949     + column_mode,
1950     +
1951     + /* Fold texts by bytes that are at the given positions. */
1952     + byte_mode,
1953     +
1954     + /* Fold texts by characters that are at the given positions. */
1955     + character_mode,
1956     +};
1957     +
1958     /* The name this program was run with. */
1959     char *program_name;
1960    
1961     +/* The current operating mode. (Default: column_mode) */
1962     +static enum operating_mode operating_mode;
1963     +
1964     /* If nonzero, try to break on whitespace. */
1965     static bool break_spaces;
1966    
1967     -/* If nonzero, count bytes, not column positions. */
1968     -static bool count_bytes;
1969     -
1970     /* If nonzero, at least one of the files we read was standard input. */
1971     static bool have_read_stdin;
1972    
1973     -static char const shortopts[] = "bsw:0::1::2::3::4::5::6::7::8::9::";
1974     +static char const shortopts[] = "bcsw:0::1::2::3::4::5::6::7::8::9::";
1975    
1976     static struct option const longopts[] =
1977     {
1978     {"bytes", no_argument, NULL, 'b'},
1979     + {"characters", no_argument, NULL, 'c'},
1980     {"spaces", no_argument, NULL, 's'},
1981     {"width", required_argument, NULL, 'w'},
1982     {GETOPT_HELP_OPTION_DECL},
1983     @@ -81,6 +124,7 @@
1984     "), stdout);
1985     fputs (_("\
1986     -b, --bytes count bytes rather than columns\n\
1987     + -c, --characters count characters rather than columns\n\
1988     -s, --spaces break at spaces\n\
1989     -w, --width=WIDTH use WIDTH columns instead of 80\n\
1990     "), stdout);
1991     @@ -98,7 +142,7 @@
1992     static size_t
1993     adjust_column (size_t column, char c)
1994     {
1995     - if (!count_bytes)
1996     + if (operating_mode != byte_mode)
1997     {
1998     if (c == '\b')
1999     {
2000     @@ -117,35 +161,14 @@
2001     return column;
2002     }
2003    
2004     -/* Fold file FILENAME, or standard input if FILENAME is "-",
2005     - to stdout, with maximum line length WIDTH.
2006     - Return true if successful. */
2007     -
2008     -static bool
2009     -fold_file (char *filename, size_t width)
2010     +static void
2011     +fold_text (FILE *istream, size_t width, int *saved_errno)
2012     {
2013     - FILE *istream;
2014     int c;
2015     size_t column = 0; /* Screen column where next char will go. */
2016     size_t offset_out = 0; /* Index in `line_out' for next char. */
2017     static char *line_out = NULL;
2018     static size_t allocated_out = 0;
2019     - int saved_errno;
2020     -
2021     - if (STREQ (filename, "-"))
2022     - {
2023     - istream = stdin;
2024     - have_read_stdin = true;
2025     - }
2026     - else
2027     - istream = fopen (filename, "r");
2028     -
2029     - if (istream == NULL)
2030     - {
2031     - error (0, errno, "%s", filename);
2032     - return false;
2033     - }
2034     -
2035     while ((c = getc (istream)) != EOF)
2036     {
2037     if (offset_out + 1 >= allocated_out)
2038     @@ -172,6 +195,15 @@
2039     bool found_blank = false;
2040     size_t logical_end = offset_out;
2041    
2042     + /* If LINE_OUT has no wide character,
2043     + put a new wide character in LINE_OUT
2044     + if column is bigger than width. */
2045     + if (offset_out == 0)
2046     + {
2047     + line_out[offset_out++] = c;
2048     + continue;
2049     + }
2050     +
2051     /* Look for the last blank. */
2052     while (logical_end)
2053     {
2054     @@ -218,11 +250,225 @@
2055     line_out[offset_out++] = c;
2056     }
2057    
2058     - saved_errno = errno;
2059     + *saved_errno = errno;
2060     +
2061     + if (offset_out)
2062     + fwrite (line_out, sizeof (char), (size_t) offset_out, stdout);
2063     +
2064     + free(line_out);
2065     +}
2066     +
2067     +#if HAVE_MBRTOWC
2068     +static void /* Multibyte-locale counterpart of fold_text (). */
2069     +fold_multibyte_text (FILE *istream, int width, int *saved_errno)
2070     +{
2071     + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
2072     + size_t buflen = 0; /* The length of the byte sequence in buf. */
2073     + char *bufpos = buf; /* Next read position of BUF. */
2074     + wint_t wc; /* The wide character just decoded. */
2075     + size_t mblength; /* The byte length of the multibyte character
2076     + that was decoded into WC. */
2077     + mbstate_t state, state_bak; /* State of the stream. */
2078     + int convfail; /* 1 when the conversion failed, otherwise 0. */
2079     +
2080     + char *line_out = NULL;
2081     + size_t offset_out = 0; /* Index in `line_out' for next char. */
2082     + size_t allocated_out = 0;
2083     +
2084     + int increment;
2085     + size_t column = 0;
2086     +
2087     + size_t last_blank_pos;
2088     + size_t last_blank_column;
2089     + int is_blank_seen;
2090     + int last_blank_increment = 0;
2091     + int is_bs_following_last_blank;
2092     + size_t bs_following_last_blank_num;
2093     + int is_cr_after_last_blank;
2094     +
2095     +#define CLEAR_FLAGS \
2096     + do \
2097     + { \
2098     + last_blank_pos = 0; \
2099     + last_blank_column = 0; \
2100     + is_blank_seen = 0; \
2101     + is_bs_following_last_blank = 0; \
2102     + bs_following_last_blank_num = 0; \
2103     + is_cr_after_last_blank = 0; \
2104     + } \
2105     + while (0)
2106     +
2107     +#define START_NEW_LINE \
2108     + do \
2109     + { \
2110     + putchar ('\n'); \
2111     + column = 0; \
2112     + offset_out = 0; \
2113     + CLEAR_FLAGS; \
2114     + } \
2115     + while (0)
2116     +
2117     + CLEAR_FLAGS;
2118     + memset (&state, '\0', sizeof(mbstate_t));
2119     +
2120     + for (;; bufpos += mblength, buflen -= mblength)
2121     + {
2122     + if (buflen < MB_LEN_MAX && !feof (istream) && !ferror (istream))
2123     + {
2124     + memmove (buf, bufpos, buflen);
2125     + buflen += fread (buf + buflen, sizeof(char), BUFSIZ, istream);
2126     + bufpos = buf;
2127     + }
2128     +
2129     + if (buflen < 1)
2130     + break;
2131     +
2132     + /* Get a wide character. */
2133     + convfail = 0;
2134     + state_bak = state;
2135     + mblength = mbrtowc ((wchar_t *)&wc, bufpos, buflen, &state);
2136     +
2137     + switch (mblength)
2138     + {
2139     + case (size_t)-1:
2140     + case (size_t)-2:
2141     + convfail++;
2142     + state = state_bak;
2143     + /* Fall through. */
2144     +
2145     + case 0:
2146     + mblength = 1;
2147     + break;
2148     + }
2149     +
2150     +rescan:
2151     + if (convfail)
2152     + increment = 1; /* An undecodable byte occupies one position. */
2153     + else if (wc == L'\n')
2154     + {
2155     + /* A newline ends the output line in every operating mode. */
2156     + fwrite (line_out, sizeof(char), offset_out, stdout);
2157     + START_NEW_LINE;
2158     + continue;
2159     + }
2160     + else if (operating_mode == byte_mode) /* byte mode */
2161     + increment = mblength;
2162     + else if (operating_mode == character_mode) /* character mode */
2163     + increment = 1;
2164     + else /* column mode */
2165     + {
2166     + switch (wc)
2167     + {
2168     + case L'\b':
2169     + increment = (column > 0) ? -1 : 0;
2170     + break;
2171     +
2172     + case L'\r':
2173     + increment = -1 * column;
2174     + break;
2175     +
2176     + case L'\t':
2177     + increment = 8 - column % 8;
2178     + break;
2179     +
2180     + default:
2181     + increment = wcwidth (wc);
2182     + increment = (increment < 0) ? 0 : increment;
2183     + break;
2184     + }
2185     + }
2186     +
2187     + if (column + increment > width && break_spaces && last_blank_pos)
2188     + {
2189     + fwrite (line_out, sizeof(char), last_blank_pos, stdout);
2190     + putchar ('\n');
2191     +
2192     + offset_out = offset_out - last_blank_pos;
2193     + column = column - last_blank_column + ((is_cr_after_last_blank)
2194     + ? last_blank_increment : bs_following_last_blank_num);
2195     + memmove (line_out, line_out + last_blank_pos, offset_out);
2196     + CLEAR_FLAGS;
2197     + goto rescan;
2198     + }
2199     +
2200     + if (column + increment > width && column != 0)
2201     + {
2202     + fwrite (line_out, sizeof(char), offset_out, stdout);
2203     + START_NEW_LINE;
2204     + goto rescan;
2205     + }
2206     +
2207     + if (allocated_out < offset_out + mblength)
2208     + {
2209     + allocated_out += 1024;
2210     + line_out = xrealloc (line_out, allocated_out);
2211     + }
2212     +
2213     + memcpy (line_out + offset_out, bufpos, mblength);
2214     + offset_out += mblength;
2215     + column += increment;
2216     +
2217     + if (is_blank_seen && !convfail && wc == L'\r')
2218     + is_cr_after_last_blank = 1;
2219     +
2220     + if (is_bs_following_last_blank && !convfail && wc == L'\b')
2221     + ++bs_following_last_blank_num;
2222     + else
2223     + is_bs_following_last_blank = 0;
2224     +
2225     + if (break_spaces && !convfail && iswblank (wc))
2226     + {
2227     + last_blank_pos = offset_out;
2228     + last_blank_column = column;
2229     + is_blank_seen = 1;
2230     + last_blank_increment = increment;
2231     + is_bs_following_last_blank = 1;
2232     + bs_following_last_blank_num = 0;
2233     + is_cr_after_last_blank = 0;
2234     + }
2235     + }
2236     +
2237     + *saved_errno = errno;
2238    
2239     if (offset_out)
2240     fwrite (line_out, sizeof (char), (size_t) offset_out, stdout);
2241    
2242     + free(line_out);
2243     +}
2244     +#endif
2245     +
2246     +/* Fold file FILENAME, or standard input if FILENAME is "-",
2247     + to stdout, with maximum line length WIDTH.
2248     + Return 0 if successful, 1 if an error occurs. */
2249     +
2250     +static int
2251     +fold_file (char *filename, int width)
2252     +{
2253     + FILE *istream;
2254     + int saved_errno;
2255     +
2256     + if (STREQ (filename, "-"))
2257     + {
2258     + istream = stdin;
2259     + have_read_stdin = 1;
2260     + }
2261     + else
2262     + istream = fopen (filename, "r");
2263     +
2264     + if (istream == NULL)
2265     + {
2266     + error (0, errno, "%s", filename);
2267     + return 1;
2268     + }
2269     +
2270     + /* Define how ISTREAM is being folded. */
2271     +#if HAVE_MBRTOWC
2272     + if (MB_CUR_MAX > 1)
2273     + fold_multibyte_text (istream, width, &saved_errno);
2274     + else
2275     +#endif
2276     + fold_text (istream, width, &saved_errno);
2277     +
2278     if (ferror (istream))
2279     {
2280     error (0, saved_errno, "%s", filename);
2281     @@ -255,7 +501,8 @@
2282    
2283     atexit (close_stdout);
2284    
2285     - break_spaces = count_bytes = have_read_stdin = false;
2286     + operating_mode = column_mode;
2287     + break_spaces = have_read_stdin = false;
2288    
2289     while ((optc = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1)
2290     {
2291     @@ -264,7 +511,15 @@
2292     switch (optc)
2293     {
2294     case 'b': /* Count bytes rather than columns. */
2295     - count_bytes = true;
2296     + if (operating_mode != column_mode)
2297     + FATAL_ERROR (_("only one way of folding may be specified"));
2298     + operating_mode = byte_mode;
2299     + break;
2300     +
2301     + case 'c':
2302     + if (operating_mode != column_mode)
2303     + FATAL_ERROR (_("only one way of folding may be specified"));
2304     + operating_mode = character_mode;
2305     break;
2306    
2307     case 's': /* Break at word boundaries. */
2308     --- coreutils-5.93/src/join.c.i18n 2005-08-12 08:16:25.000000000 +0100
2309     +++ coreutils-5.93/src/join.c 2005-12-23 08:53:01.000000000 +0000
2310     @@ -23,16 +23,30 @@
2311     #include <sys/types.h>
2312     #include <getopt.h>
2313    
2314     +/* Get mbstate_t, mbrtowc(), wcrtomb(), wcwidth(). */
2315     +#if HAVE_WCHAR_H
2316     +# include <wchar.h>
2317     +#endif
2318     +
2319     +/* Get iswblank(), towupper. */
2320     +#if HAVE_WCTYPE_H
2321     +# include <wctype.h>
2322     +#endif
2323     +
2324     #include "system.h"
2325     #include "error.h"
2326     #include "hard-locale.h"
2327     #include "linebuffer.h"
2328     -#include "memcasecmp.h"
2329     #include "quote.h"
2330     #include "stdio--.h"
2331     #include "xmemcoll.h"
2332     #include "xstrtol.h"
2333    
2334     +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
2335     +#if HAVE_MBRTOWC && defined mbstate_t
2336     +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
2337     +#endif
2338     +
2339     /* The official name of this program (e.g., no `g' prefix). */
2340     #define PROGRAM_NAME "join"
2341    
2342     @@ -104,10 +118,12 @@
2343     /* Last element in `outlist', where a new element can be added. */
2344     static struct outlist *outlist_end = &outlist_head;
2345    
2346     -/* Tab character separating fields. If negative, fields are separated
2347     - by any nonempty string of blanks, otherwise by exactly one
2348     - tab character whose value (when cast to unsigned char) equals TAB. */
2349     -static int tab = -1;
2350     +/* Tab character separating fields. If NULL, fields are separated
2351     + by any nonempty string of blanks. */
2352     +static char *tab = NULL;
2353     +
2354     +/* The number of bytes used for tab. */
2355     +static size_t tablen = 0;
2356    
2357     static struct option const longopts[] =
2358     {
2359     @@ -197,6 +213,8 @@
2360    
2361     /* Fill in the `fields' structure in LINE. */
2362    
2363     +/* Fill in the `fields' structure in LINE. */
2364     +
2365     static void
2366     xfields (struct line *line)
2367     {
2368     @@ -206,10 +224,11 @@
2369     if (ptr == lim)
2370     return;
2371    
2372     - if (0 <= tab)
2373     + if (tab != NULL)
2374     {
2375     + unsigned char t = tab[0];
2376     char *sep;
2377     - for (; (sep = memchr (ptr, tab, lim - ptr)) != NULL; ptr = sep + 1)
2378     + for (; (sep = memchr (ptr, t, lim - ptr)) != NULL; ptr = sep + 1)
2379     extract_field (line, ptr, sep - ptr);
2380     }
2381     else
2382     @@ -236,6 +255,148 @@
2383     extract_field (line, ptr, lim - ptr);
2384     }
2385    
2386     +#if HAVE_MBRTOWC
2387     +static void
2388     +xfields_multibyte (struct line *line)
2389     +{
2390     + char *ptr = line->buf.buffer;
2391     + char const *lim = ptr + line->buf.length - 1;
2392     + wchar_t wc = 0;
2393     + size_t mblength = 1;
2394     + mbstate_t state, state_bak;
2395     +
2396     + memset (&state, 0, sizeof (mbstate_t));
2397     +
2398     + if (ptr == lim)
2399     + return;
2400     +
2401     + if (tab != NULL)
2402     + {
2403     + unsigned char t = tab[0];
2404     + char *sep = ptr;
2405     + for (; ptr < lim; ptr = sep + mblength)
2406     + {
2407     + sep = ptr;
2408     + while (sep < lim)
2409     + {
2410     + state_bak = state;
2411     + mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
2412     +
2413     + if (mblength == (size_t)-1 || mblength == (size_t)-2)
2414     + {
2415     + mblength = 1;
2416     + state = state_bak;
2417     + }
2418     + mblength = (mblength < 1) ? 1 : mblength;
2419     +
2420     + if (mblength == tablen && !memcmp (sep, tab, mblength))
2421     + break;
2422     + else
2423     + {
2424     + sep += mblength;
2425     + continue;
2426     + }
2427     + }
2428     +
2429     + if (sep == lim)
2430     + break;
2431     +
2432     + extract_field (line, ptr, sep - ptr);
2433     + }
2434     + }
2435     + else
2436     + {
2437     + /* Skip leading blanks before the first field. */
2438     + while(ptr < lim)
2439     + {
2440     + state_bak = state;
2441     + mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
2442     +
2443     + if (mblength == (size_t)-1 || mblength == (size_t)-2)
2444     + {
2445     + mblength = 1;
2446     + state = state_bak;
2447     + break;
2448     + }
2449     + mblength = (mblength < 1) ? 1 : mblength;
2450     +
2451     + if (!iswblank(wc))
2452     + break;
2453     + ptr += mblength;
2454     + }
2455     +
2456     + do
2457     + {
2458     + char *sep;
2459     + state_bak = state;
2460     + mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
2461     + if (mblength == (size_t)-1 || mblength == (size_t)-2)
2462     + {
2463     + mblength = 1;
2464     + state = state_bak;
2465     + break;
2466     + }
2467     + mblength = (mblength < 1) ? 1 : mblength;
2468     +
2469     + sep = ptr + mblength;
2470     + while (sep != lim)
2471     + {
2472     + state_bak = state;
2473     + mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
2474     + if (mblength == (size_t)-1 || mblength == (size_t)-2)
2475     + {
2476     + mblength = 1;
2477     + state = state_bak;
2478     + break;
2479     + }
2480     + mblength = (mblength < 1) ? 1 : mblength;
2481     +
2482     + if (iswblank (wc))
2483     + break;
2484     +
2485     + sep += mblength;
2486     + }
2487     +
2488     + extract_field (line, ptr, sep - ptr);
2489     + if (sep == lim)
2490     + return;
2491     +
2492     + state_bak = state;
2493     + mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
2494     + if (mblength == (size_t)-1 || mblength == (size_t)-2)
2495     + {
2496     + mblength = 1;
2497     + state = state_bak;
2498     + break;
2499     + }
2500     + mblength = (mblength < 1) ? 1 : mblength;
2501     +
2502     + ptr = sep + mblength;
2503     + while (ptr != lim)
2504     + {
2505     + state_bak = state;
2506     + mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
2507     + if (mblength == (size_t)-1 || mblength == (size_t)-2)
2508     + {
2509     + mblength = 1;
2510     + state = state_bak;
2511     + break;
2512     + }
2513     + mblength = (mblength < 1) ? 1 : mblength;
2514     +
2515     + if (!iswblank (wc))
2516     + break;
2517     +
2518     + ptr += mblength;
2519     + }
2520     + }
2521     + while (ptr != lim);
2522     + }
2523     +
2524     + extract_field (line, ptr, lim - ptr);
2525     +}
2526     +#endif
2527     +
2528     /* Read a line from FP into LINE and split it into fields.
2529     Return true if successful. */
2530    
2531     @@ -256,6 +417,11 @@
2532     line->nfields_allocated = 0;
2533     line->nfields = 0;
2534     line->fields = NULL;
2535     +#if HAVE_MBRTOWC
2536     + if (MB_CUR_MAX > 1)
2537     + xfields_multibyte (line);
2538     + else
2539     +#endif
2540     xfields (line);
2541     return true;
2542     }
2543     @@ -310,56 +476,114 @@
2544     keycmp (struct line const *line1, struct line const *line2)
2545     {
2546     /* Start of field to compare in each file. */
2547     - char *beg1;
2548     - char *beg2;
2549     -
2550     - size_t len1;
2551     - size_t len2; /* Length of fields to compare. */
2552     + char *beg[2];
2553     + char *copy[2];
2554     + size_t len[2]; /* Length of fields to compare. */
2555     int diff;
2556     + int i, j;
2557    
2558     if (join_field_1 < line1->nfields)
2559     {
2560     - beg1 = line1->fields[join_field_1].beg;
2561     - len1 = line1->fields[join_field_1].len;
2562     + beg[0] = line1->fields[join_field_1].beg;
2563     + len[0] = line1->fields[join_field_1].len;
2564     }
2565     else
2566     {
2567     - beg1 = NULL;
2568     - len1 = 0;
2569     + beg[0] = NULL;
2570     + len[0] = 0;
2571     }
2572    
2573     if (join_field_2 < line2->nfields)
2574     {
2575     - beg2 = line2->fields[join_field_2].beg;
2576     - len2 = line2->fields[join_field_2].len;
2577     + beg[1] = line2->fields[join_field_2].beg;
2578     + len[1] = line2->fields[join_field_2].len;
2579     }
2580     else
2581     {
2582     - beg2 = NULL;
2583     - len2 = 0;
2584     + beg[1] = NULL;
2585     + len[1] = 0;
2586     }
2587    
2588     - if (len1 == 0)
2589     - return len2 == 0 ? 0 : -1;
2590     - if (len2 == 0)
2591     + if (len[0] == 0)
2592     + return len[1] == 0 ? 0 : -1;
2593     + if (len[1] == 0)
2594     return 1;
2595    
2596     if (ignore_case)
2597     {
2598     - /* FIXME: ignore_case does not work with NLS (in particular,
2599     - with multibyte chars). */
2600     - diff = memcasecmp (beg1, beg2, MIN (len1, len2));
2601     +#ifdef HAVE_MBRTOWC
2602     + if (MB_CUR_MAX > 1)
2603     + {
2604     + size_t mblength;
2605     + wchar_t wc, uwc;
2606     + mbstate_t state, state_bak;
2607     +
2608     + memset (&state, '\0', sizeof (mbstate_t));
2609     +
2610     + for (i = 0; i < 2; i++)
2611     + {
2612     + copy[i] = alloca (len[i] + 1);
2613     +
2614     + for (j = 0; j < MIN (len[0], len[1]);)
2615     + {
2616     + state_bak = state;
2617     + mblength = mbrtowc (&wc, beg[i] + j, len[i] - j, &state);
2618     +
2619     + switch (mblength)
2620     + {
2621     + case (size_t) -1:
2622     + case (size_t) -2:
2623     + state = state_bak;
2624     + /* Fall through */
2625     + case 0:
2626     + mblength = 1;
2627     + break;
2628     +
2629     + default:
2630     + uwc = towupper (wc);
2631     +
2632     + if (uwc != wc)
2633     + {
2634     + mbstate_t state_wc;
2635     +
2636     + memset (&state_wc, '\0', sizeof (mbstate_t));
2637     + wcrtomb (copy[i] + j, uwc, &state_wc);
2638     + }
2639     + else
2640     + memcpy (copy[i] + j, beg[i] + j, mblength);
2641     + }
2642     + j += mblength;
2643     + }
2644     + copy[i][j] = '\0';
2645     + }
2646     + }
2647     + else
2648     +#endif
2649     + {
2650     + for (i = 0; i < 2; i++)
2651     + {
2652     + copy[i] = alloca (len[i] + 1);
2653     +
2654     + for (j = 0; j < MIN (len[0], len[1]); j++)
2655     + copy[i][j] = toupper (beg[i][j]);
2656     +
2657     + copy[i][j] = '\0';
2658     + }
2659     + }
2660     }
2661     else
2662     {
2663     - if (hard_LC_COLLATE)
2664     - return xmemcoll (beg1, len1, beg2, len2);
2665     - diff = memcmp (beg1, beg2, MIN (len1, len2));
2666     + copy[0] = (unsigned char *) beg[0];
2667     + copy[1] = (unsigned char *) beg[1];
2668     }
2669    
2670     + if (HAVE_SETLOCALE && hard_LC_COLLATE)
2671     + return xmemcoll ((char *) copy[0], len[0], (char *) copy[1], len[1]);
2672     + diff = memcmp (copy[0], copy[1], MIN (len[0], len[1]));
2673     +
2674     if (diff)
2675     return diff;
2676     - return len1 < len2 ? -1 : len1 != len2;
2677     + return len[0] - len[1];
2678     }
2679    
2680     /* Print field N of LINE if it exists and is nonempty, otherwise
2681     @@ -384,11 +608,18 @@
2682    
2683     /* Print the join of LINE1 and LINE2. */
2684    
2685     +#define PUT_TAB_CHAR \
2686     + do \
2687     + { \
2688     + (tab != NULL) ? \
2689     + fwrite(tab, sizeof(char), tablen, stdout) : putchar (' '); \
2690     + } \
2691     + while (0)
2692     +
2693     static void
2694     prjoin (struct line const *line1, struct line const *line2)
2695     {
2696     const struct outlist *outlist;
2697     - char output_separator = tab < 0 ? ' ' : tab;
2698    
2699     outlist = outlist_head.next;
2700     if (outlist)
2701     @@ -404,12 +635,12 @@
2702     if (o->file == 0)
2703     {
2704     if (line1 == &uni_blank)
2705     - {
2706     + {
2707     line = line2;
2708     field = join_field_2;
2709     }
2710     else
2711     - {
2712     + {
2713     line = line1;
2714     field = join_field_1;
2715     }
2716     @@ -423,7 +654,7 @@
2717     o = o->next;
2718     if (o == NULL)
2719     break;
2720     - putchar (output_separator);
2721     + PUT_TAB_CHAR;
2722     }
2723     putchar ('\n');
2724     }
2725     @@ -441,23 +672,23 @@
2726     prfield (join_field_1, line1);
2727     for (i = 0; i < join_field_1 && i < line1->nfields; ++i)
2728     {
2729     - putchar (output_separator);
2730     + PUT_TAB_CHAR;
2731     prfield (i, line1);
2732     }
2733     for (i = join_field_1 + 1; i < line1->nfields; ++i)
2734     {
2735     - putchar (output_separator);
2736     + PUT_TAB_CHAR;
2737     prfield (i, line1);
2738     }
2739    
2740     for (i = 0; i < join_field_2 && i < line2->nfields; ++i)
2741     {
2742     - putchar (output_separator);
2743     + PUT_TAB_CHAR;
2744     prfield (i, line2);
2745     }
2746     for (i = join_field_2 + 1; i < line2->nfields; ++i)
2747     {
2748     - putchar (output_separator);
2749     + PUT_TAB_CHAR;
2750     prfield (i, line2);
2751     }
2752     putchar ('\n');
2753     @@ -869,20 +1100,41 @@
2754    
2755     case 't':
2756     {
2757     - unsigned char newtab = optarg[0];
2758     - if (! newtab)
2759     + char *newtab;
2760     + size_t newtablen;
2761     + if (! optarg[0])
2762     error (EXIT_FAILURE, 0, _("empty tab"));
2763     - if (optarg[1])
2764     + newtab = xstrdup (optarg);
2765     +#if HAVE_MBRTOWC
2766     + if (MB_CUR_MAX > 1)
2767     + {
2768     + mbstate_t state;
2769     +
2770     + memset (&state, 0, sizeof (mbstate_t));
2771     + newtablen = mbrtowc (NULL, newtab,
2772     + strnlen (newtab, MB_LEN_MAX),
2773     + &state);
2774     + if (newtablen == (size_t) 0
2775     + || newtablen == (size_t) -1
2776     + || newtablen == (size_t) -2)
2777     + newtablen = 1;
2778     + }
2779     + else
2780     +#endif
2781     + newtablen = 1;
2782     +
2783     + if (newtablen == 1 && newtab[1])
2784     + {
2785     + if (STREQ (newtab, "\\0"))
2786     + newtab[0] = '\0';
2787     + }
2788     + if (tab != NULL && strcmp (tab, newtab))
2789     {
2790     - if (STREQ (optarg, "\\0"))
2791     - newtab = '\0';
2792     - else
2793     - error (EXIT_FAILURE, 0, _("multi-character tab %s"),
2794     - quote (optarg));
2795     + free (newtab);
2796     + error (EXIT_FAILURE, 0, _("incompatible tabs"));
2797     }
2798     - if (0 <= tab && tab != newtab)
2799     - error (EXIT_FAILURE, 0, _("incompatible tabs"));
2800     tab = newtab;
2801     + tablen = newtablen;
2802     }
2803     break;
2804    
2805     --- coreutils-5.93/src/unexpand.c.i18n 2005-08-12 08:16:25.000000000 +0100
2806     +++ coreutils-5.93/src/unexpand.c 2005-12-23 08:53:01.000000000 +0000
2807     @@ -39,11 +39,28 @@
2808     #include <stdio.h>
2809     #include <getopt.h>
2810     #include <sys/types.h>
2811     +
2812     +/* Get mbstate_t, mbrtowc(), wcwidth(). */
2813     +#if HAVE_WCHAR_H
2814     +# include <wchar.h>
2815     +#endif
2816     +
2817     #include "system.h"
2818     #include "error.h"
2819     #include "quote.h"
2820     #include "xstrndup.h"
2821    
2822     +/* MB_LEN_MAX is wrongly defined as 1 in at least one GCC installation. */
2823     +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
2824     +# undef MB_LEN_MAX
2825     +# define MB_LEN_MAX 16
2826     +#endif
2827     +
2828     +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
2829     +#if HAVE_MBRTOWC && defined mbstate_t
2830     +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
2831     +#endif
2832     +
2833     /* The official name of this program (e.g., no `g' prefix). */
2834     #define PROGRAM_NAME "unexpand"
2835    
2836     @@ -110,6 +127,208 @@
2837     {NULL, 0, NULL, 0}
2838     };
2839    
2840     +static FILE *next_file (FILE *fp);
2841     +
2842     +#if HAVE_MBRTOWC
2843     +static void /* Multibyte-locale counterpart of unexpand (). */
2844     +unexpand_multibyte (void)
2845     +{
2846     + FILE *fp; /* Input stream. */
2847     + mbstate_t i_state; /* Current shift state of the input stream. */
2848     + mbstate_t i_state_bak; /* Back up the I_STATE. */
2849     + mbstate_t o_state; /* Current shift state of the output stream. */
2850     + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
2851     + char *bufpos = buf; /* Next read position of BUF. */
2852     + size_t buflen = 0; /* The length of the byte sequence in buf. */
2853     + wint_t wc; /* The wide character just decoded. */
2854     + size_t mblength; /* The byte length of the multibyte character
2855     + that was decoded into WC. */
2856     +
2857     + /* Index in `tab_list' of next tabstop: */
2858     + int tab_index = 0; /* For calculating width of pending tabs. */
2859     + int print_tab_index = 0; /* For printing as many tabs as possible. */
2860     + unsigned int column = 0; /* Column on screen of next char. */
2861     + int next_tab_column; /* Column the next tab stop is on. */
2862     + int convert = 1; /* If nonzero, perform translations. */
2863     + unsigned int pending = 0; /* Pending columns of blanks. */
2864     +
2865     + fp = next_file ((FILE *) NULL);
2866     + if (fp == NULL)
2867     + return;
2868     +
2869     + memset (&o_state, '\0', sizeof(mbstate_t));
2870     + memset (&i_state, '\0', sizeof(mbstate_t));
2871     +
2872     + for (;;)
2873     + {
2874     + if (buflen < MB_LEN_MAX && !feof(fp) && !ferror(fp))
2875     + {
2876     + memmove (buf, bufpos, buflen);
2877     + buflen += fread (buf + buflen, sizeof(char), BUFSIZ, fp);
2878     + bufpos = buf;
2879     + }
2880     +
2881     + /* Get a wide character. */
2882     + if (buflen < 1)
2883     + {
2884     + mblength = 1;
2885     + wc = WEOF;
2886     + }
2887     + else
2888     + {
2889     + i_state_bak = i_state;
2890     + mblength = mbrtowc ((wchar_t *)&wc, bufpos, buflen, &i_state);
2891     + }
2892     +
2893     + if (mblength == (size_t)-1 || mblength == (size_t)-2)
2894     + {
2895     + i_state = i_state_bak;
2896     + wc = L'\0';
2897     + }
2898     +
2899     + if (wc == L' ' && convert && column < INT_MAX)
2900     + {
2901     + ++pending;
2902     + ++column;
2903     + }
2904     + else if (wc == L'\t' && convert)
2905     + {
2906     + if (tab_size == 0)
2907     + {
2908     + /* Do not let tab_index == first_free_tab;
2909     + stop when it is 1 less. */
2910     + while (tab_index < first_free_tab - 1
2911     + && column >= tab_list[tab_index])
2912     + tab_index++;
2913     + next_tab_column = tab_list[tab_index];
2914     + if (tab_index < first_free_tab - 1)
2915     + tab_index++;
2916     + if (column >= next_tab_column)
2917     + {
2918     + convert = 0; /* Ran out of tab stops. */
2919     + goto flush_pend_mb;
2920     + }
2921     + }
2922     + else
2923     + {
2924     + next_tab_column = column + tab_size - column % tab_size;
2925     + }
2926     + pending += next_tab_column - column;
2927     + column = next_tab_column;
2928     + }
2929     + else
2930     + {
2931     +flush_pend_mb:
2932     + /* Flush pending spaces. Print as many tabs as possible,
2933     + then print the rest as spaces. */
2934     + if (pending == 1)
2935     + {
2936     + putchar (' ');
2937     + pending = 0;
2938     + }
2939     + column -= pending;
2940     + while (pending > 0)
2941     + {
2942     + if (tab_size == 0)
2943     + {
2944     + /* Do not let print_tab_index == first_free_tab;
2945     + stop when it is 1 less. */
2946     + while (print_tab_index < first_free_tab - 1
2947     + && column >= tab_list[print_tab_index])
2948     + print_tab_index++;
2949     + next_tab_column = tab_list[print_tab_index];
2950     + if (print_tab_index < first_free_tab - 1)
2951     + print_tab_index++;
2952     + }
2953     + else
2954     + {
2955     + next_tab_column =
2956     + column + tab_size - column % tab_size;
2957     + }
2958     + if (next_tab_column - column <= pending)
2959     + {
2960     + putchar ('\t');
2961     + pending -= next_tab_column - column;
2962     + column = next_tab_column;
2963     + }
2964     + else
2965     + {
2966     + --print_tab_index;
2967     + column += pending;
2968     + while (pending != 0)
2969     + {
2970     + putchar (' ');
2971     + pending--;
2972     + }
2973     + }
2974     + }
2975     +
2976     + if (wc == WEOF)
2977     + {
2978     + fp = next_file (fp);
2979     + if (fp == NULL)
2980     + break; /* No more files. */
2981     + else
2982     + {
2983     + memset (&i_state, '\0', sizeof(mbstate_t));
2984     + continue;
2985     + }
2986     + }
2987     +
2988     + if (mblength == (size_t)-1 || mblength == (size_t)-2)
2989     + {
2990     + if (convert)
2991     + {
2992     + ++column;
2993     + if (convert_entire_line == 0)
2994     + convert = 0;
2995     + }
2996     + mblength = 1;
2997     + putchar (*bufpos); /* Emit the byte that failed to convert. */
2998     + }
2999     + else if (mblength == 0)
3000     + {
3001     + if (convert && convert_entire_line == 0)
3002     + convert = 0;
3003     + mblength = 1;
3004     + putchar ('\0');
3005     + }
3006     + else
3007     + {
3008     + if (convert)
3009     + {
3010     + if (wc == L'\b')
3011     + {
3012     + if (column > 0)
3013     + --column;
3014     + }
3015     + else
3016     + {
3017     + int width; /* The width of WC. */
3018     +
3019     + width = wcwidth (wc);
3020     + column += (width > 0) ? width : 0;
3021     + if (convert_entire_line == 0)
3022     + convert = 0;
3023     + }
3024     + }
3025     +
3026     + if (wc == L'\n')
3027     + {
3028     + tab_index = print_tab_index = 0;
3029     + column = pending = 0;
3030     + convert = 1;
3031     + }
3032     + fwrite (bufpos, sizeof(char), mblength, stdout);
3033     + }
3034     + }
3035     + buflen -= mblength;
3036     + bufpos += mblength;
3037     + }
3038     +}
3039     +#endif
3040     +
3041     +
3042     void
3043     usage (int status)
3044     {
3045     @@ -532,7 +751,12 @@
3046    
3047     file_list = (optind < argc ? &argv[optind] : stdin_argv);
3048    
3049     - unexpand ();
3050     +#if HAVE_MBRTOWC
3051     + if (MB_CUR_MAX > 1)
3052     + unexpand_multibyte ();
3053     + else
3054     +#endif
3055     + unexpand ();
3056    
3057     if (have_read_stdin && fclose (stdin) != 0)
3058     error (EXIT_FAILURE, errno, "-");
3059     --- coreutils-5.93/src/sort.c.i18n 2005-10-07 19:48:28.000000000 +0100
3060     +++ coreutils-5.93/src/sort.c 2005-12-23 10:38:44.000000000 +0000
3061     @@ -23,9 +23,18 @@
3062    
3063     #include <config.h>
3064    
3065     +#include <assert.h>
3066     #include <getopt.h>
3067     #include <sys/types.h>
3068     #include <signal.h>
3069     +#if HAVE_WCHAR_H
3070     +# include <wchar.h>
3071     +#endif
3072     +/* Get isw* functions. */
3073     +#if HAVE_WCTYPE_H
3074     +# include <wctype.h>
3075     +#endif
3076     +
3077     #include "system.h"
3078     #include "error.h"
3079     #include "hard-locale.h"
3080     @@ -95,14 +104,38 @@
3081     /* Thousands separator; if -1, then there isn't one. */
3082     static int thousands_sep;
3083    
3084     +static int force_general_numcompare = 0;
3085     +
3086     /* Nonzero if the corresponding locales are hard. */
3087     static bool hard_LC_COLLATE;
3088     -#if HAVE_NL_LANGINFO
3089     +#if HAVE_LANGINFO_CODESET
3090     static bool hard_LC_TIME;
3091     #endif
3092    
3093     #define NONZERO(x) ((x) != 0)
3094    
3095     +/* Set MBLENGTH to the byte length of the multibyte character at PTR (text ends at LIM); an invalid or incomplete sequence restores STATE and counts as 1 byte, as does NUL. */
3096     +#define GET_BYTELEN_OF_CHAR(LIM, PTR, MBLENGTH, STATE) \
3097     + do \
3098     + { \
3099     + wchar_t wc; \
3100     + mbstate_t state_bak; \
3101     + \
3102     + state_bak = STATE; \
3103     + MBLENGTH = mbrtowc (&wc, PTR, (LIM) - (PTR), &STATE); \
3104     + \
3105     + switch (MBLENGTH) \
3106     + { \
3107     + case (size_t)-1: \
3108     + case (size_t)-2: \
3109     + STATE = state_bak; \
3110     + /* Fall through. */ \
3111     + case 0: \
3112     + MBLENGTH = 1; \
3113     + } \
3114     + } \
3115     + while (0)
3116     +
3117     /* The kind of blanks for '-b' to skip in various options. */
3118     enum blanktype { bl_start, bl_end, bl_both };
3119    
3120     @@ -239,13 +272,11 @@
3121     they were read if all keys compare equal. */
3122     static bool stable;
3123    
3124     -/* If TAB has this value, blanks separate fields. */
3125     -enum { TAB_DEFAULT = CHAR_MAX + 1 };
3126     -
3127     -/* Tab character separating fields. If TAB_DEFAULT, then fields are
3128     +/* Tab character separating fields. If tab_length is 0, then fields are
3129     separated by the empty string between a non-blank character and a blank
3130     character. */
3131     -static int tab = TAB_DEFAULT;
3132     +static char tab[MB_LEN_MAX + 1];
3133     +static size_t tab_length = 0;
3134    
3135     /* Flag to remove consecutive duplicate lines from the output.
3136     Only the last of a sequence of equal lines will be output. */
3137     @@ -392,6 +423,44 @@
3138     static struct tempnode *volatile temphead;
3139     static struct tempnode *volatile *temptail = &temphead;
3140    
3141     +/* Function pointers.  Start-up code sets each to its *_mb variant when HAVE_MBRTOWC is set and MB_CUR_MAX > 1, otherwise to its *_uni variant. */
3142     +static void
3143     +(*inittables) (void); /* inittables_uni or inittables_mb */
3144     +static char *
3145     +(*begfield) (const struct line*, const struct keyfield *); /* begfield_uni or begfield_mb */
3146     +static char *
3147     +(*limfield) (const struct line*, const struct keyfield *); /* limfield_uni or limfield_mb */
3148     +static int
3149     +(*getmonth) (char const *, size_t); /* getmonth_uni or getmonth_mb */
3150     +static int
3151     +(*keycompare) (const struct line *, const struct line *); /* keycompare_uni or keycompare_mb */
3152     +static int
3153     +(*numcompare) (const char *, const char *); /* numcompare_uni or numcompare_mb */
3154     +
3155     +/* Test whether the multibyte character at STR (at most LEN bytes are examined)
3156     + is a blank.  Store its byte length in *LENGTH, or 1 when it is invalid, incomplete or a null character. */
3157     +#if HAVE_MBRTOWC
3158     +static int
3159     +ismbblank (const char *str, size_t len, size_t *length)
3160     +{
3161     + size_t mblength;
3162     + wchar_t wc;
3163     + mbstate_t state;
3164     +
3165     + memset (&state, '\0', sizeof(mbstate_t)); /* Every call decodes from a zeroed conversion state. */
3166     + mblength = mbrtowc (&wc, str, len, &state);
3167     +
3168     + if (mblength == (size_t)-1 || mblength == (size_t)-2) /* invalid or incomplete sequence */
3169     + {
3170     + *length = 1; /* Callers add *LENGTH to their pointer, so this steps over one byte. */
3171     + return 0;
3172     + }
3173     +
3174     + *length = (mblength < 1) ? 1 : mblength; /* A result of 0 (null character) is reported as one byte. */
3175     + return iswblank (wc); /* nonzero when WC is a blank */
3176     +}
3177     +#endif
3178     +
3179     /* Clean up any remaining temporary files. */
3180    
3181     static void
3182     @@ -545,7 +614,7 @@
3183     free (node);
3184     }
3185    
3186     -#if HAVE_NL_LANGINFO
3187     +#if HAVE_LANGINFO_CODESET
3188    
3189     static int
3190     struct_month_cmp (const void *m1, const void *m2)
3191     @@ -560,7 +629,7 @@
3192     /* Initialize the character class tables. */
3193    
3194     static void
3195     -inittables (void)
3196     +inittables_uni (void)
3197     {
3198     size_t i;
3199    
3200     @@ -572,7 +641,7 @@
3201     fold_toupper[i] = (ISLOWER (i) ? toupper (i) : i);
3202     }
3203    
3204     -#if HAVE_NL_LANGINFO
3205     +#if HAVE_LANGINFO_CODESET
3206     /* If we're not in the "C" locale, read different names for months. */
3207     if (hard_LC_TIME)
3208     {
3209     @@ -598,6 +667,64 @@
3210     #endif
3211     }
3212    
3213     +#if HAVE_MBRTOWC
3214     +static void
3215     +inittables_mb (void)
3216     +{
3217     + int i, j, k, l;
3218     + char *name, *s;
3219     + size_t s_len, mblength;
3220     + char mbc[MB_LEN_MAX];
3221     + wchar_t wc, pwc;
3222     + mbstate_t state_mb, state_wc;
3223     + /* Fill monthtab from the locale's abbreviated month names (leading blanks dropped, characters upper-cased), then sort it. */
3224     + for (i = 0; i < MONTHS_PER_YEAR; i++)
3225     + {
3226     + s = (char *) nl_langinfo (ABMON_1 + i);
3227     + s_len = strlen (s);
3228     + monthtab[i].name = name = (char *) xmalloc (s_len + 1); /* NOTE(review): sized from the original name; assumes upper-casing never needs more bytes -- confirm. */
3229     + monthtab[i].val = i + 1;
3230     +
3231     + memset (&state_mb, '\0', sizeof (mbstate_t));
3232     + memset (&state_wc, '\0', sizeof (mbstate_t));
3233     + /* Advance J past the leading blanks of S. */
3234     + for (j = 0; j < s_len;)
3235     + {
3236     + if (!ismbblank (s + j, s_len - j, &mblength))
3237     + break;
3238     + j += mblength;
3239     + }
3240     + /* Copy the rest of S into NAME one character at a time; K is the write index. */
3241     + for (k = 0; j < s_len;)
3242     + {
3243     + mblength = mbrtowc (&wc, (s + j), (s_len - j), &state_mb);
3244     + assert (mblength != (size_t)-1 && mblength != (size_t)-2);
3245     + if (mblength == 0) /* null character: stop copying */
3246     + break;
3247     +
3248     + pwc = towupper (wc);
3249     + if (pwc == wc) /* towupper left it unchanged: keep the original bytes */
3250     + {
3251     + memcpy (mbc, s + j, mblength);
3252     + j += mblength;
3253     + }
3254     + else /* re-encode the upper-cased character into MBC */
3255     + {
3256     + j += mblength;
3257     + mblength = wcrtomb (mbc, pwc, &state_wc);
3258     + assert (mblength != (size_t)0 && mblength != (size_t)-1);
3259     + }
3260     +
3261     + for (l = 0; l < mblength; l++)
3262     + name[k++] = mbc[l];
3263     + }
3264     + name[k] = '\0';
3265     + }
3266     + qsort ((void *) monthtab, MONTHS_PER_YEAR,
3267     + sizeof (struct month), struct_month_cmp); /* getmonth_mb binary-searches this table */
3268     +}
3269     +#endif
3270     +
3271     /* Specify the amount of main memory to use when sorting. */
3272     static void
3273     specify_sort_size (char const *s)
3274     @@ -808,7 +935,7 @@
3275     by KEY in LINE. */
3276    
3277     static char *
3278     -begfield (const struct line *line, const struct keyfield *key)
3279     +begfield_uni (const struct line *line, const struct keyfield *key)
3280     {
3281     char *ptr = line->text, *lim = ptr + line->length - 1;
3282     size_t sword = key->sword;
3283     @@ -818,10 +945,10 @@
3284     /* The leading field separator itself is included in a field when -t
3285     is absent. */
3286    
3287     - if (tab != TAB_DEFAULT)
3288     + if (tab_length)
3289     while (ptr < lim && sword--)
3290     {
3291     - while (ptr < lim && *ptr != tab)
3292     + while (ptr < lim && *ptr != tab[0])
3293     ++ptr;
3294     if (ptr < lim)
3295     ++ptr;
3296     @@ -849,11 +976,70 @@
3297     return ptr;
3298     }
3299    
3300     +#if HAVE_MBRTOWC
3301     +static char *
3302     +begfield_mb (const struct line *line, const struct keyfield *key)
3303     +{
3304     + int i;
3305     + char *ptr = line->text, *lim = ptr + line->length - 1;
3306     + size_t sword = key->sword;
3307     + size_t schar = key->schar;
3308     + size_t mblength;
3309     + mbstate_t state;
3310     + /* Multibyte field-start finder: skip SWORD fields, optionally blanks, then SCHAR characters, never moving PTR past LIM. */
3311     + memset (&state, '\0', sizeof(mbstate_t));
3312     + /* With an explicit separator (tab_length != 0) a field ends at the next copy of TAB; otherwise fields are delimited by blanks. */
3313     + if (tab_length)
3314     + while (ptr < lim && sword--)
3315     + {
3316     + while (ptr < lim && memcmp (ptr, tab, tab_length) != 0) /* NOTE(review): compares tab_length bytes even if fewer remain before LIM -- confirm the buffer allows it. */
3317     + {
3318     + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
3319     + ptr += mblength;
3320     + }
3321     + if (ptr < lim) /* step over the character at PTR (the separator when one was found) */
3322     + {
3323     + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
3324     + ptr += mblength;
3325     + }
3326     + }
3327     + else
3328     + while (ptr < lim && sword--)
3329     + {
3330     + while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength)) /* blanks in front of the field */
3331     + ptr += mblength;
3332     + if (ptr < lim)
3333     + {
3334     + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
3335     + ptr += mblength;
3336     + }
3337     + while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength)) /* the field's non-blank characters */
3338     + ptr += mblength;
3339     + }
3340     + /* Skip blanks at the field start when the key asks for it. */
3341     + if (key->skipsblanks)
3342     + while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
3343     + ptr += mblength;
3344     + /* Advance by SCHAR characters, stopping instead of crossing LIM. */
3345     + for (i = 0; i < schar; i++)
3346     + {
3347     + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
3348     +
3349     + if (ptr + mblength > lim)
3350     + break;
3351     + else
3352     + ptr += mblength;
3353     + }
3354     +
3355     + return ptr;
3356     +}
3357     +#endif
3358     +
3359     /* Return the limit of (a pointer to the first character after) the field
3360     in LINE specified by KEY. */
3361    
3362     static char *
3363     -limfield (const struct line *line, const struct keyfield *key)
3364     +limfield_uni (const struct line *line, const struct keyfield *key)
3365     {
3366     char *ptr = line->text, *lim = ptr + line->length - 1;
3367     size_t eword = key->eword, echar = key->echar;
3368     @@ -866,10 +1052,10 @@
3369     `beginning' is the first character following the delimiting TAB.
3370     Otherwise, leave PTR pointing at the first `blank' character after
3371     the preceding field. */
3372     - if (tab != TAB_DEFAULT)
3373     + if (tab_length)
3374     while (ptr < lim && eword--)
3375     {
3376     - while (ptr < lim && *ptr != tab)
3377     + while (ptr < lim && *ptr != tab[0])
3378     ++ptr;
3379     if (ptr < lim && (eword | echar))
3380     ++ptr;
3381     @@ -915,10 +1101,10 @@
3382     */
3383    
3384     /* Make LIM point to the end of (one byte past) the current field. */
3385     - if (tab != TAB_DEFAULT)
3386     + if (tab_length)
3387     {
3388     char *newlim;
3389     - newlim = memchr (ptr, tab, lim - ptr);
3390     + newlim = memchr (ptr, tab[0], lim - ptr);
3391     if (newlim)
3392     lim = newlim;
3393     }
3394     @@ -951,6 +1137,107 @@
3395     return ptr;
3396     }
3397    
3398     +#if HAVE_MBRTOWC
3399     +/* Multibyte counterpart of limfield_uni: return the limit of (a pointer
3400     +   to the first byte after) the field of LINE specified by KEY.  */
3401     +static char *
3402     +limfield_mb (const struct line *line, const struct keyfield *key)
3403     +{
3404     +  char *ptr = line->text, *lim = ptr + line->length - 1;
3405     +  size_t eword = key->eword, echar = key->echar;
3406     +  int i;
3407     +  size_t mblength;
3408     +  mbstate_t state;
3409     +
3410     +  memset (&state, '\0', sizeof(mbstate_t));
3411     +
3412     +  /* Move PTR past EWORD fields.  */
3413     +  if (tab_length)
3414     +    while (ptr < lim && eword--)
3415     +      {
3416     +        while (ptr < lim && memcmp (ptr, tab, tab_length) != 0)
3417     +          {
3418     +            GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
3419     +            ptr += mblength;
3420     +          }
3421     +        if (ptr < lim && (eword | echar))
3422     +          {
3423     +            GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
3424     +            ptr += mblength;
3425     +          }
3426     +      }
3427     +  else
3428     +    while (ptr < lim && eword--)
3429     +      {
3430     +        while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
3431     +          ptr += mblength;
3432     +        if (ptr < lim)
3433     +          {
3434     +            GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
3435     +            ptr += mblength;
3436     +          }
3437     +        while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength))
3438     +          ptr += mblength;
3439     +      }
3440     +
3441     +# ifdef POSIX_UNSPECIFIED
3442     +  /* Make LIM point to the end of (one byte past) the current field.  */
3443     +  if (tab_length)
3444     +    {
3445     +      char *p;
3446     +
3447     +      /* Measure the character at P (the scan position), not at PTR.  */
3448     +      for (p = ptr; p < lim;)
3449     +        {
3450     +          if (memcmp (p, tab, tab_length) == 0)
3451     +            {
3452     +              lim = p;
3453     +              break;
3454     +            }
3455     +
3456     +          GET_BYTELEN_OF_CHAR (lim, p, mblength, state);
3457     +          p += mblength;
3458     +        }
3459     +    }
3460     +  else
3461     +    {
3462     +      char *newlim = ptr;
3463     +
3464     +      while (newlim < lim && ismbblank (newlim, lim - newlim, &mblength))
3465     +        newlim += mblength;
3466     +      if (newlim < lim)
3467     +        {
3468     +          GET_BYTELEN_OF_CHAR (lim, newlim, mblength, state);
3469     +          newlim += mblength;
3470     +        }
3471     +      while (newlim < lim && !ismbblank (newlim, lim - newlim, &mblength))
3472     +        newlim += mblength;
3473     +      lim = newlim;
3474     +    }
3475     +# endif
3476     +
3477     +  /* If we're skipping leading blanks, don't start counting characters
3478     +     until after skipping past any leading blanks.  */
3479     +  if (key->skipsblanks)
3480     +    while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
3481     +      ptr += mblength;
3482     +
3483     +  memset (&state, '\0', sizeof(mbstate_t));
3484     +
3485     +  /* Advance PTR by ECHAR (if possible), but no further than LIM.  */
3486     +  for (i = 0; i < echar; i++)
3487     +    {
3488     +      GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
3489     +      if (ptr + mblength > lim)
3490     +        break;
3491     +      else
3492     +        ptr += mblength;
3493     +    }
3494     +
3495     +  return ptr;
3496     +}
3497     +#endif
3498     +
3499     /* Fill BUF reading from FP, moving buf->left bytes from the end
3500     of buf->buf to the beginning first. If EOF is reached and the
3501     file wasn't terminated by a newline, supply one. Set up BUF's line
3502     @@ -1067,7 +1354,7 @@
3503     hideously fast. */
3504    
3505     static int
3506     -numcompare (const char *a, const char *b)
3507     +numcompare_uni (const char *a, const char *b)
3508     {
3509     while (blanks[to_uchar (*a)])
3510     a++;
3511     @@ -1077,6 +1364,25 @@
3512     return strnumcmp (a, b, decimal_point, thousands_sep);
3513     }
3514    
3515     +#if HAVE_MBRTOWC
3516     +/* Multibyte counterpart of numcompare_uni: skip leading blanks of A and B, then compare with strnumcmp. */
3517     +static int
3518     +numcompare_mb (const char *a, const char *b)
3519     +{
3520     +  size_t mblength, len;
3521     +
3522     +  len = strlen (a); /* okay for UTF-8 */
3523     +  while (*a && ismbblank (a, len > MB_CUR_MAX ? MB_CUR_MAX : len, &mblength))
3524     +    a += mblength, len -= mblength;
3525     +
3526     +  len = strlen (b); /* okay for UTF-8 */
3527     +  while (*b && ismbblank (b, len > MB_CUR_MAX ? MB_CUR_MAX : len, &mblength))
3528     +    b += mblength, len -= mblength; /* LEN must shrink with B, as it does for A. */
3529     +
3530     +  return strnumcmp (a, b, decimal_point, thousands_sep);
3531     +}
3532     +#endif /* HAVE_MBRTOWC */
3533     +
3534     static int
3535     general_numcompare (const char *sa, const char *sb)
3536     {
3537     @@ -1110,7 +1416,7 @@
3538     Return 0 if the name in S is not recognized. */
3539    
3540     static int
3541     -getmonth (char const *month, size_t len)
3542     +getmonth_uni (char const *month, size_t len)
3543     {
3544     size_t lo = 0;
3545     size_t hi = MONTHS_PER_YEAR;
3546     @@ -1152,11 +1458,79 @@
3547     return 0;
3548     }
3549    
3550     +#if HAVE_MBRTOWC
3551     +/* Multibyte getmonth: return the number of the month named at the start of
3552     +   the LEN bytes at S, or 0 if none matches or S is not valid text.  */
3553     +static int
3554     +getmonth_mb (const char *s, size_t len)
3555     +{
3556     +  char *month, *tmp;
3557     +  size_t i, wclength, mblength;
3558     +  int lo = 0, hi = MONTHS_PER_YEAR;
3559     +  const char **pp;
3560     +  const wchar_t **wpp;
3561     +  wchar_t *month_wcs;
3562     +  mbstate_t state;
3563     +
3564     +  while (len > 0 && ismbblank (s, len, &mblength))
3565     +    {
3566     +      s += mblength;
3567     +      len -= mblength;
3568     +    }
3569     +
3570     +  if (len == 0)
3571     +    return 0;
3572     +
3573     +  month = (char *) alloca (len + 1);
3574     +  /* mbsrtowcs needs a null-terminated source, so convert a copy.  */
3575     +  tmp = (char *) alloca (len + 1);
3576     +  memcpy (tmp, s, len);
3577     +  tmp[len] = '\0';
3578     +  pp = (const char **)&tmp;
3579     +  month_wcs = (wchar_t *) alloca ((len + 1) * sizeof (wchar_t));
3580     +  memset (&state, '\0', sizeof(mbstate_t));
3581     +
3582     +  /* Input may hold invalid sequences: report "no month", don't abort.  */
3583     +  wclength = mbsrtowcs (month_wcs, pp, len + 1, &state);
3584     +  if (wclength == (size_t)-1 || *pp != NULL)
3585     +    return 0;
3586     +
3587     +  /* Upper-case the text and cut it at the first blank.  */
3588     +  for (i = 0; i < wclength; i++)
3589     +    {
3590     +      month_wcs[i] = towupper(month_wcs[i]);
3591     +      if (iswblank (month_wcs[i]))
3592     +        {
3593     +          month_wcs[i] = L'\0';
3594     +          break;
3595     +        }
3596     +    }
3597     +
3598     +  wpp = (const wchar_t **)&month_wcs;
3599     +  mblength = wcsrtombs (month, wpp, len + 1, &state);
3600     +  if (mblength == (size_t)-1 || *wpp != NULL)
3601     +    return 0;
3602     +
3603     +  /* Binary search of the sorted monthtab.  */
3604     +  do
3605     +    {
3606     +      int ix = (lo + hi) / 2;
3607     +      if (strncmp (month, monthtab[ix].name, strlen (monthtab[ix].name)) < 0)
3608     +        hi = ix;
3609     +      else
3610     +        lo = ix;
3611     +    }
3612     +  while (hi - lo > 1);
3613     +  return (!strncmp (month, monthtab[lo].name, strlen (monthtab[lo].name))
3614     +          ? monthtab[lo].val : 0);
3615     +}
3616     +#endif
3617     +
3618     /* Compare two lines A and B trying every key in sequence until there
3619     are no more keys or a difference is found. */
3620    
3621     static int
3622     -keycompare (const struct line *a, const struct line *b)
3623     +keycompare_uni (const struct line *a, const struct line *b)
3624     {
3625     struct keyfield const *key = keylist;
3626    
3627     @@ -1326,6 +1700,177 @@
3628     return key->reverse ? -diff : diff;
3629     }
3630    
3631     +#if HAVE_MBRTOWC
3632     +/* Multibyte counterpart of keycompare_uni: compare lines A and B key by
3633     +   key until a difference is found or there are no more keys.  */
3634     +static int
3635     +keycompare_mb (const struct line *a, const struct line *b)
3636     +{
3637     +  struct keyfield *key = keylist;
3638     +  /* For the first iteration only, the key positions have been
3639     +     precomputed for us. */
3640     +  char *texta = a->keybeg;
3641     +  char *textb = b->keybeg;
3642     +  char *lima = a->keylim;
3643     +  char *limb = b->keylim;
3644     +
3645     +  size_t mblength_a, mblength_b;
3646     +  wchar_t wc_a, wc_b;
3647     +  mbstate_t state_a, state_b;
3648     +  int diff;
3649     +
3650     +  memset (&state_a, '\0', sizeof(mbstate_t));
3651     +  memset (&state_b, '\0', sizeof(mbstate_t));
3652     +
3653     +  for (;;)
3654     +    {
3655     +      unsigned char *translate = (unsigned char *) key->translate;
3656     +      bool const *ignore = key->ignore;
3657     +
3658     +      /* Find the lengths. */
3659     +      size_t lena = lima <= texta ? 0 : lima - texta;
3660     +      size_t lenb = limb <= textb ? 0 : limb - textb;
3661     +
3662     +      /* Actually compare the fields. */
3663     +      if (key->numeric | key->general_numeric)
3664     +        {
3665     +          char savea = *lima, saveb = *limb;
3666     +
3667     +          *lima = *limb = '\0';
3668     +          if (force_general_numcompare)
3669     +            diff = general_numcompare (texta, textb);
3670     +          else
3671     +            diff = ((key->numeric ? numcompare : general_numcompare)
3672     +                    (texta, textb));
3673     +          *lima = savea, *limb = saveb;
3674     +        }
3675     +      else if (key->month)
3676     +        diff = getmonth (texta, lena) - getmonth (textb, lenb);
3677     +      else
3678     +        {
3679     +          if (ignore || translate)
3680     +            {
3681     +              char *copy_a = (char *) alloca (lena + 1 + lenb + 1);
3682     +              char *copy_b = copy_a + lena + 1;
3683     +              size_t new_len_a, new_len_b;
3684     +              size_t i, j;
3685     +
3686     +              /* Ignore and/or translate chars before comparing. */
3687     +# define IGNORE_CHARS(NEW_LEN, LEN, TEXT, COPY, WC, MBLENGTH, STATE) \
3688     +  do \
3689     +    { \
3690     +      wchar_t uwc; \
3691     +      char mbc[MB_LEN_MAX]; \
3692     +      mbstate_t state_wc; \
3693     + \
3694     +      for (NEW_LEN = i = 0; i < LEN;) \
3695     +        { \
3696     +          mbstate_t state_bak; \
3697     + \
3698     +          state_bak = STATE; \
3699     +          MBLENGTH = mbrtowc (&WC, TEXT + i, LEN - i, &STATE); \
3700     + \
3701     +          if (MBLENGTH == (size_t)-2 || MBLENGTH == (size_t)-1 \
3702     +              || MBLENGTH == 0) \
3703     +            { \
3704     +              if (MBLENGTH == (size_t)-2 || MBLENGTH == (size_t)-1) \
3705     +                STATE = state_bak; \
3706     +              if (!ignore) \
3707     +                COPY[NEW_LEN++] = TEXT[i]; \
3708     +              i++; /* Always consume the byte so the loop advances. */ \
3709     +              continue; \
3710     +            } \
3711     + \
3712     +          if (ignore) \
3713     +            { \
3714     +              if ((ignore == nonprinting && !iswprint (WC)) \
3715     +                  || (ignore == nondictionary \
3716     +                      && !iswalnum (WC) && !iswblank (WC))) \
3717     +                { \
3718     +                  i += MBLENGTH; \
3719     +                  continue; \
3720     +                } \
3721     +            } \
3722     + \
3723     +          if (translate) \
3724     +            { \
3725     +              uwc = towupper(WC); \
3726     +              if (WC == uwc) \
3727     +                { \
3728     +                  memcpy (mbc, TEXT + i, MBLENGTH); \
3729     +                  i += MBLENGTH; \
3730     +                } \
3731     +              else \
3732     +                { \
3733     +                  i += MBLENGTH; \
3734     +                  WC = uwc; \
3735     +                  memset (&state_wc, '\0', sizeof (mbstate_t)); \
3736     + \
3737     +                  MBLENGTH = wcrtomb (mbc, WC, &state_wc); \
3738     +                  assert (MBLENGTH != (size_t)-1 && MBLENGTH != 0); \
3739     +                } \
3740     + \
3741     +              for (j = 0; j < MBLENGTH; j++) \
3742     +                COPY[NEW_LEN++] = mbc[j]; \
3743     +            } \
3744     +          else \
3745     +            for (j = 0; j < MBLENGTH; j++) \
3746     +              COPY[NEW_LEN++] = TEXT[i++]; \
3747     +        } \
3748     +      COPY[NEW_LEN] = '\0'; \
3749     +    } \
3750     +  while (0)
3751     +              IGNORE_CHARS (new_len_a, lena, texta, copy_a,
3752     +                            wc_a, mblength_a, state_a);
3753     +              IGNORE_CHARS (new_len_b, lenb, textb, copy_b,
3754     +                            wc_b, mblength_b, state_b);
3755     +              diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b);
3756     +            }
3757     +          else if (lena == 0)
3758     +            diff = - NONZERO (lenb);
3759     +          else if (lenb == 0)
3760     +            goto greater;
3761     +          else
3762     +            diff = xmemcoll (texta, lena, textb, lenb);
3763     +        }
3764     +
3765     +      if (diff)
3766     +        goto not_equal;
3767     +
3768     +      key = key->next;
3769     +      if (! key)
3770     +        break;
3771     +
3772     +      /* Find the beginning and limit of the next field. */
3773     +      if (key->eword != -1)
3774     +        lima = limfield (a, key), limb = limfield (b, key);
3775     +      else
3776     +        lima = a->text + a->length - 1, limb = b->text + b->length - 1;
3777     +
3778     +      if (key->sword != -1)
3779     +        texta = begfield (a, key), textb = begfield (b, key);
3780     +      else
3781     +        {
3782     +          texta = a->text, textb = b->text;
3783     +          if (key->skipsblanks)
3784     +            {
3785     +              while (texta < lima && ismbblank (texta, lima - texta, &mblength_a))
3786     +                texta += mblength_a;
3787     +              while (textb < limb && ismbblank (textb, limb - textb, &mblength_b))
3788     +                textb += mblength_b;
3789     +            }
3790     +        }
3791     +    }
3792     +
3793     +  return 0;
3794     +
3795     +greater:
3796     +  diff = 1;
3797     +not_equal:
3798     +  return key->reverse ? -diff : diff;
3799     +}
3800     +#endif
3801     +
3802     /* Compare two lines A and B, returning negative, zero, or positive
3803     depending on whether A compares less than, equal to, or greater than B. */
3804    
3805     @@ -2127,7 +2672,7 @@
3806     atexit (close_stdout);
3807    
3808     hard_LC_COLLATE = hard_locale (LC_COLLATE);
3809     -#if HAVE_NL_LANGINFO
3810     +#if HAVE_LANGINFO_CODESET
3811     hard_LC_TIME = hard_locale (LC_TIME);
3812     #endif
3813    
3814     @@ -2148,6 +2693,27 @@
3815     thousands_sep = -1;
3816     }
3817    
3818     +#if HAVE_MBRTOWC
3819     + if (MB_CUR_MAX > 1)
3820     + {
3821     + inittables = inittables_mb;
3822     + begfield = begfield_mb;
3823     + limfield = limfield_mb;
3824     + getmonth = getmonth_mb;
3825     + keycompare = keycompare_mb;
3826     + numcompare = numcompare_mb;
3827     + }
3828     + else
3829     +#endif
3830     + {
3831     + inittables = inittables_uni;
3832     + begfield = begfield_uni;
3833     + limfield = limfield_uni;
3834     + getmonth = getmonth_uni;
3835     + keycompare = keycompare_uni;
3836     + numcompare = numcompare_uni;
3837     + }
3838     +
3839     have_read_stdin = false;
3840     inittables ();
3841    
3842     @@ -2349,13 +2915,35 @@
3843    
3844     case 't':
3845     {
3846     - char newtab = optarg[0];
3847     - if (! newtab)
3848     + char newtab[MB_LEN_MAX + 1];
3849     + size_t newtab_length = 1;
3850     + strncpy (newtab, optarg, MB_LEN_MAX);
3851     + if (! newtab[0])
3852     error (SORT_FAILURE, 0, _("empty tab"));
3853     - if (optarg[1])
3854     +#if HAVE_MBRTOWC
3855     + if (MB_CUR_MAX > 1)
3856     + {
3857     + wchar_t wc;
3858     + mbstate_t state;
3859     + size_t i;
3860     +
3861     + memset (&state, '\0', sizeof (mbstate_t));
3862     + newtab_length = mbrtowc (&wc, newtab, strnlen (newtab,
3863     + MB_LEN_MAX),
3864     + &state);
3865     + switch (newtab_length)
3866     + {
3867     + case (size_t) -1:
3868     + case (size_t) -2:
3869     + case 0:
3870     + newtab_length = 1;
3871     + }
3872     + }
3873     +#endif
3874     + if (newtab_length == 1 && optarg[1])
3875     {
3876     if (STREQ (optarg, "\\0"))
3877     - newtab = '\0';
3878     + newtab[0] = '\0';
3879     else
3880     {
3881     /* Provoke with `sort -txx'. Complain about
3882     @@ -2366,9 +2954,12 @@
3883     quote (optarg));
3884     }
3885     }
3886     - if (tab != TAB_DEFAULT && tab != newtab)
3887     + if (tab_length
3888     + && (tab_length != newtab_length
3889     + || memcmp (tab, newtab, tab_length) != 0))
3890     error (SORT_FAILURE, 0, _("incompatible tabs"));
3891     - tab = newtab;
3892     + memcpy (tab, newtab, newtab_length);
3893     + tab_length = newtab_length;
3894     }
3895     break;
3896    
3897     --- /dev/null 2005-10-10 09:36:06.437701000 +0100
3898     +++ coreutils-5.93/tests/sort/sort-mb-tests 2005-12-23 08:53:01.000000000 +0000
3899     @@ -0,0 +1,58 @@
3900     +#! /bin/sh
3901     +case $# in
3902     +  0) xx='../../src/sort';;
3903     +  *) xx="$1";;
3904     +esac
3905     +test "$VERBOSE" && echo=echo || echo=:
3906     +$echo testing program: $xx
3907     +errors=0
3908     +test "$srcdir" || srcdir=.
3909     +test "$VERBOSE" && $xx --version 2> /dev/null
3910     +
3911     +export LC_ALL=en_US.UTF-8
3912     +locale -k LC_CTYPE 2>&1 | grep -q charmap.*UTF-8 || exit 77
3913     +# Without a UTF-8 locale the line above exits 77 (Automake: test skipped).
3914     +
3915     +$xx -t @ -k2 -n $srcdir/mb1.I > mb1.O
3916     +code=$?
3917     +if test $code != 0; then
3918     +  $echo "Test mb1 failed: $xx return code $code differs from expected value 0" 1>&2
3919     +  errors=`expr $errors + 1`
3920     +else
3921     +  cmp mb1.O $srcdir/mb1.X > /dev/null 2>&1
3922     +  case $? in
3923     +    0) if test "$VERBOSE"; then $echo "passed mb1"; fi;;
3924     +    1) $echo "Test mb1 failed: files mb1.O and $srcdir/mb1.X differ" 1>&2
3925     +       (diff -c mb1.O $srcdir/mb1.X) 2> /dev/null
3926     +       errors=`expr $errors + 1`;;
3927     +    2) $echo "Test mb1 may have failed." 1>&2
3928     +       $echo The command "cmp mb1.O $srcdir/mb1.X" failed. 1>&2
3929     +       errors=`expr $errors + 1`;;
3930     +  esac
3931     +fi
3932     +
3933     +$xx -t @ -k4 -n $srcdir/mb2.I > mb2.O
3934     +code=$?
3935     +if test $code != 0; then
3936     +  $echo "Test mb2 failed: $xx return code $code differs from expected value 0" 1>&2
3937     +  errors=`expr $errors + 1`
3938     +else
3939     +  cmp mb2.O $srcdir/mb2.X > /dev/null 2>&1
3940     +  case $? in
3941     +    0) if test "$VERBOSE"; then $echo "passed mb2"; fi;;
3942     +    1) $echo "Test mb2 failed: files mb2.O and $srcdir/mb2.X differ" 1>&2
3943     +       (diff -c mb2.O $srcdir/mb2.X) 2> /dev/null
3944     +       errors=`expr $errors + 1`;;
3945     +    2) $echo "Test mb2 may have failed." 1>&2
3946     +       $echo The command "cmp mb2.O $srcdir/mb2.X" failed. 1>&2
3947     +       errors=`expr $errors + 1`;;
3948     +  esac
3949     +fi
3950     +
3951     +if test $errors = 0; then
3952     +  $echo Passed all 2 tests. 1>&2
3953     +else
3954     +  $echo Failed $errors tests. 1>&2
3955     +fi
3956     +test $errors = 0 || errors=1
3957     +exit $errors
3958     --- /dev/null 2005-10-10 09:36:06.437701000 +0100
3959     +++ coreutils-5.93/tests/sort/mb1.I 2005-12-23 08:53:01.000000000 +0000
3960     @@ -0,0 +1,4 @@
3961     +Apple@10
3962     +Banana@5
3963     +Citrus@20
3964     +Cherry@30
3965     --- /dev/null 2005-10-10 09:36:06.437701000 +0100
3966     +++ coreutils-5.93/tests/sort/mb2.I 2005-12-23 08:53:01.000000000 +0000
3967     @@ -0,0 +1,4 @@
3968     +Apple@AA10@@20
3969     +Banana@AA5@@30
3970     +Citrus@AA20@@5
3971     +Cherry@AA30@@10
3972     --- /dev/null 2005-10-10 09:36:06.437701000 +0100
3973     +++ coreutils-5.93/tests/sort/mb1.X 2005-12-23 08:53:01.000000000 +0000
3974     @@ -0,0 +1,4 @@
3975     +Banana@5
3976     +Apple@10
3977     +Citrus@20
3978     +Cherry@30
3979     --- /dev/null 2005-10-10 09:36:06.437701000 +0100
3980     +++ coreutils-5.93/tests/sort/mb2.X 2005-12-23 08:53:01.000000000 +0000
3981     @@ -0,0 +1,4 @@
3982     +Citrus@AA20@@5
3983     +Cherry@AA30@@10
3984     +Apple@AA10@@20
3985     +Banana@AA5@@30
3986     --- coreutils-5.93/tests/sort/Makefile.am.i18n 2005-10-24 22:02:25.000000000 +0100
3987     +++ coreutils-5.93/tests/sort/Makefile.am 2005-12-23 08:53:01.000000000 +0000
3988     @@ -43,14 +43,16 @@
3989     nul-nls.E use-nl.O use-nl.E o2.O o2.E nul-tab.O nul-tab.E
3990     ##test-files-end
3991    
3992     -EXTRA_DIST = Test.pm $x-tests $(explicit) $(maint_gen)
3993     -noinst_SCRIPTS = $x-tests
3994     +run_gen += mb1.O mb2.O
3995     +
3996     +EXTRA_DIST = Test.pm $x-tests $(explicit) $(maint_gen) mb1.I mb1.X mb2.I mb2.X
3997     +noinst_SCRIPTS = $x-tests # $x-mb-tests
3998     TESTS_ENVIRONMENT = \
3999     PATH="`pwd`/../../src$(PATH_SEPARATOR)$$PATH"
4000    
4001     editpl = sed -e 's,@''PERL''@,$(PERL),g' -e 's,@''srcdir''@,$(srcdir),g'
4002    
4003     -TESTS = $x-tests
4004     +TESTS = $x-tests $x-mb-tests
4005    
4006     mk_script = $(srcdir)/../mk-script
4007     $(srcdir)/$x-tests: $(mk_script) Test.pm Makefile.am
4008     --- coreutils-5.93/tests/sort/Makefile.in.i18n 2005-11-05 22:12:25.000000000 +0000
4009     +++ coreutils-5.93/tests/sort/Makefile.in 2005-12-23 09:00:37.000000000 +0000
4010     @@ -107,25 +107,25 @@
4011     $(top_srcdir)/m4/onceonly_2_57.m4 $(top_srcdir)/m4/openat.m4 \
4012     $(top_srcdir)/m4/pathmax.m4 $(top_srcdir)/m4/perl.m4 \
4013     $(top_srcdir)/m4/physmem.m4 $(top_srcdir)/m4/po.m4 \
4014     - $(top_srcdir)/m4/posixtm.m4 $(top_srcdir)/m4/posixver.m4 \
4015     - $(top_srcdir)/m4/prereq.m4 $(top_srcdir)/m4/progtest.m4 \
4016     - $(top_srcdir)/m4/putenv.m4 $(top_srcdir)/m4/quote.m4 \
4017     - $(top_srcdir)/m4/quotearg.m4 $(top_srcdir)/m4/readlink.m4 \
4018     - $(top_srcdir)/m4/readtokens.m4 $(top_srcdir)/m4/readutmp.m4 \
4019     - $(top_srcdir)/m4/regex.m4 $(top_srcdir)/m4/rename.m4 \
4020     - $(top_srcdir)/m4/restrict.m4 $(top_srcdir)/m4/rmdir-errno.m4 \
4021     - $(top_srcdir)/m4/rmdir.m4 $(top_srcdir)/m4/root-dev-ino.m4 \
4022     - $(top_srcdir)/m4/rpmatch.m4 $(top_srcdir)/m4/safe-read.m4 \
4023     - $(top_srcdir)/m4/safe-write.m4 $(top_srcdir)/m4/same.m4 \
4024     - $(top_srcdir)/m4/save-cwd.m4 $(top_srcdir)/m4/savedir.m4 \
4025     - $(top_srcdir)/m4/setenv.m4 $(top_srcdir)/m4/settime.m4 \
4026     - $(top_srcdir)/m4/sha1.m4 $(top_srcdir)/m4/sig2str.m4 \
4027     - $(top_srcdir)/m4/signed.m4 $(top_srcdir)/m4/socklen.m4 \
4028     - $(top_srcdir)/m4/sockpfaf.m4 $(top_srcdir)/m4/ssize_t.m4 \
4029     - $(top_srcdir)/m4/st_dm_mode.m4 $(top_srcdir)/m4/stat-macros.m4 \
4030     - $(top_srcdir)/m4/stat-prog.m4 $(top_srcdir)/m4/stat-time.m4 \
4031     - $(top_srcdir)/m4/stdbool.m4 $(top_srcdir)/m4/stdint_h.m4 \
4032     - $(top_srcdir)/m4/stdio-safer.m4 \
4033     + $(top_srcdir)/m4/posix_acl.m4 $(top_srcdir)/m4/posixtm.m4 \
4034     + $(top_srcdir)/m4/posixver.m4 $(top_srcdir)/m4/prereq.m4 \
4035     + $(top_srcdir)/m4/progtest.m4 $(top_srcdir)/m4/putenv.m4 \
4036     + $(top_srcdir)/m4/quote.m4 $(top_srcdir)/m4/quotearg.m4 \
4037     + $(top_srcdir)/m4/readlink.m4 $(top_srcdir)/m4/readtokens.m4 \
4038     + $(top_srcdir)/m4/readutmp.m4 $(top_srcdir)/m4/regex.m4 \
4039     + $(top_srcdir)/m4/rename.m4 $(top_srcdir)/m4/restrict.m4 \
4040     + $(top_srcdir)/m4/rmdir-errno.m4 $(top_srcdir)/m4/rmdir.m4 \
4041     + $(top_srcdir)/m4/root-dev-ino.m4 $(top_srcdir)/m4/rpmatch.m4 \
4042     + $(top_srcdir)/m4/safe-read.m4 $(top_srcdir)/m4/safe-write.m4 \
4043     + $(top_srcdir)/m4/same.m4 $(top_srcdir)/m4/save-cwd.m4 \
4044     + $(top_srcdir)/m4/savedir.m4 $(top_srcdir)/m4/setenv.m4 \
4045     + $(top_srcdir)/m4/settime.m4 $(top_srcdir)/m4/sha1.m4 \
4046     + $(top_srcdir)/m4/sig2str.m4 $(top_srcdir)/m4/signed.m4 \
4047     + $(top_srcdir)/m4/socklen.m4 $(top_srcdir)/m4/sockpfaf.m4 \
4048     + $(top_srcdir)/m4/ssize_t.m4 $(top_srcdir)/m4/st_dm_mode.m4 \
4049     + $(top_srcdir)/m4/stat-macros.m4 $(top_srcdir)/m4/stat-prog.m4 \
4050     + $(top_srcdir)/m4/stat-time.m4 $(top_srcdir)/m4/stdbool.m4 \
4051     + $(top_srcdir)/m4/stdint_h.m4 $(top_srcdir)/m4/stdio-safer.m4 \
4052     $(top_srcdir)/m4/stdlib-safer.m4 $(top_srcdir)/m4/stpcpy.m4 \
4053     $(top_srcdir)/m4/strcase.m4 $(top_srcdir)/m4/strcspn.m4 \
4054     $(top_srcdir)/m4/strdup.m4 $(top_srcdir)/m4/strftime.m4 \
4055     @@ -196,7 +196,6 @@
4056     GLIBC21 = @GLIBC21@
4057     GMSGFMT = @GMSGFMT@
4058     GNU_PACKAGE = @GNU_PACKAGE@
4059     -GREP = @GREP@
4060     HAVE__BOOL = @HAVE__BOOL@
4061     HELP2MAN = @HELP2MAN@
4062     INSTALL_DATA = @INSTALL_DATA@
4063     @@ -207,6 +206,7 @@
4064     INTL_MACOSX_LIBS = @INTL_MACOSX_LIBS@
4065     KMEM_GROUP = @KMEM_GROUP@
4066     LDFLAGS = @LDFLAGS@
4067     +LIBACL = @LIBACL@
4068     LIBICONV = @LIBICONV@
4069     LIBINTL = @LIBINTL@
4070     LIBOBJS = @LIBOBJS@
4071     @@ -217,6 +217,8 @@
4072     LIB_FDATASYNC = @LIB_FDATASYNC@
4073     LIB_GETHRXTIME = @LIB_GETHRXTIME@
4074     LIB_NANOSLEEP = @LIB_NANOSLEEP@
4075     +LIB_PAM = @LIB_PAM@
4076     +LIB_SELINUX = @LIB_SELINUX@
4077     LN_S = @LN_S@
4078     LTLIBICONV = @LTLIBICONV@
4079     LTLIBINTL = @LTLIBINTL@
4080     @@ -268,30 +270,23 @@
4081     build_os = @build_os@
4082     build_vendor = @build_vendor@
4083     datadir = @datadir@
4084     -datarootdir = @datarootdir@
4085     -docdir = @docdir@
4086     -dvidir = @dvidir@
4087     exec_prefix = @exec_prefix@
4088     host = @host@
4089     host_alias = @host_alias@
4090     host_cpu = @host_cpu@
4091     host_os = @host_os@
4092     host_vendor = @host_vendor@
4093     -htmldir = @htmldir@
4094     includedir = @includedir@
4095     infodir = @infodir@
4096     install_sh = @install_sh@
4097     libdir = @libdir@
4098     libexecdir = @libexecdir@
4099     -localedir = @localedir@
4100     localstatedir = @localstatedir@
4101     mandir = @mandir@
4102     mkdir_p = @mkdir_p@
4103     oldincludedir = @oldincludedir@
4104     -pdfdir = @pdfdir@
4105     prefix = @prefix@
4106     program_transform_name = @program_transform_name@
4107     -psdir = @psdir@
4108     sbindir = @sbindir@
4109     sharedstatedir = @sharedstatedir@
4110     sysconfdir = @sysconfdir@
4111     @@ -318,33 +313,37 @@
4112     neg-nls.I neg-nls.X nul-nls.I nul-nls.X use-nl.I use-nl.X o2.I o2.X nul-tab.I \
4113     nul-tab.X
4114    
4115     -run_gen = n1.O n1.E n2.O n2.E n3.O n3.E n4.O n4.E n5.O n5.E n6.O n6.E n7.O \
4116     -n7.E n8a.O n8a.E n8b.O n8b.E n9a.O n9a.E n9b.O n9b.E n10a.O n10a.E n10b.O \
4117     -n10b.E n11a.O n11a.E n11b.O n11b.E 01a.O 01a.E 02a.O 02a.E 02b.O 02b.E 02c.O \
4118     -02c.E 02m.O 02m.E 02n.O 02n.E 02o.O 02o.E 02p.O 02p.E 03a.O 03a.E 03b.O 03b.E \
4119     -03c.O 03c.E 03d.O 03d.E 03e.O 03e.E 03f.O 03f.E 03g.O 03g.E 03h.O 03h.E 03i.O \
4120     -03i.E 04a.O 04a.E 04b.O 04b.E 04c.O 04c.E 04d.O 04d.E 04e.O 04e.E 05a.O 05a.E \
4121     -05b.O 05b.E 05c.O 05c.E 05d.O 05d.E 05e.O 05e.E 05f.O 05f.E 06a.O 06a.E 06b.O \
4122     -06b.E 06c.O 06c.E 06d.O 06d.E 06e.O 06e.E 06f.O 06f.E 07a.O 07a.E 07b.O 07b.E \
4123     -07c.O 07c.E 07d.O 07d.E 08a.O 08a.E 08b.O 08b.E 09a.O 09a.E 09b.O 09b.E 09c.O \
4124     -09c.E 09d.O 09d.E 10a.O 10a.E 10b.O 10b.E 10c.O 10c.E 10d.O 10d.E 10a0.O \
4125     -10a0.E 10a1.O 10a1.E 10a2.O 10a2.E 10e.O 10e.E 10f.O 10f.E 10g.O 10g.E 11a.O \
4126     -11a.E 11b.O 11b.E 11c.O 11c.E 11d.O 11d.E 12a.O 12a.E 12b.O 12b.E 12c.O 12c.E \
4127     -12d.O 12d.E 13a.O 13a.E 13b.O 13b.E 14a.O 14a.E 14b.O 14b.E 15a.O 15a.E 15b.O \
4128     -15b.E 15c.O 15c.E 15d.O 15d.E 15e.O 15e.E 16a.O 16a.E 17.O 17.E 18a.O 18a.E \
4129     -18b.O 18b.E 18c.O 18c.E 18d.O 18d.E 18e.O 18e.E 19a.O 19a.E 19b.O 19b.E 20a.O \
4130     -20a.E 21a.O 21a.E 21b.O 21b.E 21c.O 21c.E 21d.O 21d.E 21e.O 21e.E 21f.O 21f.E \
4131     -21g.O 21g.E 22a.O 22a.E 22b.O 22b.E no-file1.O no-file1.E o-no-file1.O \
4132     -o-no-file1.E create-empty.O create-empty.E neg-nls.O neg-nls.E nul-nls.O \
4133     -nul-nls.E use-nl.O use-nl.E o2.O o2.E nul-tab.O nul-tab.E
4134     -
4135     -EXTRA_DIST = Test.pm $x-tests $(explicit) $(maint_gen)
4136     -noinst_SCRIPTS = $x-tests
4137     +run_gen = n1.O n1.E n2.O n2.E n3.O n3.E n4.O n4.E n5.O n5.E n6.O n6.E \
4138     + n7.O n7.E n8a.O n8a.E n8b.O n8b.E n9a.O n9a.E n9b.O n9b.E \
4139     + n10a.O n10a.E n10b.O n10b.E n11a.O n11a.E n11b.O n11b.E 01a.O \
4140     + 01a.E 02a.O 02a.E 02b.O 02b.E 02c.O 02c.E 02m.O 02m.E 02n.O \
4141     + 02n.E 02o.O 02o.E 02p.O 02p.E 03a.O 03a.E 03b.O 03b.E 03c.O \
4142     + 03c.E 03d.O 03d.E 03e.O 03e.E 03f.O 03f.E 03g.O 03g.E 03h.O \
4143     + 03h.E 03i.O 03i.E 04a.O 04a.E 04b.O 04b.E 04c.O 04c.E 04d.O \
4144     + 04d.E 04e.O 04e.E 05a.O 05a.E 05b.O 05b.E 05c.O 05c.E 05d.O \
4145     + 05d.E 05e.O 05e.E 05f.O 05f.E 06a.O 06a.E 06b.O 06b.E 06c.O \
4146     + 06c.E 06d.O 06d.E 06e.O 06e.E 06f.O 06f.E 07a.O 07a.E 07b.O \
4147     + 07b.E 07c.O 07c.E 07d.O 07d.E 08a.O 08a.E 08b.O 08b.E 09a.O \
4148     + 09a.E 09b.O 09b.E 09c.O 09c.E 09d.O 09d.E 10a.O 10a.E 10b.O \
4149     + 10b.E 10c.O 10c.E 10d.O 10d.E 10a0.O 10a0.E 10a1.O 10a1.E \
4150     + 10a2.O 10a2.E 10e.O 10e.E 10f.O 10f.E 10g.O 10g.E 11a.O 11a.E \
4151     + 11b.O 11b.E 11c.O 11c.E 11d.O 11d.E 12a.O 12a.E 12b.O 12b.E \
4152     + 12c.O 12c.E 12d.O 12d.E 13a.O 13a.E 13b.O 13b.E 14a.O 14a.E \
4153     + 14b.O 14b.E 15a.O 15a.E 15b.O 15b.E 15c.O 15c.E 15d.O 15d.E \
4154     + 15e.O 15e.E 16a.O 16a.E 17.O 17.E 18a.O 18a.E 18b.O 18b.E \
4155     + 18c.O 18c.E 18d.O 18d.E 18e.O 18e.E 19a.O 19a.E 19b.O 19b.E \
4156     + 20a.O 20a.E 21a.O 21a.E 21b.O 21b.E 21c.O 21c.E 21d.O 21d.E \
4157     + 21e.O 21e.E 21f.O 21f.E 21g.O 21g.E 22a.O 22a.E 22b.O 22b.E \
4158     + no-file1.O no-file1.E o-no-file1.O o-no-file1.E create-empty.O \
4159     + create-empty.E neg-nls.O neg-nls.E nul-nls.O nul-nls.E \
4160     + use-nl.O use-nl.E o2.O o2.E nul-tab.O nul-tab.E mb1.O mb2.O
4161     +EXTRA_DIST = Test.pm $x-tests $(explicit) $(maint_gen) mb1.I mb1.X mb2.I mb2.X
4162     +noinst_SCRIPTS = $x-tests # $x-mb-tests
4163     TESTS_ENVIRONMENT = \
4164     PATH="`pwd`/../../src$(PATH_SEPARATOR)$$PATH"
4165    
4166     editpl = sed -e 's,@''PERL''@,$(PERL),g' -e 's,@''srcdir''@,$(srcdir),g'
4167     -TESTS = $x-tests
4168     +TESTS = $x-tests $x-mb-tests
4169     mk_script = $(srcdir)/../mk-script
4170     MAINTAINERCLEANFILES = $x-tests $(maint_gen)
4171     CLEANFILES = $(run_gen)