Magellan Linux

Diff of /trunk/mkinitrd-magellan/busybox/coreutils/tr.c

Parent Directory | Revision Log | View Patch

revision 983 by niro, Fri Apr 24 18:33:46 2009 UTC | revision 984 by niro, Sun May 30 11:32:42 2010 UTC
# Line 16  Line 16 
16   * Licensed under GPLv2 or later, see file LICENSE in this tarball for details.   * Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
17   */   */
18  /* http://www.opengroup.org/onlinepubs/009695399/utilities/tr.html  /* http://www.opengroup.org/onlinepubs/009695399/utilities/tr.html
19   * TODO: xdigit, graph, print   * TODO: graph, print
20   */   */
21  #include "libbb.h"  #include "libbb.h"
22    
23  #define ASCII 0377  enum {
24     ASCII = 256,
25     /* string buffer needs to be at least as big as the whole "alphabet".
26     * BUFSIZ == ASCII is ok, but we will realloc in expand
27     * even for smallest patterns, let's avoid that by using *2:
28     */
29     TR_BUFSIZ = (BUFSIZ > ASCII*2) ? BUFSIZ : ASCII*2,
30    };
31    
32  static void map(char *pvector,  static void map(char *pvector,
33   unsigned char *string1, unsigned int string1_len,   char *string1, unsigned string1_len,
34   unsigned char *string2, unsigned int string2_len)   char *string2, unsigned string2_len)
35  {  {
36   char last = '0';   char last = '0';
37   unsigned int i, j;   unsigned i, j;
38    
39   for (j = 0, i = 0; i < string1_len; i++) {   for (j = 0, i = 0; i < string1_len; i++) {
40   if (string2_len <= j)   if (string2_len <= j)
41   pvector[string1[i]] = last;   pvector[(unsigned char)(string1[i])] = last;
42   else   else
43   pvector[string1[i]] = last = string2[j++];   pvector[(unsigned char)(string1[i])] = last = string2[j++];
44   }   }
45  }  }
46    
47  /* supported constructs:  /* supported constructs:
48   *   Ranges,  e.g.,  0-9   ==>  0123456789   *   Ranges,  e.g.,  0-9   ==>  0123456789
  *   Ranges,  e.g.,  [0-9] ==>  0123456789  
49   *   Escapes, e.g.,  \a    ==>  Control-G   *   Escapes, e.g.,  \a    ==>  Control-G
50   *   Character classes, e.g. [:upper:] ==> A...Z   *   Character classes, e.g. [:upper:] ==> A...Z
51   *   Equiv classes, e.g. [=A=] ==> A   (hmmmmmmm?)   *   Equiv classes, e.g. [=A=] ==> A   (hmmmmmmm?)
52     * not supported:
53     *   \ooo-\ooo - octal ranges
54     *   [x*N] - repeat char x N times
55     *   [x*] - repeat char x until it fills STRING2:
56     * # echo qwe123 | /usr/bin/tr 123456789 '[d]'
57     * qwe[d]
58     * # echo qwe123 | /usr/bin/tr 123456789 '[d*]'
59     * qweddd
60   */   */
61  static unsigned int expand(const char *arg, char *buffer)  static unsigned expand(const char *arg, char **buffer_p)
62  {  {
63   char *buffer_start = buffer;   char *buffer = *buffer_p;
64     unsigned pos = 0;
65     unsigned size = TR_BUFSIZ;
66   unsigned i; /* can't be unsigned char: must be able to hold 256 */   unsigned i; /* can't be unsigned char: must be able to hold 256 */
67   unsigned char ac;   unsigned char ac;
68    
69   while (*arg) {   while (*arg) {
70     if (pos + ASCII > size) {
71     size += ASCII;
72     *buffer_p = buffer = xrealloc(buffer, size);
73     }
74   if (*arg == '\\') {   if (*arg == '\\') {
75   arg++;   arg++;
76   *buffer++ = bb_process_escape_sequence(&arg);   buffer[pos++] = bb_process_escape_sequence(&arg);
77   continue;   continue;
78   }   }
79   if (arg[1] == '-') { /* "0-9..." */   if (arg[1] == '-') { /* "0-9..." */
80   ac = arg[2];   ac = arg[2];
81   if (ac == '\0') { /* "0-": copy verbatim */   if (ac == '\0') { /* "0-": copy verbatim */
82   *buffer++ = *arg++; /* copy '0' */   buffer[pos++] = *arg++; /* copy '0' */
83   continue; /* next iter will copy '-' and stop */   continue; /* next iter will copy '-' and stop */
84   }   }
85   i = *arg;   i = (unsigned char) *arg;
86   while (i <= ac) /* ok: i is unsigned _int_ */   while (i <= ac) /* ok: i is unsigned _int_ */
87   *buffer++ = i++;   buffer[pos++] = i++;
88   arg += 3; /* skip 0-9 */   arg += 3; /* skip 0-9 */
89   continue;   continue;
90   }   }
91   if (*arg == '[') { /* "[xyz..." */   if ((ENABLE_FEATURE_TR_CLASSES || ENABLE_FEATURE_TR_EQUIV)
92     && *arg == '['
93     ) {
94   arg++;   arg++;
95   i = *arg++;   i = (unsigned char) *arg++;
96   /* "[xyz...", i=x, arg points to y */   /* "[xyz...", i=x, arg points to y */
97   if (ENABLE_FEATURE_TR_CLASSES && i == ':') {   if (ENABLE_FEATURE_TR_CLASSES && i == ':') { /* [:class:] */
98  #define CLO ":]\0"  #define CLO ":]\0"
99   static const char classes[] ALIGN1 =   static const char classes[] ALIGN1 =
100   "alpha"CLO "alnum"CLO "digit"CLO   "alpha"CLO "alnum"CLO "digit"CLO
101   "lower"CLO "upper"CLO "space"CLO   "lower"CLO "upper"CLO "space"CLO
102   "blank"CLO "punct"CLO "cntrl"CLO;   "blank"CLO "punct"CLO "cntrl"CLO
103  #define CLASS_invalid 0 /* we increment the retval */   "xdigit"CLO;
104  #define CLASS_alpha 1   enum {
105  #define CLASS_alnum 2   CLASS_invalid = 0, /* we increment the retval */
106  #define CLASS_digit 3   CLASS_alpha = 1,
107  #define CLASS_lower 4   CLASS_alnum = 2,
108  #define CLASS_upper 5   CLASS_digit = 3,
109  #define CLASS_space 6   CLASS_lower = 4,
110  #define CLASS_blank 7   CLASS_upper = 5,
111  #define CLASS_punct 8   CLASS_space = 6,
112  #define CLASS_cntrl 9   CLASS_blank = 7,
113  //#define CLASS_xdigit 10   CLASS_punct = 8,
114  //#define CLASS_graph 11   CLASS_cntrl = 9,
115  //#define CLASS_print 12   CLASS_xdigit = 10,
116     //CLASS_graph = 11,
117     //CLASS_print = 12,
118     };
119   smalluint j;   smalluint j;
120   { /* not really pretty.. */   char *tmp;
121   char *tmp = xstrndup(arg, 7); // warning: xdigit would need 8, not 7  
122   j = index_in_strings(classes, tmp) + 1;   /* xdigit needs 8, not 7 */
123   free(tmp);   i = 7 + (arg[0] == 'x');
124   }   tmp = xstrndup(arg, i);
125   if (j == CLASS_alnum || j == CLASS_digit) {   j = index_in_strings(classes, tmp) + 1;
126     free(tmp);
127    
128     if (j == CLASS_invalid)
129     goto skip_bracket;
130    
131     arg += i;
132     if (j == CLASS_alnum || j == CLASS_digit || j == CLASS_xdigit) {
133   for (i = '0'; i <= '9'; i++)   for (i = '0'; i <= '9'; i++)
134   *buffer++ = i;   buffer[pos++] = i;
135   }   }
136   if (j == CLASS_alpha || j == CLASS_alnum || j == CLASS_upper) {   if (j == CLASS_alpha || j == CLASS_alnum || j == CLASS_upper) {
137   for (i = 'A'; i <= 'Z'; i++)   for (i = 'A'; i <= 'Z'; i++)
138   *buffer++ = i;   buffer[pos++] = i;
139   }   }
140   if (j == CLASS_alpha || j == CLASS_alnum || j == CLASS_lower) {   if (j == CLASS_alpha || j == CLASS_alnum || j == CLASS_lower) {
141   for (i = 'a'; i <= 'z'; i++)   for (i = 'a'; i <= 'z'; i++)
142   *buffer++ = i;   buffer[pos++] = i;
143   }   }
144   if (j == CLASS_space || j == CLASS_blank) {   if (j == CLASS_space || j == CLASS_blank) {
145   *buffer++ = '\t';   buffer[pos++] = '\t';
146   if (j == CLASS_space) {   if (j == CLASS_space) {
147   *buffer++ = '\n';   buffer[pos++] = '\n';
148   *buffer++ = '\v';   buffer[pos++] = '\v';
149   *buffer++ = '\f';   buffer[pos++] = '\f';
150   *buffer++ = '\r';   buffer[pos++] = '\r';
151   }   }
152   *buffer++ = ' ';   buffer[pos++] = ' ';
153   }   }
154   if (j == CLASS_punct || j == CLASS_cntrl) {   if (j == CLASS_punct || j == CLASS_cntrl) {
155   for (i = '\0'; i <= ASCII; i++)   for (i = '\0'; i < ASCII; i++) {
156   if ((j == CLASS_punct && isprint(i) && !isalnum(i) && !isspace(i))   if ((j == CLASS_punct && isprint_asciionly(i) && !isalnum(i) && !isspace(i))
157   || (j == CLASS_cntrl && iscntrl(i)))   || (j == CLASS_cntrl && iscntrl(i))
158   *buffer++ = i;   ) {
159   }   buffer[pos++] = i;
160   if (j == CLASS_invalid) {   }
161   *buffer++ = '[';   }
  *buffer++ = ':';  
  continue;  
162   }   }
163   break;   if (j == CLASS_xdigit) {
164     for (i = 'A'; i <= 'F'; i++) {
165     buffer[pos + 6] = i | 0x20;
166     buffer[pos++] = i;
167     }
168     pos += 6;
169     }
170     continue;
171   }   }
172   /* "[xyz...", i=x, arg points to y */   /* "[xyz...", i=x, arg points to y */
173   if (ENABLE_FEATURE_TR_EQUIV && i == '=') { /* [=CHAR=] */   if (ENABLE_FEATURE_TR_EQUIV && i == '=') { /* [=CHAR=] */
174   *buffer++ = *arg; /* copy CHAR */   buffer[pos++] = *arg; /* copy CHAR */
175   if (!*arg || arg[1] != '=' || arg[2] != ']')   if (!arg[0] || arg[1] != '=' || arg[2] != ']')
176   bb_show_usage();   bb_show_usage();
177   arg += 3; /* skip CHAR=] */   arg += 3; /* skip CHAR=] */
178   continue;   continue;
179   }   }
180   if (i == '\0' || *arg != '-') { /* not [x-...] - copy verbatim */   /* The rest of "[xyz..." cases is treated as normal
181   *buffer++ = '[';   * string, "[" has no special meaning here:
182   arg--; /* points to x */   * tr "[a-z]" "[A-Z]" can be written as tr "a-z" "A-Z",
183   continue; /* copy all, including eventual ']' */   * also try tr "[a-z]" "_A-Z+" and you'll see that
184   }   * [] is not special here.
185   /* [x-z] */   */
186   arg++; /* skip - */   skip_bracket:
187   if (arg[0] == '\0' || arg[1] != ']')   arg -= 2; /* points to "[" in "[xyz..." */
  bb_show_usage();  
  ac = *arg++;  
  while (i <= ac)  
  *buffer++ = i++;  
  arg++; /* skip ] */  
  continue;  
188   }   }
189   *buffer++ = *arg++;   buffer[pos++] = *arg++;
190   }   }
191   return (buffer - buffer_start);   return pos;
192  }  }
193    
194    /* NB: buffer is guaranteed to be at least TR_BUFSIZ
195     * (which is >= ASCII) big.
196     */
197  static int complement(char *buffer, int buffer_len)  static int complement(char *buffer, int buffer_len)
198  {  {
199   int i, j, ix;   int len;
200   char conv[ASCII + 2];   char conv[ASCII];
201     unsigned char ch;
202   ix = 0;  
203   for (i = '\0'; i <= ASCII; i++) {   len = 0;
204   for (j = 0; j < buffer_len; j++)   ch = '\0';
205   if (buffer[j] == i)   while (1) {
206   break;   if (memchr(buffer, ch, buffer_len) == NULL)
207   if (j == buffer_len)   conv[len++] = ch;
208   conv[ix++] = i & ASCII;   if (++ch == '\0')
209     break;
210   }   }
211   memcpy(buffer, conv, ix);   memcpy(buffer, conv, len);
212   return ix;   return len;
213  }  }
214    
215  int tr_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;  int tr_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
216  int tr_main(int argc UNUSED_PARAM, char **argv)  int tr_main(int argc UNUSED_PARAM, char **argv)
217  {  {
  int output_length = 0, input_length;  
218   int i;   int i;
219   smalluint flags;   smalluint opts;
220   ssize_t read_chars = 0;   ssize_t read_chars;
221   size_t in_index = 0, out_index = 0;   size_t in_index, out_index;
222   unsigned last = UCHAR_MAX + 1; /* not equal to any char */   unsigned last = UCHAR_MAX + 1; /* not equal to any char */
223   unsigned char coded, c;   unsigned char coded, c;
224   unsigned char *output = xmalloc(BUFSIZ);   char *str1 = xmalloc(TR_BUFSIZ);
225   char *vector = xzalloc((ASCII+1) * 3);   char *str2 = xmalloc(TR_BUFSIZ);
226   char *invec  = vector + (ASCII+1);   int str2_length;
227   char *outvec = vector + (ASCII+1) * 2;   int str1_length;
228     char *vector = xzalloc(ASCII * 3);
229  #define TR_OPT_complement (1 << 0)   char *invec  = vector + ASCII;
230  #define TR_OPT_delete (1 << 1)   char *outvec = vector + ASCII * 2;
231  #define TR_OPT_squeeze_reps (1 << 2)  
232    #define TR_OPT_complement (3 << 0)
233   flags = getopt32(argv, "+cds"); /* '+': stop at first non-option */  #define TR_OPT_delete (1 << 2)
234   argv += optind;  #define TR_OPT_squeeze_reps (1 << 3)
235    
236   for (i = 0; i <= ASCII; i++) {   for (i = 0; i < ASCII; i++) {
237   vector[i] = i;   vector[i] = i;
238   /*invec[i] = outvec[i] = FALSE; - done by xzalloc */   /*invec[i] = outvec[i] = FALSE; - done by xzalloc */
239   }   }
240    
241  #define tr_buf bb_common_bufsiz1   /* -C/-c difference is that -C complements "characters",
242   if (*argv != NULL) {   * and -c complements "values" (binary bytes I guess).
243   input_length = expand(*argv++, tr_buf);   * In POSIX locale, these are the same.
244   if (flags & TR_OPT_complement)   */
245   input_length = complement(tr_buf, input_length);  
246   if (*argv) {   opt_complementary = "-1";
247   if (argv[0][0] == '\0')   opts = getopt32(argv, "+Ccds"); /* '+': stop at first non-option */
248   bb_error_msg_and_die("STRING2 cannot be empty");   argv += optind;
249   output_length = expand(*argv, (char *)output);  
250   map(vector, (unsigned char *)tr_buf, input_length, output, output_length);   str1_length = expand(*argv++, &str1);
251   }   str2_length = 0;
252   for (i = 0; i < input_length; i++)   if (opts & TR_OPT_complement)
253   invec[(unsigned char)tr_buf[i]] = TRUE;   str1_length = complement(str1, str1_length);
254   for (i = 0; i < output_length; i++)   if (*argv) {
255   outvec[output[i]] = TRUE;   if (argv[0][0] == '\0')
256     bb_error_msg_and_die("STRING2 cannot be empty");
257     str2_length = expand(*argv, &str2);
258     map(vector, str1, str1_length,
259     str2, str2_length);
260   }   }
261     for (i = 0; i < str1_length; i++)
262     invec[(unsigned char)(str1[i])] = TRUE;
263     for (i = 0; i < str2_length; i++)
264     outvec[(unsigned char)(str2[i])] = TRUE;
265    
266     goto start_from;
267    
268     /* In this loop, str1 space is reused as input buffer,
269     * str2 - as output one. */
270   for (;;) {   for (;;) {
271   /* If we're out of input, flush output and read more input. */   /* If we're out of input, flush output and read more input. */
272   if ((ssize_t)in_index == read_chars) {   if ((ssize_t)in_index == read_chars) {
273   if (out_index) {   if (out_index) {
274   xwrite(STDOUT_FILENO, (char *)output, out_index);   xwrite(STDOUT_FILENO, str2, out_index);
275     start_from:
276   out_index = 0;   out_index = 0;
277   }   }
278   read_chars = safe_read(STDIN_FILENO, tr_buf, BUFSIZ);   read_chars = safe_read(STDIN_FILENO, str1, TR_BUFSIZ);
279   if (read_chars <= 0) {   if (read_chars <= 0) {
280   if (read_chars < 0)   if (read_chars < 0)
281   bb_perror_msg_and_die(bb_msg_read_error);   bb_perror_msg_and_die(bb_msg_read_error);
282   exit(EXIT_SUCCESS);   break;
283   }   }
284   in_index = 0;   in_index = 0;
285   }   }
286   c = tr_buf[in_index++];   c = str1[in_index++];
287   coded = vector[c];   if ((opts & TR_OPT_delete) && invec[c])
  if ((flags & TR_OPT_delete) && invec[c])  
288   continue;   continue;
289   if ((flags & TR_OPT_squeeze_reps) && last == coded   coded = vector[c];
290   && (invec[c] || outvec[coded]))   if ((opts & TR_OPT_squeeze_reps) && last == coded
291     && (invec[c] || outvec[coded])
292     ) {
293   continue;   continue;
294   output[out_index++] = last = coded;   }
295     str2[out_index++] = last = coded;
296   }   }
297   /* NOTREACHED */  
298   return EXIT_SUCCESS;   return EXIT_SUCCESS;
299  }  }

Legend:
Removed from v.983  
changed lines
  Added in v.984