Magellan Linux

Diff of /trunk/mkinitrd-magellan/busybox/coreutils/tr.c

Parent Directory | Revision Log | View Patch

revision 532 by niro, Sat Sep 1 22:45:15 2007 UTC vs. revision 816 by niro, Fri Apr 24 18:33:46 2009 UTC
# Line 15  Line 15 
15   *   *
16   * Licensed under GPLv2 or later, see file LICENSE in this tarball for details.   * Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
17   */   */
18    /* http://www.opengroup.org/onlinepubs/009695399/utilities/tr.html
19  #include "busybox.h"   * TODO: xdigit, graph, print
20     */
21  // Even with -funsigned-char, gcc still complains about char as an array index.  #include "libbb.h"
   
 #define GCC4_IS_STUPID int  
22    
23  #define ASCII 0377  #define ASCII 0377
24    
25  /* some "globals" shared across this file */  static void map(char *pvector,
26  static char com_fl, del_fl, sq_fl;   unsigned char *string1, unsigned int string1_len,
27  /* these last are pointers to static buffers declared in tr_main */   unsigned char *string2, unsigned int string2_len)
 static char *poutput, *pvector, *pinvec, *poutvec;  
   
 static void convert(void)  
 {  
  int read_chars = 0, in_index = 0, out_index = 0, c, coded, last = -1;  
   
  for (;;) {  
  // If we're out of input, flush output and read more input.  
   
  if (in_index == read_chars) {  
  if (out_index) {  
  if (write(1, (char *) poutput, out_index) != out_index)  
  bb_error_msg_and_die(bb_msg_write_error);  
  out_index = 0;  
  }  
   
  if ((read_chars = read(0, bb_common_bufsiz1, BUFSIZ)) <= 0) {  
  if (write(1, (char *) poutput, out_index) != out_index)  
  bb_error_msg(bb_msg_write_error);  
  exit(0);  
  }  
  in_index = 0;  
  }  
  c = bb_common_bufsiz1[in_index++];  
  coded = pvector[c];  
  if (del_fl && pinvec[c])  
  continue;  
  if (sq_fl && last == coded && (pinvec[c] || poutvec[coded]))  
  continue;  
  poutput[out_index++] = last = coded;  
  }  
   
  /* NOTREACHED */  
 }  
   
 static void map(char *string1, unsigned int string1_len,  
  char *string2, unsigned int string2_len)  
28  {  {
29   char last = '0';   char last = '0';
30   unsigned int i, j;   unsigned int i, j;
31    
32   for (j = 0, i = 0; i < string1_len; i++) {   for (j = 0, i = 0; i < string1_len; i++) {
33   if (string2_len <= j)   if (string2_len <= j)
34   pvector[(GCC4_IS_STUPID)string1[i]] = last;   pvector[string1[i]] = last;
35   else   else
36   pvector[(GCC4_IS_STUPID)string1[i]] = last = string2[j++];   pvector[string1[i]] = last = string2[j++];
37   }   }
38  }  }
39    
40  /* supported constructs:  /* supported constructs:
41   *   Ranges,  e.g.,  [0-9]  ==>  0123456789   *   Ranges,  e.g.,  0-9   ==>  0123456789
42   *   Escapes, e.g.,  \a     ==>  Control-G   *   Ranges,  e.g.,  [0-9] ==>  0123456789
43   * Character classes, e.g. [:upper:] ==> A ... Z   *   Escapes, e.g.,  \a    ==>  Control-G
44     *   Character classes, e.g. [:upper:] ==> A...Z
45     *   Equiv classess, e.g. [=A=] ==> A   (hmmmmmmm?)
46   */   */
47  static unsigned int expand(const char *arg, char *buffer)  static unsigned int expand(const char *arg, char *buffer)
48  {  {
49   char *buffer_start = buffer;   char *buffer_start = buffer;
50   int i, ac;   unsigned i; /* can't be unsigned char: must be able to hold 256 */
51     unsigned char ac;
52    
53   while (*arg) {   while (*arg) {
54   if (*arg == '\\') {   if (*arg == '\\') {
55   arg++;   arg++;
56   *buffer++ = bb_process_escape_sequence(&arg);   *buffer++ = bb_process_escape_sequence(&arg);
57   } else if (*(arg+1) == '-') {   continue;
58   ac = *(arg+2);   }
59   if(ac == 0) {   if (arg[1] == '-') { /* "0-9..." */
60   *buffer++ = *arg++;   ac = arg[2];
61   continue;   if (ac == '\0') { /* "0-": copy verbatim */
62     *buffer++ = *arg++; /* copy '0' */
63     continue; /* next iter will copy '-' and stop */
64   }   }
65   i = *arg;   i = *arg;
66   while (i <= ac)   while (i <= ac) /* ok: i is unsigned _int_ */
67   *buffer++ = i++;   *buffer++ = i++;
68   arg += 3; /* Skip the assumed a-z */   arg += 3; /* skip 0-9 */
69   } else if (*arg == '[') {   continue;
70     }
71     if (*arg == '[') { /* "[xyz..." */
72   arg++;   arg++;
73   i = *arg++;   i = *arg++;
74     /* "[xyz...", i=x, arg points to y */
75   if (ENABLE_FEATURE_TR_CLASSES && i == ':') {   if (ENABLE_FEATURE_TR_CLASSES && i == ':') {
76   if (strncmp(arg, "alpha", 5) == 0) {  #define CLO ":]\0"
77   for (i = 'A'; i <= 'Z'; i++)   static const char classes[] ALIGN1 =
78   *buffer++ = i;   "alpha"CLO "alnum"CLO "digit"CLO
79   for (i = 'a'; i <= 'z'; i++)   "lower"CLO "upper"CLO "space"CLO
80   *buffer++ = i;   "blank"CLO "punct"CLO "cntrl"CLO;
81    #define CLASS_invalid 0 /* we increment the retval */
82    #define CLASS_alpha 1
83    #define CLASS_alnum 2
84    #define CLASS_digit 3
85    #define CLASS_lower 4
86    #define CLASS_upper 5
87    #define CLASS_space 6
88    #define CLASS_blank 7
89    #define CLASS_punct 8
90    #define CLASS_cntrl 9
91    //#define CLASS_xdigit 10
92    //#define CLASS_graph 11
93    //#define CLASS_print 12
94     smalluint j;
95     { /* not really pretty.. */
96     char *tmp = xstrndup(arg, 7); // warning: xdigit would need 8, not 7
97     j = index_in_strings(classes, tmp) + 1;
98     free(tmp);
99   }   }
100   else if (strncmp(arg, "alnum", 5) == 0) {   if (j == CLASS_alnum || j == CLASS_digit) {
101   for (i = '0'; i <= '9'; i++)   for (i = '0'; i <= '9'; i++)
102   *buffer++ = i;   *buffer++ = i;
103     }
104     if (j == CLASS_alpha || j == CLASS_alnum || j == CLASS_upper) {
105   for (i = 'A'; i <= 'Z'; i++)   for (i = 'A'; i <= 'Z'; i++)
106   *buffer++ = i;   *buffer++ = i;
  for (i = 'a'; i <= 'z'; i++)  
  *buffer++ = i;  
107   }   }
108   else if (strncmp(arg, "digit", 5) == 0)   if (j == CLASS_alpha || j == CLASS_alnum || j == CLASS_lower) {
  for (i = '0'; i <= '9'; i++)  
  *buffer++ = i;  
  else if (strncmp(arg, "lower", 5) == 0)  
109   for (i = 'a'; i <= 'z'; i++)   for (i = 'a'; i <= 'z'; i++)
110   *buffer++ = i;   *buffer++ = i;
  else if (strncmp(arg, "upper", 5) == 0)  
  for (i = 'A'; i <= 'Z'; i++)  
  *buffer++ = i;  
  else if (strncmp(arg, "space", 5) == 0) {  
  const char s[] = "\t\n\v\f\r ";  
  strcat((char*)buffer, s);  
  buffer += sizeof(s) - 1;  
111   }   }
112   else if (strncmp(arg, "blank", 5) == 0) {   if (j == CLASS_space || j == CLASS_blank) {
113   *buffer++ = '\t';   *buffer++ = '\t';
114     if (j == CLASS_space) {
115     *buffer++ = '\n';
116     *buffer++ = '\v';
117     *buffer++ = '\f';
118     *buffer++ = '\r';
119     }
120   *buffer++ = ' ';   *buffer++ = ' ';
121   }   }
122   /* gcc gives a warning if braces aren't used here */   if (j == CLASS_punct || j == CLASS_cntrl) {
123   else if (strncmp(arg, "punct", 5) == 0) {   for (i = '\0'; i <= ASCII; i++)
124   for (i = 0; i <= ASCII; i++)   if ((j == CLASS_punct && isprint(i) && !isalnum(i) && !isspace(i))
125   if (isprint(i) && (!isalnum(i)) && (!isspace(i)))   || (j == CLASS_cntrl && iscntrl(i)))
126   *buffer++ = i;   *buffer++ = i;
127   }   }
128   else if (strncmp(arg, "cntrl", 5) == 0) {   if (j == CLASS_invalid) {
  for (i = 0; i <= ASCII; i++)  
  if (iscntrl(i))  
  *buffer++ = i;  
  }  
  else {  
129   *buffer++ = '[';   *buffer++ = '[';
130   *buffer++ = ':';   *buffer++ = ':';
131   continue;   continue;
132   }   }
133   break;   break;
134   }   }
135   if (ENABLE_FEATURE_TR_EQUIV && i == '=') {   /* "[xyz...", i=x, arg points to y */
136   *buffer++ = *arg;   if (ENABLE_FEATURE_TR_EQUIV && i == '=') { /* [=CHAR=] */
137   /* skip the closing =] */   *buffer++ = *arg; /* copy CHAR */
138   arg += 3;   if (!*arg || arg[1] != '=' || arg[2] != ']')
139     bb_show_usage();
140     arg += 3; /* skip CHAR=] */
141   continue;   continue;
142   }   }
143   if (*arg++ != '-') {   if (i == '\0' || *arg != '-') { /* not [x-...] - copy verbatim */
144   *buffer++ = '[';   *buffer++ = '[';
145   arg -= 2;   arg--; /* points to x */
146   continue;   continue; /* copy all, including eventual ']' */
147   }   }
148     /* [x-z] */
149     arg++; /* skip - */
150     if (arg[0] == '\0' || arg[1] != ']')
151     bb_show_usage();
152   ac = *arg++;   ac = *arg++;
153   while (i <= ac)   while (i <= ac)
154   *buffer++ = i++;   *buffer++ = i++;
155   arg++; /* Skip the assumed ']' */   arg++; /* skip ] */
156   } else   continue;
157   *buffer++ = *arg++;   }
158     *buffer++ = *arg++;
159   }   }
   
160   return (buffer - buffer_start);   return (buffer - buffer_start);
161  }  }
162    
163  static int complement(char *buffer, int buffer_len)  static int complement(char *buffer, int buffer_len)
164  {  {
165   short i, j, ix;   int i, j, ix;
166   char conv[ASCII + 2];   char conv[ASCII + 2];
167    
168   ix = 0;   ix = 0;
169   for (i = 0; i <= ASCII; i++) {   for (i = '\0'; i <= ASCII; i++) {
170   for (j = 0; j < buffer_len; j++)   for (j = 0; j < buffer_len; j++)
171   if (buffer[j] == i)   if (buffer[j] == i)
172   break;   break;
# Line 193  static int complement(char *buffer, int Line 177  static int complement(char *buffer, int
177   return ix;   return ix;
178  }  }
179    
180  int tr_main(int argc, char **argv)  int tr_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
181    int tr_main(int argc UNUSED_PARAM, char **argv)
182  {  {
183   unsigned char *ptr;   int output_length = 0, input_length;
  int output_length=0, input_length;  
  int idx = 1;  
184   int i;   int i;
185   RESERVE_CONFIG_BUFFER(output, BUFSIZ);   smalluint flags;
186   RESERVE_CONFIG_BUFFER(vector, ASCII+1);   ssize_t read_chars = 0;
187   RESERVE_CONFIG_BUFFER(invec,  ASCII+1);   size_t in_index = 0, out_index = 0;
188   RESERVE_CONFIG_BUFFER(outvec, ASCII+1);   unsigned last = UCHAR_MAX + 1; /* not equal to any char */
189     unsigned char coded, c;
190   /* ... but make them available globally */   unsigned char *output = xmalloc(BUFSIZ);
191   poutput = output;   char *vector = xzalloc((ASCII+1) * 3);
192   pvector = vector;   char *invec  = vector + (ASCII+1);
193   pinvec  = invec;   char *outvec = vector + (ASCII+1) * 2;
194   poutvec = outvec;  
195    #define TR_OPT_complement (1 << 0)
196   if (argc > 1 && argv[idx][0] == '-') {  #define TR_OPT_delete (1 << 1)
197   for (ptr = (unsigned char *) &argv[idx][1]; *ptr; ptr++) {  #define TR_OPT_squeeze_reps (1 << 2)
198   switch (*ptr) {  
199   case 'c':   flags = getopt32(argv, "+cds"); /* '+': stop at first non-option */
200   com_fl = TRUE;   argv += optind;
201   break;  
  case 'd':  
  del_fl = TRUE;  
  break;  
  case 's':  
  sq_fl = TRUE;  
  break;  
  default:  
  bb_show_usage();  
  }  
  }  
  idx++;  
  }  
202   for (i = 0; i <= ASCII; i++) {   for (i = 0; i <= ASCII; i++) {
203   vector[i] = i;   vector[i] = i;
204   invec[i] = outvec[i] = FALSE;   /*invec[i] = outvec[i] = FALSE; - done by xzalloc */
205   }   }
206    
207   if (argv[idx] != NULL) {  #define tr_buf bb_common_bufsiz1
208   input_length = expand(argv[idx++], bb_common_bufsiz1);   if (*argv != NULL) {
209   if (com_fl)   input_length = expand(*argv++, tr_buf);
210   input_length = complement(bb_common_bufsiz1, input_length);   if (flags & TR_OPT_complement)
211   if (argv[idx] != NULL) {   input_length = complement(tr_buf, input_length);
212   if (*argv[idx] == '\0')   if (*argv) {
213     if (argv[0][0] == '\0')
214   bb_error_msg_and_die("STRING2 cannot be empty");   bb_error_msg_and_die("STRING2 cannot be empty");
215   output_length = expand(argv[idx], output);   output_length = expand(*argv, (char *)output);
216   map(bb_common_bufsiz1, input_length, output, output_length);   map(vector, (unsigned char *)tr_buf, input_length, output, output_length);
217   }   }
218   for (i = 0; i < input_length; i++)   for (i = 0; i < input_length; i++)
219   invec[(GCC4_IS_STUPID)bb_common_bufsiz1[i]] = TRUE;   invec[(unsigned char)tr_buf[i]] = TRUE;
220   for (i = 0; i < output_length; i++)   for (i = 0; i < output_length; i++)
221   outvec[(GCC4_IS_STUPID)output[i]] = TRUE;   outvec[output[i]] = TRUE;
222   }   }
223   convert();  
224   return 0;   for (;;) {
225     /* If we're out of input, flush output and read more input. */
226     if ((ssize_t)in_index == read_chars) {
227     if (out_index) {
228     xwrite(STDOUT_FILENO, (char *)output, out_index);
229     out_index = 0;
230     }
231     read_chars = safe_read(STDIN_FILENO, tr_buf, BUFSIZ);
232     if (read_chars <= 0) {
233     if (read_chars < 0)
234     bb_perror_msg_and_die(bb_msg_read_error);
235     exit(EXIT_SUCCESS);
236     }
237     in_index = 0;
238     }
239     c = tr_buf[in_index++];
240     coded = vector[c];
241     if ((flags & TR_OPT_delete) && invec[c])
242     continue;
243     if ((flags & TR_OPT_squeeze_reps) && last == coded
244     && (invec[c] || outvec[coded]))
245     continue;
246     output[out_index++] = last = coded;
247     }
248     /* NOTREACHED */
249     return EXIT_SUCCESS;
250  }  }

Legend:
Removed from v.532
Changed lines
Added in v.816