Magellan Linux

Diff of /trunk/mkinitrd-magellan/busybox/editors/sed.c

Parent Directory | Revision Log | View Patch

revision 532 by niro, Sat Sep 1 22:45:15 2007 UTC | revision 1123 by niro, Wed Aug 18 21:56:57 2010 UTC
# Line 5  Line 5 
5   * Copyright (C) 1999,2000,2001 by Lineo, inc. and Mark Whitley   * Copyright (C) 1999,2000,2001 by Lineo, inc. and Mark Whitley
6   * Copyright (C) 1999,2000,2001 by Mark Whitley <markw@codepoet.org>   * Copyright (C) 1999,2000,2001 by Mark Whitley <markw@codepoet.org>
7   * Copyright (C) 2002  Matt Kraai   * Copyright (C) 2002  Matt Kraai
8   * Copyright (C) 2003 by Glenn McGrath <bug1@iinet.net.au>   * Copyright (C) 2003 by Glenn McGrath
9   * Copyright (C) 2003,2004 by Rob Landley <rob@landley.net>   * Copyright (C) 2003,2004 by Rob Landley <rob@landley.net>
10   *   *
11   * MAINTAINER: Rob Landley <rob@landley.net>   * MAINTAINER: Rob Landley <rob@landley.net>
# Line 21  Line 21 
21    add_cmd() is called on each line of sed command text (from a file or from    add_cmd() is called on each line of sed command text (from a file or from
22    the command line).  It calls get_address() and parse_cmd_args().  The    the command line).  It calls get_address() and parse_cmd_args().  The
23    resulting sed_cmd_t structures are appended to a linked list    resulting sed_cmd_t structures are appended to a linked list
24    (bbg.sed_cmd_head/bbg.sed_cmd_tail).    (G.sed_cmd_head/G.sed_cmd_tail).
25    
26    add_input_file() adds a FILE * to the list of input files.  We need to    add_input_file() adds a FILE* to the list of input files.  We need to
27    know all input sources ahead of time to find the last line for the $ match.    know all input sources ahead of time to find the last line for the $ match.
28    
29    process_files() does actual sedding, reading data lines from each input FILE *    process_files() does actual sedding, reading data lines from each input FILE *
# Line 58  Line 58 
58   Reference http://www.opengroup.org/onlinepubs/007904975/utilities/sed.html   Reference http://www.opengroup.org/onlinepubs/007904975/utilities/sed.html
59  */  */
60    
61  #include "busybox.h"  #include "libbb.h"
62  #include "xregex.h"  #include "xregex.h"
63    
64  /* Each sed command turns into one of these structures. */  /* Each sed command turns into one of these structures. */
65  typedef struct sed_cmd_s {  typedef struct sed_cmd_s {
66   /* Ordered by alignment requirements: currently 36 bytes on x86 */   /* Ordered by alignment requirements: currently 36 bytes on x86 */
67     struct sed_cmd_s *next; /* Next command (linked list, NULL terminated) */
68    
69   /* address storage */   /* address storage */
70   regex_t *beg_match;     /* sed -e '/match/cmd' */   regex_t *beg_match;     /* sed -e '/match/cmd' */
# Line 72  typedef struct sed_cmd_s { Line 73  typedef struct sed_cmd_s {
73   int beg_line;           /* 'sed 1p'   0 == apply commands to all lines */   int beg_line;           /* 'sed 1p'   0 == apply commands to all lines */
74   int end_line;           /* 'sed 1,3p' 0 == one line only. -1 = last line ($) */   int end_line;           /* 'sed 1,3p' 0 == one line only. -1 = last line ($) */
75    
76   FILE *file;             /* File (sw) command writes to, -1 for none. */   FILE *sw_file;          /* File (sw) command writes to, -1 for none. */
77   char *string;           /* Data string for (saicytb) commands. */   char *string;           /* Data string for (saicytb) commands. */
78    
79   unsigned short which_match;     /* (s) Which match to replace (0 for all) */   unsigned which_match;   /* (s) Which match to replace (0 for all) */
80    
81   /* Bitfields (gcc won't group them if we don't) */   /* Bitfields (gcc won't group them if we don't) */
82   unsigned int invert:1;          /* the '!' after the address */   unsigned invert:1;      /* the '!' after the address */
83   unsigned int in_match:1;        /* Next line also included in match? */   unsigned in_match:1;    /* Next line also included in match? */
84   unsigned int sub_p:1;           /* (s) print option */   unsigned sub_p:1;       /* (s) print option */
85    
86   int last_char;                  /* Last line written by (sw) had no '\n' */   char sw_last_char;      /* Last line written by (sw) had no '\n' */
87    
88   /* GENERAL FIELDS */   /* GENERAL FIELDS */
89   char cmd;               /* The command char: abcdDgGhHilnNpPqrstwxy:={} */   char cmd;               /* The command char: abcdDgGhHilnNpPqrstwxy:={} */
  struct sed_cmd_s *next; /* Next command (linked list, NULL terminated) */  
90  } sed_cmd_t;  } sed_cmd_t;
91    
92  static const char *const semicolon_whitespace = "; \n\r\t\v";  static const char semicolon_whitespace[] ALIGN1 = "; \n\r\t\v";
93    
94  struct sed_globals {  struct globals {
95   /* options */   /* options */
96   int be_quiet, regex_type;   int be_quiet, regex_type;
97   FILE *nonstdout;   FILE *nonstdout;
# Line 117  struct sed_globals { Line 117  struct sed_globals {
117   int idx; /* Space used */   int idx; /* Space used */
118   int len; /* Space allocated */   int len; /* Space allocated */
119   } pipeline;   } pipeline;
120  } bbg;  } FIX_ALIASING;
121    #define G (*(struct globals*)&bb_common_bufsiz1)
122    struct BUG_G_too_big {
123            char BUG_G_too_big[sizeof(G) <= COMMON_BUFSIZE ? 1 : -1];
124    };
125    #define INIT_G() do { \
126     G.sed_cmd_tail = &G.sed_cmd_head; \
127    } while (0)
128    
129    
130  #if ENABLE_FEATURE_CLEAN_UP  #if ENABLE_FEATURE_CLEAN_UP
131  static void sed_free_and_close_stuff(void)  static void sed_free_and_close_stuff(void)
132  {  {
133   sed_cmd_t *sed_cmd = bbg.sed_cmd_head.next;   sed_cmd_t *sed_cmd = G.sed_cmd_head.next;
134    
135   llist_free(bbg.append_head, free);   llist_free(G.append_head, free);
136    
137   while (sed_cmd) {   while (sed_cmd) {
138   sed_cmd_t *sed_cmd_next = sed_cmd->next;   sed_cmd_t *sed_cmd_next = sed_cmd->next;
139    
140   if (sed_cmd->file)   if (sed_cmd->sw_file)
141   xprint_and_close_file(sed_cmd->file);   xprint_and_close_file(sed_cmd->sw_file);
142    
143   if (sed_cmd->beg_match) {   if (sed_cmd->beg_match) {
144   regfree(sed_cmd->beg_match);   regfree(sed_cmd->beg_match);
# Line 150  static void sed_free_and_close_stuff(voi Line 157  static void sed_free_and_close_stuff(voi
157   sed_cmd = sed_cmd_next;   sed_cmd = sed_cmd_next;
158   }   }
159    
160   if (bbg.hold_space) free(bbg.hold_space);   free(G.hold_space);
161    
162   while (bbg.current_input_file < bbg.input_file_count)   while (G.current_input_file < G.input_file_count)
163   fclose(bbg.input_file_list[bbg.current_input_file++]);   fclose(G.input_file_list[G.current_input_file++]);
164  }  }
165  #else  #else
166  void sed_free_and_close_stuff(void);  void sed_free_and_close_stuff(void);
# Line 163  void sed_free_and_close_stuff(void); Line 170  void sed_free_and_close_stuff(void);
170    
171  static void cleanup_outname(void)  static void cleanup_outname(void)
172  {  {
173   if (bbg.outname) unlink(bbg.outname);   if (G.outname) unlink(G.outname);
174  }  }
175    
176  /* strdup, replacing "\n" with '\n', and "\delimiter" with 'delimiter' */  /* strcpy, replacing "\from" with 'to'. If to is NUL, replacing "\any" with 'any' */
177    
178  static void parse_escapes(char *dest, char *string, int len, char from, char to)  static void parse_escapes(char *dest, const char *string, int len, char from, char to)
179  {  {
180   int i = 0;   int i = 0;
181    
# Line 181  static void parse_escapes(char *dest, ch Line 188  static void parse_escapes(char *dest, ch
188   }   }
189   *dest++ = string[i++];   *dest++ = string[i++];
190   }   }
191     /* TODO: is it safe wrt a string with trailing '\\' ? */
192   *dest++ = string[i++];   *dest++ = string[i++];
193   }   }
194   *dest = 0;   *dest = '\0';
195  }  }
196    
197  static char *copy_parsing_escapes(char *string, int len)  static char *copy_parsing_escapes(const char *string, int len)
198  {  {
199   char *dest = xmalloc(len + 1);   char *dest = xmalloc(len + 1);
200    
201   parse_escapes(dest, string, len, 'n', '\n');   parse_escapes(dest, string, len, 'n', '\n');
202     /* GNU sed also recognizes \t */
203     parse_escapes(dest, dest, strlen(dest), 't', '\t');
204   return dest;   return dest;
205  }  }
206    
# Line 198  static char *copy_parsing_escapes(char * Line 208  static char *copy_parsing_escapes(char *
208  /*  /*
209   * index_of_next_unescaped_regexp_delim - walks left to right through a string   * index_of_next_unescaped_regexp_delim - walks left to right through a string
210   * beginning at a specified index and returns the index of the next regular   * beginning at a specified index and returns the index of the next regular
211   * expression delimiter (typically a forward * slash ('/')) not preceded by   * expression delimiter (typically a forward slash ('/')) not preceded by
212   * a backslash ('\').  A negative delimiter disables square bracket checking.   * a backslash ('\').  A negative delimiter disables square bracket checking.
213   */   */
214  static int index_of_next_unescaped_regexp_delim(int delimiter, char *str)  static int index_of_next_unescaped_regexp_delim(int delimiter, const char *str)
215  {  {
216   int bracket = -1;   int bracket = -1;
217   int escaped = 0;   int escaped = 0;
# Line 235  static int index_of_next_unescaped_regex Line 245  static int index_of_next_unescaped_regex
245  /*  /*
246   *  Returns the index of the third delimiter   *  Returns the index of the third delimiter
247   */   */
248  static int parse_regex_delim(char *cmdstr, char **match, char **replace)  static int parse_regex_delim(const char *cmdstr, char **match, char **replace)
249  {  {
250   char *cmdstr_ptr = cmdstr;   const char *cmdstr_ptr = cmdstr;
251   char delimiter;   char delimiter;
252   int idx = 0;   int idx = 0;
253    
# Line 262  static int parse_regex_delim(char *cmdst Line 272  static int parse_regex_delim(char *cmdst
272  /*  /*
273   * returns the index in the string just past where the address ends.   * returns the index in the string just past where the address ends.
274   */   */
275  static int get_address(char *my_str, int *linenum, regex_t ** regex)  static int get_address(const char *my_str, int *linenum, regex_t ** regex)
276  {  {
277   char *pos = my_str;   const char *pos = my_str;
278    
279   if (isdigit(*my_str)) {   if (isdigit(*my_str)) {
280   *linenum = strtol(my_str, &pos, 10);   *linenum = strtol(my_str, (char**)&pos, 10);
281   /* endstr shouldnt ever equal NULL */   /* endstr shouldnt ever equal NULL */
282   } else if (*my_str == '$') {   } else if (*my_str == '$') {
283   *linenum = -1;   *linenum = -1;
# Line 282  static int get_address(char *my_str, int Line 292  static int get_address(char *my_str, int
292   next = index_of_next_unescaped_regexp_delim(delimiter, ++pos);   next = index_of_next_unescaped_regexp_delim(delimiter, ++pos);
293   temp = copy_parsing_escapes(pos, next);   temp = copy_parsing_escapes(pos, next);
294   *regex = xmalloc(sizeof(regex_t));   *regex = xmalloc(sizeof(regex_t));
295   xregcomp(*regex, temp, bbg.regex_type|REG_NEWLINE);   xregcomp(*regex, temp, G.regex_type|REG_NEWLINE);
296   free(temp);   free(temp);
297   /* Move position to next character after last delimiter */   /* Move position to next character after last delimiter */
298   pos += (next+1);   pos += (next+1);
# Line 291  static int get_address(char *my_str, int Line 301  static int get_address(char *my_str, int
301  }  }
302    
303  /* Grab a filename.  Whitespace at start is skipped, then goes to EOL. */  /* Grab a filename.  Whitespace at start is skipped, then goes to EOL. */
304  static int parse_file_cmd(sed_cmd_t *sed_cmd, char *filecmdstr, char **retval)  static int parse_file_cmd(/*sed_cmd_t *sed_cmd,*/ const char *filecmdstr, char **retval)
305  {  {
306   int start = 0, idx, hack = 0;   int start = 0, idx, hack = 0;
307    
308   /* Skip whitespace, then grab filename to end of line */   /* Skip whitespace, then grab filename to end of line */
309   while (isspace(filecmdstr[start])) start++;   while (isspace(filecmdstr[start]))
310     start++;
311   idx = start;   idx = start;
312   while (filecmdstr[idx] && filecmdstr[idx] != '\n') idx++;   while (filecmdstr[idx] && filecmdstr[idx] != '\n')
313     idx++;
314    
315   /* If lines glued together, put backslash back. */   /* If lines glued together, put backslash back. */
316   if (filecmdstr[idx] == '\n') hack = 1;   if (filecmdstr[idx] == '\n')
317     hack = 1;
318   if (idx == start)   if (idx == start)
319   bb_error_msg_and_die("empty filename");   bb_error_msg_and_die("empty filename");
320   *retval = xstrndup(filecmdstr+start, idx-start+hack+1);   *retval = xstrndup(filecmdstr+start, idx-start+hack+1);
321   if (hack) (*retval)[idx] = '\\';   if (hack)
322     (*retval)[idx] = '\\';
323    
324   return idx;   return idx;
325  }  }
326    
327  static int parse_subst_cmd(sed_cmd_t *sed_cmd, char *substr)  static int parse_subst_cmd(sed_cmd_t *sed_cmd, const char *substr)
328  {  {
329   int cflags = bbg.regex_type;   int cflags = G.regex_type;
330   char *match;   char *match;
331   int idx = 0;   int idx;
332    
333   /*   /*
334   * A substitution command should look something like this:   * A substitution command should look something like this:
# Line 337  static int parse_subst_cmd(sed_cmd_t *se Line 351  static int parse_subst_cmd(sed_cmd_t *se
351   if (isdigit(substr[idx])) {   if (isdigit(substr[idx])) {
352   if (match[0] != '^') {   if (match[0] != '^') {
353   /* Match 0 treated as all, multiple matches we take the last one. */   /* Match 0 treated as all, multiple matches we take the last one. */
354   char *pos = substr + idx;   const char *pos = substr + idx;
355   /* FIXME: error check? */  /* FIXME: error check? */
356   sed_cmd->which_match = (unsigned short)strtol(substr+idx, &pos, 10);   sed_cmd->which_match = (unsigned)strtol(substr+idx, (char**) &pos, 10);
357   idx = pos - substr;   idx = pos - substr;
358   }   }
359   continue;   continue;
360   }   }
361   /* Skip spaces */   /* Skip spaces */
362   if (isspace(substr[idx])) continue;   if (isspace(substr[idx]))
363     continue;
364    
365   switch (substr[idx]) {   switch (substr[idx]) {
366   /* Replace all occurrences */   /* Replace all occurrences */
367   case 'g':   case 'g':
368   if (match[0] != '^') sed_cmd->which_match = 0;   if (match[0] != '^')
369     sed_cmd->which_match = 0;
370   break;   break;
371   /* Print pattern space */   /* Print pattern space */
372   case 'p':   case 'p':
# Line 360  static int parse_subst_cmd(sed_cmd_t *se Line 376  static int parse_subst_cmd(sed_cmd_t *se
376   case 'w':   case 'w':
377   {   {
378   char *temp;   char *temp;
379   idx += parse_file_cmd(sed_cmd, substr+idx, &temp);   idx += parse_file_cmd(/*sed_cmd,*/ substr+idx, &temp);
   
380   break;   break;
381   }   }
382   /* Ignore case (gnu exension) */   /* Ignore case (gnu exension) */
# Line 370  static int parse_subst_cmd(sed_cmd_t *se Line 385  static int parse_subst_cmd(sed_cmd_t *se
385   break;   break;
386   /* Comment */   /* Comment */
387   case '#':   case '#':
388   while (substr[++idx]) /*skip all*/;   // while (substr[++idx]) continue;
389     idx += strlen(substr + idx); // same
390   /* Fall through */   /* Fall through */
391   /* End of command */   /* End of command */
392   case ';':   case ';':
# Line 380  static int parse_subst_cmd(sed_cmd_t *se Line 396  static int parse_subst_cmd(sed_cmd_t *se
396   bb_error_msg_and_die("bad option in substitution expression");   bb_error_msg_and_die("bad option in substitution expression");
397   }   }
398   }   }
399  out:   out:
400   /* compile the match string into a regex */   /* compile the match string into a regex */
401   if (*match != '\0') {   if (*match != '\0') {
402   /* If match is empty, we use last regex used at runtime */   /* If match is empty, we use last regex used at runtime */
# Line 395  out: Line 411  out:
411  /*  /*
412   *  Process the commands arguments   *  Process the commands arguments
413   */   */
414  static char *parse_cmd_args(sed_cmd_t *sed_cmd, char *cmdstr)  static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr)
415  {  {
416   /* handle (s)ubstitution command */   /* handle (s)ubstitution command */
417   if (sed_cmd->cmd == 's')   if (sed_cmd->cmd == 's')
# Line 403  static char *parse_cmd_args(sed_cmd_t *s Line 419  static char *parse_cmd_args(sed_cmd_t *s
419   /* handle edit cmds: (a)ppend, (i)nsert, and (c)hange */   /* handle edit cmds: (a)ppend, (i)nsert, and (c)hange */
420   else if (strchr("aic", sed_cmd->cmd)) {   else if (strchr("aic", sed_cmd->cmd)) {
421   if ((sed_cmd->end_line || sed_cmd->end_match) && sed_cmd->cmd != 'c')   if ((sed_cmd->end_line || sed_cmd->end_match) && sed_cmd->cmd != 'c')
422   bb_error_msg_and_die   bb_error_msg_and_die("only a beginning address can be specified for edit commands");
  ("only a beginning address can be specified for edit commands");  
423   for (;;) {   for (;;) {
424   if (*cmdstr == '\n' || *cmdstr == '\\') {   if (*cmdstr == '\n' || *cmdstr == '\\') {
425   cmdstr++;   cmdstr++;
426   break;   break;
427   } else if (isspace(*cmdstr))   }
428   cmdstr++;   if (!isspace(*cmdstr))
  else  
429   break;   break;
430     cmdstr++;
431   }   }
432   sed_cmd->string = xstrdup(cmdstr);   sed_cmd->string = xstrdup(cmdstr);
433   parse_escapes(sed_cmd->string, sed_cmd->string, strlen(cmdstr), 0, 0);   /* "\anychar" -> "anychar" */
434     parse_escapes(sed_cmd->string, sed_cmd->string, strlen(cmdstr), '\0', '\0');
435   cmdstr += strlen(cmdstr);   cmdstr += strlen(cmdstr);
436   /* handle file cmds: (r)ead */   /* handle file cmds: (r)ead */
437   } else if (strchr("rw", sed_cmd->cmd)) {   } else if (strchr("rw", sed_cmd->cmd)) {
438   if (sed_cmd->end_line || sed_cmd->end_match)   if (sed_cmd->end_line || sed_cmd->end_match)
439   bb_error_msg_and_die("command only uses one address");   bb_error_msg_and_die("command only uses one address");
440   cmdstr += parse_file_cmd(sed_cmd, cmdstr, &sed_cmd->string);   cmdstr += parse_file_cmd(/*sed_cmd,*/ cmdstr, &sed_cmd->string);
441   if (sed_cmd->cmd == 'w')   if (sed_cmd->cmd == 'w') {
442   sed_cmd->file = xfopen(sed_cmd->string, "w");   sed_cmd->sw_file = xfopen_for_write(sed_cmd->string);
443     sed_cmd->sw_last_char = '\n';
444     }
445   /* handle branch commands */   /* handle branch commands */
446   } else if (strchr(":btT", sed_cmd->cmd)) {   } else if (strchr(":btT", sed_cmd->cmd)) {
447   int length;   int length;
# Line 467  static char *parse_cmd_args(sed_cmd_t *s Line 485  static char *parse_cmd_args(sed_cmd_t *s
485    
486  /* Parse address+command sets, skipping comment lines. */  /* Parse address+command sets, skipping comment lines. */
487    
488  static void add_cmd(char *cmdstr)  static void add_cmd(const char *cmdstr)
489  {  {
490   sed_cmd_t *sed_cmd;   sed_cmd_t *sed_cmd;
491   int temp;   unsigned len, n;
492    
493   /* Append this line to any unfinished line from last time. */   /* Append this line to any unfinished line from last time. */
494   if (bbg.add_cmd_line) {   if (G.add_cmd_line) {
495   cmdstr = xasprintf("%s\n%s", bbg.add_cmd_line, cmdstr);   char *tp = xasprintf("%s\n%s", G.add_cmd_line, cmdstr);
496   free(bbg.add_cmd_line);   free(G.add_cmd_line);
497   bbg.add_cmd_line = cmdstr;   cmdstr = G.add_cmd_line = tp;
498   }   }
499    
500   /* If this line ends with backslash, request next line. */   /* If this line ends with unescaped backslash, request next line. */
501   temp = strlen(cmdstr);   n = len = strlen(cmdstr);
502   if (temp && cmdstr[temp-1] == '\\') {   while (n && cmdstr[n-1] == '\\')
503   if (!bbg.add_cmd_line)   n--;
504   bbg.add_cmd_line = xstrdup(cmdstr);   if ((len - n) & 1) { /* if odd number of trailing backslashes */
505   bbg.add_cmd_line[temp-1] = 0;   if (!G.add_cmd_line)
506     G.add_cmd_line = xstrdup(cmdstr);
507     G.add_cmd_line[len-1] = '\0';
508   return;   return;
509   }   }
510    
# Line 500  static void add_cmd(char *cmdstr) Line 520  static void add_cmd(char *cmdstr)
520   if (*cmdstr == '#') {   if (*cmdstr == '#') {
521   /* "#n" is the same as using -n on the command line */   /* "#n" is the same as using -n on the command line */
522   if (cmdstr[1] == 'n')   if (cmdstr[1] == 'n')
523   bbg.be_quiet++;   G.be_quiet++;
524   cmdstr = strpbrk(cmdstr, "\n\r");   cmdstr = strpbrk(cmdstr, "\n\r");
525   if (!cmdstr) break;   if (!cmdstr) break;
526   continue;   continue;
# Line 543  static void add_cmd(char *cmdstr) Line 563  static void add_cmd(char *cmdstr)
563   /* last part (mandatory) will be a command */   /* last part (mandatory) will be a command */
564   if (!*cmdstr)   if (!*cmdstr)
565   bb_error_msg_and_die("missing command");   bb_error_msg_and_die("missing command");
566   sed_cmd->cmd = *(cmdstr++);   sed_cmd->cmd = *cmdstr++;
567   cmdstr = parse_cmd_args(sed_cmd, cmdstr);   cmdstr = parse_cmd_args(sed_cmd, cmdstr);
568    
569   /* Add the command to the command array */   /* Add the command to the command array */
570   bbg.sed_cmd_tail->next = sed_cmd;   G.sed_cmd_tail->next = sed_cmd;
571   bbg.sed_cmd_tail = bbg.sed_cmd_tail->next;   G.sed_cmd_tail = G.sed_cmd_tail->next;
572   }   }
573    
574   /* If we glued multiple lines together, free the memory. */   /* If we glued multiple lines together, free the memory. */
575   free(bbg.add_cmd_line);   free(G.add_cmd_line);
576   bbg.add_cmd_line = NULL;   G.add_cmd_line = NULL;
577  }  }
578    
579  /* Append to a string, reallocating memory as necessary. */  /* Append to a string, reallocating memory as necessary. */
# Line 562  static void add_cmd(char *cmdstr) Line 582  static void add_cmd(char *cmdstr)
582    
583  static void pipe_putc(char c)  static void pipe_putc(char c)
584  {  {
585   if (bbg.pipeline.idx == bbg.pipeline.len) {   if (G.pipeline.idx == G.pipeline.len) {
586   bbg.pipeline.buf = xrealloc(bbg.pipeline.buf,   G.pipeline.buf = xrealloc(G.pipeline.buf,
587   bbg.pipeline.len + PIPE_GROW);   G.pipeline.len + PIPE_GROW);
588   bbg.pipeline.len += PIPE_GROW;   G.pipeline.len += PIPE_GROW;
589   }   }
590   bbg.pipeline.buf[bbg.pipeline.idx++] = c;   G.pipeline.buf[G.pipeline.idx++] = c;
591  }  }
592    
593  static void do_subst_w_backrefs(char *line, char *replace)  static void do_subst_w_backrefs(char *line, char *replace)
594  {  {
595   int i,j;   int i, j;
596    
597   /* go through the replacement string */   /* go through the replacement string */
598   for (i = 0; replace[i]; i++) {   for (i = 0; replace[i]; i++) {
# Line 580  static void do_subst_w_backrefs(char *li Line 600  static void do_subst_w_backrefs(char *li
600   if (replace[i] == '\\') {   if (replace[i] == '\\') {
601   unsigned backref = replace[++i] - '0';   unsigned backref = replace[++i] - '0';
602   if (backref <= 9) {   if (backref <= 9) {
603   /* print out the text held in bbg.regmatch[backref] */   /* print out the text held in G.regmatch[backref] */
604   if (bbg.regmatch[backref].rm_so != -1) {   if (G.regmatch[backref].rm_so != -1) {
605   j = bbg.regmatch[backref].rm_so;   j = G.regmatch[backref].rm_so;
606   while (j < bbg.regmatch[backref].rm_eo)   while (j < G.regmatch[backref].rm_eo)
607   pipe_putc(line[j++]);   pipe_putc(line[j++]);
608   }   }
609   continue;   continue;
# Line 597  static void do_subst_w_backrefs(char *li Line 617  static void do_subst_w_backrefs(char *li
617   }   }
618   /* if we find an unescaped '&' print out the whole matched text. */   /* if we find an unescaped '&' print out the whole matched text. */
619   if (replace[i] == '&') {   if (replace[i] == '&') {
620   j = bbg.regmatch[0].rm_so;   j = G.regmatch[0].rm_so;
621   while (j < bbg.regmatch[0].rm_eo)   while (j < G.regmatch[0].rm_eo)
622   pipe_putc(line[j++]);   pipe_putc(line[j++]);
623   continue;   continue;
624   }   }
# Line 607  static void do_subst_w_backrefs(char *li Line 627  static void do_subst_w_backrefs(char *li
627   }   }
628  }  }
629    
630  static int do_subst_command(sed_cmd_t *sed_cmd, char **line)  static int do_subst_command(sed_cmd_t *sed_cmd, char **line_p)
631  {  {
632   char *oldline = *line;   char *line = *line_p;
633   int altered = 0;   int altered = 0;
634   int match_count = 0;   unsigned match_count = 0;
635   regex_t *current_regex;   regex_t *current_regex;
636    
637     current_regex = sed_cmd->sub_match;
638   /* Handle empty regex. */   /* Handle empty regex. */
639   if (sed_cmd->sub_match == NULL) {   if (!current_regex) {
640   current_regex = bbg.previous_regex_ptr;   current_regex = G.previous_regex_ptr;
641   if (!current_regex)   if (!current_regex)
642   bb_error_msg_and_die("no previous regexp");   bb_error_msg_and_die("no previous regexp");
643   } else   }
644   bbg.previous_regex_ptr = current_regex = sed_cmd->sub_match;   G.previous_regex_ptr = current_regex;
645    
646   /* Find the first match */   /* Find the first match */
647   if (REG_NOMATCH == regexec(current_regex, oldline, 10, bbg.regmatch, 0))   if (REG_NOMATCH == regexec(current_regex, line, 10, G.regmatch, 0))
648   return 0;   return 0;
649    
650   /* Initialize temporary output buffer. */   /* Initialize temporary output buffer. */
651   bbg.pipeline.buf = xmalloc(PIPE_GROW);   G.pipeline.buf = xmalloc(PIPE_GROW);
652   bbg.pipeline.len = PIPE_GROW;   G.pipeline.len = PIPE_GROW;
653   bbg.pipeline.idx = 0;   G.pipeline.idx = 0;
654    
655   /* Now loop through, substituting for matches */   /* Now loop through, substituting for matches */
656   do {   do {
# Line 639  static int do_subst_command(sed_cmd_t *s Line 660  static int do_subst_command(sed_cmd_t *s
660     echo " a.b" | busybox sed 's [^ .]* x g'     echo " a.b" | busybox sed 's [^ .]* x g'
661     The match_count check is so not to break     The match_count check is so not to break
662     echo "hi" | busybox sed 's/^/!/g' */     echo "hi" | busybox sed 's/^/!/g' */
663   if (!bbg.regmatch[0].rm_so && !bbg.regmatch[0].rm_eo && match_count) {   if (!G.regmatch[0].rm_so && !G.regmatch[0].rm_eo && match_count) {
664   pipe_putc(*oldline++);   pipe_putc(*line++);
665   continue;   continue;
666   }   }
667    
# Line 648  static int do_subst_command(sed_cmd_t *s Line 669  static int do_subst_command(sed_cmd_t *s
669    
670   /* If we aren't interested in this match, output old line to   /* If we aren't interested in this match, output old line to
671     end of match and continue */     end of match and continue */
672   if (sed_cmd->which_match && sed_cmd->which_match != match_count) {   if (sed_cmd->which_match
673   for (i = 0; i < bbg.regmatch[0].rm_eo; i++)   && (sed_cmd->which_match != match_count)
674   pipe_putc(*oldline++);   ) {
675     for (i = 0; i < G.regmatch[0].rm_eo; i++)
676     pipe_putc(*line++);
677   continue;   continue;
678   }   }
679    
680   /* print everything before the match */   /* print everything before the match */
681   for (i = 0; i < bbg.regmatch[0].rm_so; i++)   for (i = 0; i < G.regmatch[0].rm_so; i++)
682   pipe_putc(oldline[i]);   pipe_putc(line[i]);
683    
684   /* then print the substitution string */   /* then print the substitution string */
685   do_subst_w_backrefs(oldline, sed_cmd->string);   do_subst_w_backrefs(line, sed_cmd->string);
686    
687   /* advance past the match */   /* advance past the match */
688   oldline += bbg.regmatch[0].rm_eo;   line += G.regmatch[0].rm_eo;
689   /* flag that something has changed */   /* flag that something has changed */
690   altered++;   altered++;
691    
692   /* if we're not doing this globally, get out now */   /* if we're not doing this globally, get out now */
693   if (sed_cmd->which_match) break;   if (sed_cmd->which_match)
694   } while (*oldline && (regexec(current_regex, oldline, 10, bbg.regmatch, 0) != REG_NOMATCH));   break;
695    
696   /* Copy rest of string into output pipeline */  //maybe (G.regmatch[0].rm_eo ? REG_NOTBOL : 0) instead of unconditional REG_NOTBOL?
697     } while (*line && regexec(current_regex, line, 10, G.regmatch, REG_NOTBOL) != REG_NOMATCH);
698    
699   while (*oldline)   /* Copy rest of string into output pipeline */
700   pipe_putc(*oldline++);   while (1) {
701   pipe_putc(0);   char c = *line++;
702     pipe_putc(c);
703     if (c == '\0')
704     break;
705     }
706    
707   free(*line);   free(*line_p);
708   *line = bbg.pipeline.buf;   *line_p = G.pipeline.buf;
709   return altered;   return altered;
710  }  }
711    
# Line 686  static sed_cmd_t *branch_to(char *label) Line 714  static sed_cmd_t *branch_to(char *label)
714  {  {
715   sed_cmd_t *sed_cmd;   sed_cmd_t *sed_cmd;
716    
717   for (sed_cmd = bbg.sed_cmd_head.next; sed_cmd; sed_cmd = sed_cmd->next) {   for (sed_cmd = G.sed_cmd_head.next; sed_cmd; sed_cmd = sed_cmd->next) {
718   if (sed_cmd->cmd == ':' && sed_cmd->string && !strcmp(sed_cmd->string, label)) {   if (sed_cmd->cmd == ':' && sed_cmd->string && !strcmp(sed_cmd->string, label)) {
719   return sed_cmd;   return sed_cmd;
720   }   }
# Line 696  static sed_cmd_t *branch_to(char *label) Line 724  static sed_cmd_t *branch_to(char *label)
724    
725  static void append(char *s)  static void append(char *s)
726  {  {
727   llist_add_to_end(&bbg.append_head, xstrdup(s));   llist_add_to_end(&G.append_head, xstrdup(s));
728  }  }
729    
730  static void flush_append(void)  static void flush_append(void)
# Line 704  static void flush_append(void) Line 732  static void flush_append(void)
732   char *data;   char *data;
733    
734   /* Output appended lines. */   /* Output appended lines. */
735   while ((data = (char *)llist_pop(&bbg.append_head))) {   while ((data = (char *)llist_pop(&G.append_head))) {
736   fprintf(bbg.nonstdout, "%s\n", data);   fprintf(G.nonstdout, "%s\n", data);
737   free(data);   free(data);
738   }   }
739  }  }
740    
741  static void add_input_file(FILE *file)  static void add_input_file(FILE *file)
742  {  {
743   bbg.input_file_list = xrealloc(bbg.input_file_list,   G.input_file_list = xrealloc_vector(G.input_file_list, 2, G.input_file_count);
744   (bbg.input_file_count + 1) * sizeof(FILE *));   G.input_file_list[G.input_file_count++] = file;
  bbg.input_file_list[bbg.input_file_count++] = file;  
745  }  }
746    
747  /* Get next line of input from bbg.input_file_list, flushing append buffer and  /* Get next line of input from G.input_file_list, flushing append buffer and
748   * noting if we ran out of files without a newline on the last line we read.   * noting if we ran out of files without a newline on the last line we read.
749   */   */
750  static char *get_next_line(int *last_char)  enum {
751     NO_EOL_CHAR = 1,
752     LAST_IS_NUL = 2,
753    };
754    static char *get_next_line(char *gets_char)
755  {  {
756   char *temp = NULL;   char *temp = NULL;
757   int len, lc;   int len;
758     char gc;
759    
  lc = 0;  
760   flush_append();   flush_append();
761   while (bbg.current_input_file < bbg.input_file_count) {  
762     /* will be returned if last line in the file
763     * doesn't end with either '\n' or '\0' */
764     gc = NO_EOL_CHAR;
765     while (G.current_input_file < G.input_file_count) {
766     FILE *fp = G.input_file_list[G.current_input_file];
767   /* Read line up to a newline or NUL byte, inclusive,   /* Read line up to a newline or NUL byte, inclusive,
768   * return malloc'ed char[]. length of the chunk read   * return malloc'ed char[]. length of the chunk read
769   * is stored in len. NULL if EOF/error */   * is stored in len. NULL if EOF/error */
770   temp = bb_get_chunk_from_file(   temp = bb_get_chunk_from_file(fp, &len);
  bbg.input_file_list[bbg.current_input_file], &len);  
771   if (temp) {   if (temp) {
772   /* len > 0 here, it's ok to do temp[len-1] */   /* len > 0 here, it's ok to do temp[len-1] */
773   char c = temp[len-1];   char c = temp[len-1];
774   if (c == '\n' || c == '\0') {   if (c == '\n' || c == '\0') {
775   temp[len-1] = '\0';   temp[len-1] = '\0';
776   lc |= (unsigned char)c;   gc = c;
777   break;   if (c == '\0') {
778     int ch = fgetc(fp);
779     if (ch != EOF)
780     ungetc(ch, fp);
781     else
782     gc = LAST_IS_NUL;
783     }
784   }   }
785   /* will be returned if last line in the file   /* else we put NO_EOL_CHAR into *gets_char */
  * doesn't end with either '\n' or '\0' */  
  lc |= 0x100;  
786   break;   break;
787    
788     /* NB: I had the idea of peeking next file(s) and returning
789     * NO_EOL_CHAR only if it is the *last* non-empty
790     * input file. But there is a case where this won't work:
791     * file1: "a woo\nb woo"
792     * file2: "c no\nd no"
793     * sed -ne 's/woo/bang/p' input1 input2 => "a bang\nb bang"
794     * (note: *no* newline after "b bang"!) */
795   }   }
796   /* Close this file and advance to next one */   /* Close this file and advance to next one */
797   fclose(bbg.input_file_list[bbg.current_input_file++]);   fclose(fp);
798   /* "this is the first line from new input file" */   G.current_input_file++;
  lc |= 0x200;  
799   }   }
800   *last_char = lc;   *gets_char = gc;
801   return temp;   return temp;
802  }  }
803    
804  /* Output line of text. */  /* Output line of text. */
805  /* Note:  /* Note:
806   * The tricks with 0x200 and last_puts_char are there to emulate gnu sed.   * The tricks with NO_EOL_CHAR and last_puts_char are there to emulate gnu sed.
807   * Without them, we had this:   * Without them, we had this:
808   * echo -n thingy >z1   * echo -n thingy >z1
809   * echo -n again >z2   * echo -n again >z2
# Line 769  static char *get_next_line(int *last_cha Line 815  static char *get_next_line(int *last_cha
815   * bbox:   * bbox:
816   * 00000000  74 68 7a 6e 67 79 61 67  61 7a 6e                 |thzngyagazn|   * 00000000  74 68 7a 6e 67 79 61 67  61 7a 6e                 |thzngyagazn|
817   */   */
818    static void puts_maybe_newline(char *s, FILE *file, char *last_puts_char, char last_gets_char)
 static int puts_maybe_newline(char *s, FILE *file, int prev_last_char, int last_char)  
819  {  {
820   static char last_puts_char;   char lpc = *last_puts_char;
821    
822   /* Is this a first line from new file   /* Need to insert a '\n' between two files because first file's
823   * and old file didn't end with '\n'? */   * last line wasn't terminated? */
824   if ((last_char & 0x200) && last_puts_char != '\n') {   if (lpc != '\n' && lpc != '\0') {
825   fputc('\n', file);   fputc('\n', file);
826   last_puts_char = '\n';   lpc = '\n';
827   }   }
828   fputs(s, file);   fputs(s, file);
829   /* why 'x'? - just something which is not '\n' */  
830     /* 'x' - just something which is not '\n', '\0' or NO_EOL_CHAR */
831   if (s[0])   if (s[0])
832   last_puts_char = 'x';   lpc = 'x';
833   if (!(last_char & 0x100)) { /* had trailing '\n' or '\0'? */  
834   last_char &= 0xff;   /* had trailing '\0' and it was last char of file? */
835   fputc(last_char, file);   if (last_gets_char == LAST_IS_NUL) {
836   last_puts_char = last_char;   fputc('\0', file);
837     lpc = 'x'; /* */
838     } else
839     /* had trailing '\n' or '\0'? */
840     if (last_gets_char != NO_EOL_CHAR) {
841     fputc(last_gets_char, file);
842     lpc = last_gets_char;
843   }   }
844    
845   if (ferror(file)) {   if (ferror(file)) {
846   xfunc_error_retval = 4;  /* It's what gnu sed exits with... */   xfunc_error_retval = 4;  /* It's what gnu sed exits with... */
847   bb_error_msg_and_die(bb_msg_write_error);   bb_error_msg_and_die(bb_msg_write_error);
848   }   }
849     *last_puts_char = lpc;
  return last_char;  
850  }  }
851    
852  #define sed_puts(s, n) \  #define sed_puts(s, n) (puts_maybe_newline(s, G.nonstdout, &last_puts_char, n))
853   (prev_last_char = puts_maybe_newline(s, bbg.nonstdout, prev_last_char, n))  
854    static int beg_match(sed_cmd_t *sed_cmd, const char *pattern_space)
855    {
856     int retval = sed_cmd->beg_match && !regexec(sed_cmd->beg_match, pattern_space, 0, NULL, 0);
857     if (retval)
858     G.previous_regex_ptr = sed_cmd->beg_match;
859     return retval;
860    }
861    
862  /* Process all the lines in all the files */  /* Process all the lines in all the files */
863    
864  static void process_files(void)  static void process_files(void)
865  {  {
866   char *pattern_space, *next_line;   char *pattern_space, *next_line;
867   int linenum = 0, prev_last_char = 0;   int linenum = 0;
868   int last_char, next_last_char = 0;   char last_puts_char = '\n';
869     char last_gets_char, next_gets_char;
870   sed_cmd_t *sed_cmd;   sed_cmd_t *sed_cmd;
871   int substituted;   int substituted;
872    
873   /* Prime the pump */   /* Prime the pump */
874   next_line = get_next_line(&next_last_char);   next_line = get_next_line(&next_gets_char);
875    
876   /* go through every line in each file */   /* Go through every line in each file */
877  again:   again:
878   substituted = 0;   substituted = 0;
879    
880   /* Advance to next line.  Stop if out of lines. */   /* Advance to next line.  Stop if out of lines. */
881   pattern_space = next_line;   pattern_space = next_line;
882   if (!pattern_space) return;   if (!pattern_space)
883   last_char = next_last_char;   return;
884     last_gets_char = next_gets_char;
885    
886   /* Read one line in advance so we can act on the last line,   /* Read one line in advance so we can act on the last line,
887   * the '$' address */   * the '$' address */
888   next_line = get_next_line(&next_last_char);   next_line = get_next_line(&next_gets_char);
889   linenum++;   linenum++;
890  restart:  
891   /* for every line, go through all the commands */   /* For every line, go through all the commands */
892   for (sed_cmd = bbg.sed_cmd_head.next; sed_cmd; sed_cmd = sed_cmd->next) {   restart:
893     for (sed_cmd = G.sed_cmd_head.next; sed_cmd; sed_cmd = sed_cmd->next) {
894   int old_matched, matched;   int old_matched, matched;
895    
896   old_matched = sed_cmd->in_match;   old_matched = sed_cmd->in_match;
897    
898   /* Determine if this command matches this line: */   /* Determine if this command matches this line: */
899    
900     //bb_error_msg("match1:%d", sed_cmd->in_match);
901     //bb_error_msg("match2:%d", (!sed_cmd->beg_line && !sed_cmd->end_line
902     // && !sed_cmd->beg_match && !sed_cmd->end_match));
903     //bb_error_msg("match3:%d", (sed_cmd->beg_line > 0
904     // && (sed_cmd->end_line || sed_cmd->end_match
905     //    ? (sed_cmd->beg_line <= linenum)
906     //    : (sed_cmd->beg_line == linenum)
907     //    )
908     // )
909     //bb_error_msg("match4:%d", (beg_match(sed_cmd, pattern_space)));
910     //bb_error_msg("match5:%d", (sed_cmd->beg_line == -1 && next_line == NULL));
911    
912   /* Are we continuing a previous multi-line match? */   /* Are we continuing a previous multi-line match? */
913   sed_cmd->in_match = sed_cmd->in_match   sed_cmd->in_match = sed_cmd->in_match
914   /* Or is no range necessary? */   /* Or is no range necessary? */
915   || (!sed_cmd->beg_line && !sed_cmd->end_line   || (!sed_cmd->beg_line && !sed_cmd->end_line
916   && !sed_cmd->beg_match && !sed_cmd->end_match)   && !sed_cmd->beg_match && !sed_cmd->end_match)
917   /* Or did we match the start of a numerical range? */   /* Or did we match the start of a numerical range? */
918   || (sed_cmd->beg_line > 0 && (sed_cmd->beg_line == linenum))   || (sed_cmd->beg_line > 0
919        && (sed_cmd->end_line || sed_cmd->end_match
920      /* note: even if end is numeric and is < linenum too,
921       * GNU sed matches! We match too */
922     ? (sed_cmd->beg_line <= linenum)    /* N,end */
923     : (sed_cmd->beg_line == linenum)    /* N */
924     )
925        )
926   /* Or does this line match our begin address regex? */   /* Or does this line match our begin address regex? */
927   || (sed_cmd->beg_match &&   || (beg_match(sed_cmd, pattern_space))
     !regexec(sed_cmd->beg_match, pattern_space, 0, NULL, 0))  
928   /* Or did we match last line of input? */   /* Or did we match last line of input? */
929   || (sed_cmd->beg_line == -1 && next_line == NULL);   || (sed_cmd->beg_line == -1 && next_line == NULL);
930    
931   /* Snapshot the value */   /* Snapshot the value */
   
932   matched = sed_cmd->in_match;   matched = sed_cmd->in_match;
933    
934     //bb_error_msg("cmd:'%c' matched:%d beg_line:%d end_line:%d linenum:%d",
935     //sed_cmd->cmd, matched, sed_cmd->beg_line, sed_cmd->end_line, linenum);
936    
937   /* Is this line the end of the current match? */   /* Is this line the end of the current match? */
938    
939   if (matched) {   if (matched) {
940     /* once matched, "n,xxx" range is dead, disabling it */
941     if (sed_cmd->beg_line > 0)
942     sed_cmd->beg_line = -2;
943   sed_cmd->in_match = !(   sed_cmd->in_match = !(
944   /* has the ending line come, or is this a single address command? */   /* has the ending line come, or is this a single address command? */
945   (sed_cmd->end_line ?   (sed_cmd->end_line ?
# Line 867  restart: Line 951  restart:
951   /* or does this line matches our last address regex */   /* or does this line matches our last address regex */
952   || (sed_cmd->end_match && old_matched   || (sed_cmd->end_match && old_matched
953       && (regexec(sed_cmd->end_match,       && (regexec(sed_cmd->end_match,
954               pattern_space, 0, NULL, 0) == 0))                   pattern_space, 0, NULL, 0) == 0))
955   );   );
956   }   }
957    
958   /* Skip blocks of commands we didn't match. */   /* Skip blocks of commands we didn't match */
959   if (sed_cmd->cmd == '{') {   if (sed_cmd->cmd == '{') {
960   if (sed_cmd->invert ? matched : !matched)   if (sed_cmd->invert ? matched : !matched) {
961   while (sed_cmd && sed_cmd->cmd != '}')   unsigned nest_cnt = 0;
962     while (1) {
963     if (sed_cmd->cmd == '{')
964     nest_cnt++;
965     if (sed_cmd->cmd == '}') {
966     nest_cnt--;
967     if (nest_cnt == 0)
968     break;
969     }
970   sed_cmd = sed_cmd->next;   sed_cmd = sed_cmd->next;
971   if (!sed_cmd) bb_error_msg_and_die("unterminated {");   if (!sed_cmd)
972     bb_error_msg_and_die("unterminated {");
973     }
974     }
975   continue;   continue;
976   }   }
977    
978   /* Okay, so did this line match? */   /* Okay, so did this line match? */
979   if (sed_cmd->invert ? !matched : matched) {   if (sed_cmd->invert ? matched : !matched)
980   /* Update last used regex in case a blank substitute BRE is found */   continue; /* no */
  if (sed_cmd->beg_match) {  
  bbg.previous_regex_ptr = sed_cmd->beg_match;  
  }  
981    
982   /* actual sedding */   /* Update last used regex in case a blank substitute BRE is found */
983   switch (sed_cmd->cmd) {   if (sed_cmd->beg_match) {
984     G.previous_regex_ptr = sed_cmd->beg_match;
985     }
986    
987   /* Print line number */   /* actual sedding */
988   case '=':   switch (sed_cmd->cmd) {
  fprintf(bbg.nonstdout, "%d\n", linenum);  
  break;  
989    
990   /* Write the current pattern space up to the first newline */   /* Print line number */
991   case 'P':   case '=':
992   {   fprintf(G.nonstdout, "%d\n", linenum);
993   char *tmp = strchr(pattern_space, '\n');   break;
   
  if (tmp) {  
  *tmp = '\0';  
  sed_puts(pattern_space, 1);  
  *tmp = '\n';  
  break;  
  }  
  /* Fall Through */  
  }  
994    
995   /* Write the current pattern space to output */   /* Write the current pattern space up to the first newline */
996   case 'p':   case 'P':
997   sed_puts(pattern_space, last_char);   {
998     char *tmp = strchr(pattern_space, '\n');
999     if (tmp) {
1000     *tmp = '\0';
1001     /* TODO: explain why '\n' below */
1002     sed_puts(pattern_space, '\n');
1003     *tmp = '\n';
1004   break;   break;
  /* Delete up through first newline */  
  case 'D':  
  {  
  char *tmp = strchr(pattern_space, '\n');  
   
  if (tmp) {  
  tmp = xstrdup(tmp+1);  
  free(pattern_space);  
  pattern_space = tmp;  
  goto restart;  
  }  
1005   }   }
1006   /* discard this line. */   /* Fall Through */
1007   case 'd':   }
  goto discard_line;  
1008    
1009   /* Substitute with regex */   /* Write the current pattern space to output */
1010   case 's':   case 'p':
1011   if (!do_subst_command(sed_cmd, &pattern_space))   /* NB: we print this _before_ the last line
1012   break;   * (of current file) is printed. Even if
1013   substituted |= 1;   * that line is nonterminated, we print
1014     * '\n' here (gnu sed does the same) */
1015   /* handle p option */   sed_puts(pattern_space, '\n');
1016   if (sed_cmd->sub_p)   break;
1017   sed_puts(pattern_space, last_char);   /* Delete up through first newline */
1018   /* handle w option */   case 'D':
1019   if (sed_cmd->file)   {
1020   sed_cmd->last_char = puts_maybe_newline(   char *tmp = strchr(pattern_space, '\n');
1021   pattern_space, sed_cmd->file,   if (tmp) {
1022   sed_cmd->last_char, last_char);   overlapping_strcpy(pattern_space, tmp + 1);
1023     goto restart;
1024     }
1025     }
1026     /* discard this line. */
1027     case 'd':
1028     goto discard_line;
1029    
1030     /* Substitute with regex */
1031     case 's':
1032     if (!do_subst_command(sed_cmd, &pattern_space))
1033   break;   break;
1034     substituted |= 1;
1035    
1036   /* Append line to linked list to be printed later */   /* handle p option */
1037   case 'a':   if (sed_cmd->sub_p)
1038   append(sed_cmd->string);   sed_puts(pattern_space, last_gets_char);
1039   break;   /* handle w option */
1040     if (sed_cmd->sw_file)
1041     puts_maybe_newline(
1042     pattern_space, sed_cmd->sw_file,
1043     &sed_cmd->sw_last_char, last_gets_char);
1044     break;
1045    
1046   /* Insert text before this line */   /* Append line to linked list to be printed later */
1047   case 'i':   case 'a':
1048   sed_puts(sed_cmd->string, 1);   append(sed_cmd->string);
1049   break;   break;
1050    
1051   /* Cut and paste text (replace) */   /* Insert text before this line */
1052   case 'c':   case 'i':
1053   /* Only triggers on last line of a matching range. */   sed_puts(sed_cmd->string, '\n');
1054   if (!sed_cmd->in_match)   break;
  sed_puts(sed_cmd->string, 0);  
  goto discard_line;  
1055    
1056   /* Read file, append contents to output */   /* Cut and paste text (replace) */
1057   case 'r':   case 'c':
1058   {   /* Only triggers on last line of a matching range. */
1059   FILE *rfile;   if (!sed_cmd->in_match)
1060     sed_puts(sed_cmd->string, '\n');
1061   rfile = fopen(sed_cmd->string, "r");   goto discard_line;
  if (rfile) {  
  char *line;  
   
  while ((line = xmalloc_getline(rfile))  
  != NULL)  
  append(line);  
  xprint_and_close_file(rfile);  
  }  
1062    
1063   break;   /* Read file, append contents to output */
1064     case 'r':
1065     {
1066     FILE *rfile;
1067     rfile = fopen_for_read(sed_cmd->string);
1068     if (rfile) {
1069     char *line;
1070    
1071     while ((line = xmalloc_fgetline(rfile))
1072     != NULL)
1073     append(line);
1074     xprint_and_close_file(rfile);
1075   }   }
1076    
1077   /* Write pattern space to file. */   break;
1078   case 'w':   }
1079   sed_cmd->last_char = puts_maybe_newline(  
1080   pattern_space, sed_cmd->file,   /* Write pattern space to file. */
1081   sed_cmd->last_char, last_char);   case 'w':
1082     puts_maybe_newline(
1083     pattern_space, sed_cmd->sw_file,
1084     &sed_cmd->sw_last_char, last_gets_char);
1085     break;
1086    
1087     /* Read next line from input */
1088     case 'n':
1089     if (!G.be_quiet)
1090     sed_puts(pattern_space, last_gets_char);
1091     if (next_line) {
1092     free(pattern_space);
1093     pattern_space = next_line;
1094     last_gets_char = next_gets_char;
1095     next_line = get_next_line(&next_gets_char);
1096     substituted = 0;
1097     linenum++;
1098   break;   break;
1099     }
1100     /* fall through */
1101    
1102   /* Read next line from input */   /* Quit.  End of script, end of input. */
1103   case 'n':   case 'q':
1104   if (!bbg.be_quiet)   /* Exit the outer while loop */
1105   sed_puts(pattern_space, last_char);   free(next_line);
1106   if (next_line) {   next_line = NULL;
1107   free(pattern_space);   goto discard_commands;
  pattern_space = next_line;  
  last_char = next_last_char;  
  next_line = get_next_line(&next_last_char);  
  linenum++;  
  break;  
  }  
  /* fall through */  
1108    
1109   /* Quit.  End of script, end of input. */   /* Append the next line to the current line */
1110   case 'q':   case 'N':
1111   /* Exit the outer while loop */   {
1112     int len;
1113     /* If no next line, jump to end of script and exit. */
1114     if (next_line == NULL) {
1115   free(next_line);   free(next_line);
1116   next_line = NULL;   next_line = NULL;
1117   goto discard_commands;   goto discard_line;
   
  /* Append the next line to the current line */  
  case 'N':  
  {  
  int len;  
  /* If no next line, jump to end of script and exit. */  
  if (next_line == NULL) {  
  /* Jump to end of script and exit */  
  free(next_line);  
  next_line = NULL;  
  goto discard_line;  
  /* append next_line, read new next_line. */  
  }  
  len = strlen(pattern_space);  
  pattern_space = realloc(pattern_space, len + strlen(next_line) + 2);  
  pattern_space[len] = '\n';  
  strcpy(pattern_space + len+1, next_line);  
  last_char = next_last_char;  
  next_line = get_next_line(&next_last_char);  
  linenum++;  
  break;  
1118   }   }
1119     /* Append next_line, read new next_line. */
1120     len = strlen(pattern_space);
1121     pattern_space = xrealloc(pattern_space, len + strlen(next_line) + 2);
1122     pattern_space[len] = '\n';
1123     strcpy(pattern_space + len+1, next_line);
1124     last_gets_char = next_gets_char;
1125     next_line = get_next_line(&next_gets_char);
1126     linenum++;
1127     break;
1128     }
1129    
1130   /* Test/branch if substitution occurred */   /* Test/branch if substitution occurred */
1131   case 't':   case 't':
1132   if (!substituted) break;   if (!substituted) break;
1133   substituted = 0;   substituted = 0;
1134   /* Fall through */   /* Fall through */
1135   /* Test/branch if substitution didn't occur */   /* Test/branch if substitution didn't occur */
1136   case 'T':   case 'T':
1137   if (substituted) break;   if (substituted) break;
1138   /* Fall through */   /* Fall through */
1139   /* Branch to label */   /* Branch to label */
1140   case 'b':   case 'b':
1141   if (!sed_cmd->string) goto discard_commands;   if (!sed_cmd->string) goto discard_commands;
1142   else sed_cmd = branch_to(sed_cmd->string);   else sed_cmd = branch_to(sed_cmd->string);
1143   break;   break;
1144   /* Transliterate characters */   /* Transliterate characters */
1145   case 'y':   case 'y':
1146   {   {
1147   int i, j;   int i, j;
1148     for (i = 0; pattern_space[i]; i++) {
1149   for (i = 0; pattern_space[i]; i++) {   for (j = 0; sed_cmd->string[j]; j += 2) {
1150   for (j = 0; sed_cmd->string[j]; j += 2) {   if (pattern_space[i] == sed_cmd->string[j]) {
1151   if (pattern_space[i] == sed_cmd->string[j]) {   pattern_space[i] = sed_cmd->string[j + 1];
1152   pattern_space[i] = sed_cmd->string[j + 1];   break;
  break;  
  }  
1153   }   }
1154   }   }
   
  break;  
1155   }   }
  case 'g': /* Replace pattern space with hold space */  
  free(pattern_space);  
  pattern_space = xstrdup(bbg.hold_space ? bbg.hold_space : "");  
  break;  
  case 'G': /* Append newline and hold space to pattern space */  
  {  
  int pattern_space_size = 2;  
  int hold_space_size = 0;  
   
  if (pattern_space)  
  pattern_space_size += strlen(pattern_space);  
  if (bbg.hold_space)  
  hold_space_size = strlen(bbg.hold_space);  
  pattern_space = xrealloc(pattern_space,  
  pattern_space_size + hold_space_size);  
  if (pattern_space_size == 2)  
  pattern_space[0] = 0;  
  strcat(pattern_space, "\n");  
  if (bbg.hold_space)  
  strcat(pattern_space, bbg.hold_space);  
  last_char = '\n';  
1156    
1157   break;   break;
1158   }   }
1159   case 'h': /* Replace hold space with pattern space */   case 'g': /* Replace pattern space with hold space */
1160   free(bbg.hold_space);   free(pattern_space);
1161   bbg.hold_space = xstrdup(pattern_space);   pattern_space = xstrdup(G.hold_space ? G.hold_space : "");
1162   break;   break;
1163   case 'H': /* Append newline and pattern space to hold space */   case 'G': /* Append newline and hold space to pattern space */
1164   {   {
1165   int hold_space_size = 2;   int pattern_space_size = 2;
1166   int pattern_space_size = 0;   int hold_space_size = 0;
   
  if (bbg.hold_space)  
  hold_space_size += strlen(bbg.hold_space);  
  if (pattern_space)  
  pattern_space_size = strlen(pattern_space);  
  bbg.hold_space = xrealloc(bbg.hold_space,  
  hold_space_size + pattern_space_size);  
   
  if (hold_space_size == 2)  
  *bbg.hold_space = 0;  
  strcat(bbg.hold_space, "\n");  
  if (pattern_space)  
  strcat(bbg.hold_space, pattern_space);  
1167    
1168   break;   if (pattern_space)
1169   }   pattern_space_size += strlen(pattern_space);
1170   case 'x': /* Exchange hold and pattern space */   if (G.hold_space)
1171   {   hold_space_size = strlen(G.hold_space);
1172   char *tmp = pattern_space;   pattern_space = xrealloc(pattern_space,
1173   pattern_space = bbg.hold_space ? : xzalloc(1);   pattern_space_size + hold_space_size);
1174   last_char = '\n';   if (pattern_space_size == 2)
1175   bbg.hold_space = tmp;   pattern_space[0] = 0;
1176   break;   strcat(pattern_space, "\n");
1177   }   if (G.hold_space)
1178   }   strcat(pattern_space, G.hold_space);
1179     last_gets_char = '\n';
1180    
1181     break;
1182   }   }
1183   }   case 'h': /* Replace hold space with pattern space */
1184     free(G.hold_space);
1185     G.hold_space = xstrdup(pattern_space);
1186     break;
1187     case 'H': /* Append newline and pattern space to hold space */
1188     {
1189     int hold_space_size = 2;
1190     int pattern_space_size = 0;
1191    
1192     if (G.hold_space)
1193     hold_space_size += strlen(G.hold_space);
1194     if (pattern_space)
1195     pattern_space_size = strlen(pattern_space);
1196     G.hold_space = xrealloc(G.hold_space,
1197     hold_space_size + pattern_space_size);
1198    
1199     if (hold_space_size == 2)
1200     *G.hold_space = 0;
1201     strcat(G.hold_space, "\n");
1202     if (pattern_space)
1203     strcat(G.hold_space, pattern_space);
1204    
1205     break;
1206     }
1207     case 'x': /* Exchange hold and pattern space */
1208     {
1209     char *tmp = pattern_space;
1210     pattern_space = G.hold_space ? G.hold_space : xzalloc(1);
1211     last_gets_char = '\n';
1212     G.hold_space = tmp;
1213     break;
1214     }
1215     } /* switch */
1216     } /* for each cmd */
1217    
1218   /*   /*
1219   * exit point from sedding...   * Exit point from sedding...
1220   */   */
1221  discard_commands:   discard_commands:
1222   /* we will print the line unless we were told to be quiet ('-n')   /* we will print the line unless we were told to be quiet ('-n')
1223     or if the line was suppressed (ala 'd'elete) */     or if the line was suppressed (ala 'd'elete) */
1224   if (!bbg.be_quiet) sed_puts(pattern_space, last_char);   if (!G.be_quiet)
1225     sed_puts(pattern_space, last_gets_char);
1226    
1227   /* Delete and such jump here. */   /* Delete and such jump here. */
1228  discard_line:   discard_line:
1229   flush_append();   flush_append();
1230   free(pattern_space);   free(pattern_space);
1231    
# Line 1137  discard_line: Line 1233  discard_line:
1233  }  }
1234    
1235  /* It is possible to have a command line argument with embedded  /* It is possible to have a command line argument with embedded
1236     newlines.  This counts as multiple command lines. */   * newlines.  This counts as multiple command lines.
1237     * However, newline can be escaped: 's/e/z\<newline>z/'
1238     * We check for this.
1239     */
1240    
1241  static void add_cmd_block(char *cmdstr)  static void add_cmd_block(char *cmdstr)
1242  {  {
1243   int go = 1;   char *sv, *eol;
  char *temp = xstrdup(cmdstr), *temp2 = temp;  
   
  while (go) {  
  int len = strcspn(temp2, "\n");  
  if (!temp2[len]) go = 0;  
  else temp2[len] = 0;  
  add_cmd(temp2);  
  temp2 += len+1;  
  }  
  free(temp);  
 }  
   
 static void add_cmds_link(llist_t *opt_e)  
 {  
  if (!opt_e) return;  
  add_cmds_link(opt_e->link);  
  add_cmd_block(opt_e->data);  
  free(opt_e);  
 }  
1244    
1245  static void add_files_link(llist_t *opt_f)   cmdstr = sv = xstrdup(cmdstr);
1246  {   do {
1247   char *line;   eol = strchr(cmdstr, '\n');
1248   FILE *cmdfile;   next:
1249   if (!opt_f) return;   if (eol) {
1250   add_files_link(opt_f->link);   /* Count preceding slashes */
1251   cmdfile = xfopen(opt_f->data, "r");   int slashes = 0;
1252   while ((line = xmalloc_getline(cmdfile)) != NULL) {   char *sl = eol;
1253   add_cmd(line);  
1254   free(line);   while (sl != cmdstr && *--sl == '\\')
1255   }   slashes++;
1256   xprint_and_close_file(cmdfile);   /* Odd number of preceding slashes - newline is escaped */
1257   free(opt_f);   if (slashes & 1) {
1258     overlapping_strcpy(eol - 1, eol);
1259     eol = strchr(eol, '\n');
1260     goto next;
1261     }
1262     *eol = '\0';
1263     }
1264     add_cmd(cmdstr);
1265     cmdstr = eol + 1;
1266     } while (eol);
1267     free(sv);
1268  }  }
1269    
1270  int sed_main(int argc, char **argv)  int sed_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
1271    int sed_main(int argc UNUSED_PARAM, char **argv)
1272  {  {
1273   enum {   enum {
1274   OPT_in_place = 1 << 0,   OPT_in_place = 1 << 0,
# Line 1186  int sed_main(int argc, char **argv) Line 1277  int sed_main(int argc, char **argv)
1277   llist_t *opt_e, *opt_f;   llist_t *opt_e, *opt_f;
1278   int status = EXIT_SUCCESS;   int status = EXIT_SUCCESS;
1279    
1280   bbg.sed_cmd_tail = &bbg.sed_cmd_head;   INIT_G();
1281    
1282   /* destroy command strings on exit */   /* destroy command strings on exit */
1283   if (ENABLE_FEATURE_CLEAN_UP) atexit(sed_free_and_close_stuff);   if (ENABLE_FEATURE_CLEAN_UP) atexit(sed_free_and_close_stuff);
1284    
1285   /* Lie to autoconf when it starts asking stupid questions. */   /* Lie to autoconf when it starts asking stupid questions. */
1286   if (argc == 2 && !strcmp(argv[1], "--version")) {   if (argv[1] && !strcmp(argv[1], "--version")) {
1287   puts("This is not GNU sed version 4.0");   puts("This is not GNU sed version 4.0");
1288   return 0;   return 0;
1289   }   }
# Line 1201  int sed_main(int argc, char **argv) Line 1292  int sed_main(int argc, char **argv)
1292   opt_e = opt_f = NULL;   opt_e = opt_f = NULL;
1293   opt_complementary = "e::f::" /* can occur multiple times */   opt_complementary = "e::f::" /* can occur multiple times */
1294                      "nn"; /* count -n */                      "nn"; /* count -n */
1295   opt = getopt32(argc, argv, "irne:f:", &opt_e, &opt_f,   opt = getopt32(argv, "irne:f:", &opt_e, &opt_f,
1296      &bbg.be_quiet); /* counter for -n */      &G.be_quiet); /* counter for -n */
1297   argc -= optind;   //argc -= optind;
1298   argv += optind;   argv += optind;
1299   if (opt & OPT_in_place) { // -i   if (opt & OPT_in_place) { // -i
1300   atexit(cleanup_outname);   atexit(cleanup_outname);
1301   }   }
1302   if (opt & 0x2) bbg.regex_type |= REG_EXTENDED; // -r   if (opt & 0x2) G.regex_type |= REG_EXTENDED; // -r
1303   //if (opt & 0x4) bbg.be_quiet++; // -n   //if (opt & 0x4) G.be_quiet++; // -n
1304   if (opt & 0x8) { // -e   while (opt_e) { // -e
1305   /* getopt32 reverses order of arguments, handle it */   add_cmd_block(llist_pop(&opt_e));
1306   add_cmds_link(opt_e);   }
1307   }   while (opt_f) { // -f
1308   if (opt & 0x10) { // -f   char *line;
1309   /* getopt32 reverses order of arguments, handle it */   FILE *cmdfile;
1310   add_files_link(opt_f);   cmdfile = xfopen_for_read(llist_pop(&opt_f));
1311     while ((line = xmalloc_fgetline(cmdfile)) != NULL) {
1312     add_cmd(line);
1313     free(line);
1314     }
1315     fclose(cmdfile);
1316   }   }
1317   /* if we didn't get a pattern from -e or -f, use argv[0] */   /* if we didn't get a pattern from -e or -f, use argv[0] */
1318   if (!(opt & 0x18)) {   if (!(opt & 0x18)) {
1319   if (!argc)   if (!*argv)
1320   bb_show_usage();   bb_show_usage();
1321   add_cmd_block(*argv++);   add_cmd_block(*argv++);
  argc--;  
1322   }   }
1323   /* Flush any unfinished commands. */   /* Flush any unfinished commands. */
1324   add_cmd("");   add_cmd("");
1325    
1326   /* By default, we write to stdout */   /* By default, we write to stdout */
1327   bbg.nonstdout = stdout;   G.nonstdout = stdout;
1328    
1329   /* argv[0..(argc-1)] should be names of file to process. If no   /* argv[0..(argc-1)] should be names of file to process. If no
1330   * files were specified or '-' was specified, take input from stdin.   * files were specified or '-' was specified, take input from stdin.
# Line 1238  int sed_main(int argc, char **argv) Line 1333  int sed_main(int argc, char **argv)
1333   if (opt & OPT_in_place)   if (opt & OPT_in_place)
1334   bb_error_msg_and_die(bb_msg_requires_arg, "-i");   bb_error_msg_and_die(bb_msg_requires_arg, "-i");
1335   add_input_file(stdin);   add_input_file(stdin);
  process_files();  
1336   } else {   } else {
1337   int i;   int i;
1338   FILE *file;   FILE *file;
1339    
1340   for (i = 0; i < argc; i++) {   for (i = 0; argv[i]; i++) {
1341   struct stat statbuf;   struct stat statbuf;
1342   int nonstdoutfd;   int nonstdoutfd;
1343    
# Line 1262  int sed_main(int argc, char **argv) Line 1356  int sed_main(int argc, char **argv)
1356   continue;   continue;
1357   }   }
1358    
1359   bbg.outname = xasprintf("%sXXXXXX", argv[i]);   G.outname = xasprintf("%sXXXXXX", argv[i]);
1360   nonstdoutfd = mkstemp(bbg.outname);   nonstdoutfd = mkstemp(G.outname);
1361   if (-1 == nonstdoutfd)   if (-1 == nonstdoutfd)
1362   bb_error_msg_and_die("no temp file");   bb_perror_msg_and_die("can't create temp file %s", G.outname);
1363   bbg.nonstdout = fdopen(nonstdoutfd, "w");   G.nonstdout = xfdopen_for_write(nonstdoutfd);
   
  /* Set permissions of output file */  
1364    
1365     /* Set permissions/owner of output file */
1366   fstat(fileno(file), &statbuf);   fstat(fileno(file), &statbuf);
1367     /* chmod'ing AFTER chown would preserve suid/sgid bits,
1368     * but GNU sed 4.2.1 does not preserve them either */
1369   fchmod(nonstdoutfd, statbuf.st_mode);   fchmod(nonstdoutfd, statbuf.st_mode);
1370     fchown(nonstdoutfd, statbuf.st_uid, statbuf.st_gid);
1371   add_input_file(file);   add_input_file(file);
1372   process_files();   process_files();
1373   fclose(bbg.nonstdout);   fclose(G.nonstdout);
1374    
1375   bbg.nonstdout = stdout;   G.nonstdout = stdout;
1376   /* unlink(argv[i]); */   /* unlink(argv[i]); */
1377   // FIXME: error check / message?   xrename(G.outname, argv[i]);
1378   rename(bbg.outname, argv[i]);   free(G.outname);
1379   free(bbg.outname);   G.outname = NULL;
1380   bbg.outname = 0;   }
1381   }   /* Here, to handle "sed 'cmds' nonexistent_file" case we did:
1382   if (bbg.input_file_count > bbg.current_input_file)   * if (G.current_input_file >= G.input_file_count)
1383   process_files();   * return status;
1384     * but it's not needed since process_files() works correctly
1385     * in this case too. */
1386   }   }
1387     process_files();
1388    
1389   return status;   return status;
1390  }  }

Legend:
Removed from v.532  
changed lines
  Added in v.1123