Magellan Linux

Diff of /trunk/mkinitrd-magellan/busybox/editors/sed.c

Parent Directory | Revision Log | View Patch

revision 532 by niro, Sat Sep 1 22:45:15 2007 UTC | revision 1126 by niro, Wed Aug 18 22:00:28 2010 UTC
# Line 5  Line 5 
5   * Copyright (C) 1999,2000,2001 by Lineo, inc. and Mark Whitley   * Copyright (C) 1999,2000,2001 by Lineo, inc. and Mark Whitley
6   * Copyright (C) 1999,2000,2001 by Mark Whitley <markw@codepoet.org>   * Copyright (C) 1999,2000,2001 by Mark Whitley <markw@codepoet.org>
7   * Copyright (C) 2002  Matt Kraai   * Copyright (C) 2002  Matt Kraai
8   * Copyright (C) 2003 by Glenn McGrath <bug1@iinet.net.au>   * Copyright (C) 2003 by Glenn McGrath
9   * Copyright (C) 2003,2004 by Rob Landley <rob@landley.net>   * Copyright (C) 2003,2004 by Rob Landley <rob@landley.net>
10   *   *
11   * MAINTAINER: Rob Landley <rob@landley.net>   * MAINTAINER: Rob Landley <rob@landley.net>
# Line 21  Line 21 
21    add_cmd() is called on each line of sed command text (from a file or from    add_cmd() is called on each line of sed command text (from a file or from
22    the command line).  It calls get_address() and parse_cmd_args().  The    the command line).  It calls get_address() and parse_cmd_args().  The
23    resulting sed_cmd_t structures are appended to a linked list    resulting sed_cmd_t structures are appended to a linked list
24    (bbg.sed_cmd_head/bbg.sed_cmd_tail).    (G.sed_cmd_head/G.sed_cmd_tail).
25    
26    add_input_file() adds a FILE * to the list of input files.  We need to    add_input_file() adds a FILE* to the list of input files.  We need to
27    know all input sources ahead of time to find the last line for the $ match.    know all input sources ahead of time to find the last line for the $ match.
28    
29    process_files() does actual sedding, reading data lines from each input FILE *    process_files() does actual sedding, reading data lines from each input FILE *
# Line 58  Line 58 
58   Reference http://www.opengroup.org/onlinepubs/007904975/utilities/sed.html   Reference http://www.opengroup.org/onlinepubs/007904975/utilities/sed.html
59  */  */
60    
61  #include "busybox.h"  #include "libbb.h"
62  #include "xregex.h"  #include "xregex.h"
63    
64    enum {
65     OPT_in_place = 1 << 0,
66    };
67    
68  /* Each sed command turns into one of these structures. */  /* Each sed command turns into one of these structures. */
69  typedef struct sed_cmd_s {  typedef struct sed_cmd_s {
70   /* Ordered by alignment requirements: currently 36 bytes on x86 */   /* Ordered by alignment requirements: currently 36 bytes on x86 */
71     struct sed_cmd_s *next; /* Next command (linked list, NULL terminated) */
72    
73   /* address storage */   /* address storage */
74   regex_t *beg_match;     /* sed -e '/match/cmd' */   regex_t *beg_match;     /* sed -e '/match/cmd' */
# Line 72  typedef struct sed_cmd_s { Line 77  typedef struct sed_cmd_s {
77   int beg_line;           /* 'sed 1p'   0 == apply commands to all lines */   int beg_line;           /* 'sed 1p'   0 == apply commands to all lines */
78   int end_line;           /* 'sed 1,3p' 0 == one line only. -1 = last line ($) */   int end_line;           /* 'sed 1,3p' 0 == one line only. -1 = last line ($) */
79    
80   FILE *file;             /* File (sw) command writes to, -1 for none. */   FILE *sw_file;          /* File (sw) command writes to, -1 for none. */
81   char *string;           /* Data string for (saicytb) commands. */   char *string;           /* Data string for (saicytb) commands. */
82    
83   unsigned short which_match;     /* (s) Which match to replace (0 for all) */   unsigned which_match;   /* (s) Which match to replace (0 for all) */
84    
85   /* Bitfields (gcc won't group them if we don't) */   /* Bitfields (gcc won't group them if we don't) */
86   unsigned int invert:1;          /* the '!' after the address */   unsigned invert:1;      /* the '!' after the address */
87   unsigned int in_match:1;        /* Next line also included in match? */   unsigned in_match:1;    /* Next line also included in match? */
88   unsigned int sub_p:1;           /* (s) print option */   unsigned sub_p:1;       /* (s) print option */
89    
90   int last_char;                  /* Last line written by (sw) had no '\n' */   char sw_last_char;      /* Last line written by (sw) had no '\n' */
91    
92   /* GENERAL FIELDS */   /* GENERAL FIELDS */
93   char cmd;               /* The command char: abcdDgGhHilnNpPqrstwxy:={} */   char cmd;               /* The command char: abcdDgGhHilnNpPqrstwxy:={} */
  struct sed_cmd_s *next; /* Next command (linked list, NULL terminated) */  
94  } sed_cmd_t;  } sed_cmd_t;
95    
96  static const char *const semicolon_whitespace = "; \n\r\t\v";  static const char semicolon_whitespace[] ALIGN1 = "; \n\r\t\v";
97    
98  struct sed_globals {  struct globals {
99   /* options */   /* options */
100   int be_quiet, regex_type;   int be_quiet, regex_type;
101   FILE *nonstdout;   FILE *nonstdout;
# Line 117  struct sed_globals { Line 121  struct sed_globals {
121   int idx; /* Space used */   int idx; /* Space used */
122   int len; /* Space allocated */   int len; /* Space allocated */
123   } pipeline;   } pipeline;
124  } bbg;  } FIX_ALIASING;
125    #define G (*(struct globals*)&bb_common_bufsiz1)
126    struct BUG_G_too_big {
127            char BUG_G_too_big[sizeof(G) <= COMMON_BUFSIZE ? 1 : -1];
128    };
129    #define INIT_G() do { \
130     G.sed_cmd_tail = &G.sed_cmd_head; \
131    } while (0)
132    
133    
134  #if ENABLE_FEATURE_CLEAN_UP  #if ENABLE_FEATURE_CLEAN_UP
135  static void sed_free_and_close_stuff(void)  static void sed_free_and_close_stuff(void)
136  {  {
137   sed_cmd_t *sed_cmd = bbg.sed_cmd_head.next;   sed_cmd_t *sed_cmd = G.sed_cmd_head.next;
138    
139   llist_free(bbg.append_head, free);   llist_free(G.append_head, free);
140    
141   while (sed_cmd) {   while (sed_cmd) {
142   sed_cmd_t *sed_cmd_next = sed_cmd->next;   sed_cmd_t *sed_cmd_next = sed_cmd->next;
143    
144   if (sed_cmd->file)   if (sed_cmd->sw_file)
145   xprint_and_close_file(sed_cmd->file);   xprint_and_close_file(sed_cmd->sw_file);
146    
147   if (sed_cmd->beg_match) {   if (sed_cmd->beg_match) {
148   regfree(sed_cmd->beg_match);   regfree(sed_cmd->beg_match);
# Line 150  static void sed_free_and_close_stuff(voi Line 161  static void sed_free_and_close_stuff(voi
161   sed_cmd = sed_cmd_next;   sed_cmd = sed_cmd_next;
162   }   }
163    
164   if (bbg.hold_space) free(bbg.hold_space);   free(G.hold_space);
165    
166   while (bbg.current_input_file < bbg.input_file_count)   while (G.current_input_file < G.input_file_count)
167   fclose(bbg.input_file_list[bbg.current_input_file++]);   fclose(G.input_file_list[G.current_input_file++]);
168  }  }
169  #else  #else
170  void sed_free_and_close_stuff(void);  void sed_free_and_close_stuff(void);
# Line 163  void sed_free_and_close_stuff(void); Line 174  void sed_free_and_close_stuff(void);
174    
175  static void cleanup_outname(void)  static void cleanup_outname(void)
176  {  {
177   if (bbg.outname) unlink(bbg.outname);   if (G.outname) unlink(G.outname);
178  }  }
179    
180  /* strdup, replacing "\n" with '\n', and "\delimiter" with 'delimiter' */  /* strcpy, replacing "\from" with 'to'. If to is NUL, replacing "\any" with 'any' */
181    
182  static void parse_escapes(char *dest, char *string, int len, char from, char to)  static void parse_escapes(char *dest, const char *string, int len, char from, char to)
183  {  {
184   int i = 0;   int i = 0;
185    
# Line 181  static void parse_escapes(char *dest, ch Line 192  static void parse_escapes(char *dest, ch
192   }   }
193   *dest++ = string[i++];   *dest++ = string[i++];
194   }   }
195     /* TODO: is it safe wrt a string with trailing '\\' ? */
196   *dest++ = string[i++];   *dest++ = string[i++];
197   }   }
198   *dest = 0;   *dest = '\0';
199  }  }
200    
201  static char *copy_parsing_escapes(char *string, int len)  static char *copy_parsing_escapes(const char *string, int len)
202  {  {
203   char *dest = xmalloc(len + 1);   char *dest = xmalloc(len + 1);
204    
205   parse_escapes(dest, string, len, 'n', '\n');   parse_escapes(dest, string, len, 'n', '\n');
206     /* GNU sed also recognizes \t */
207     parse_escapes(dest, dest, strlen(dest), 't', '\t');
208   return dest;   return dest;
209  }  }
210    
# Line 198  static char *copy_parsing_escapes(char * Line 212  static char *copy_parsing_escapes(char *
212  /*  /*
213   * index_of_next_unescaped_regexp_delim - walks left to right through a string   * index_of_next_unescaped_regexp_delim - walks left to right through a string
214   * beginning at a specified index and returns the index of the next regular   * beginning at a specified index and returns the index of the next regular
215   * expression delimiter (typically a forward * slash ('/')) not preceded by   * expression delimiter (typically a forward slash ('/')) not preceded by
216   * a backslash ('\').  A negative delimiter disables square bracket checking.   * a backslash ('\').  A negative delimiter disables square bracket checking.
217   */   */
218  static int index_of_next_unescaped_regexp_delim(int delimiter, char *str)  static int index_of_next_unescaped_regexp_delim(int delimiter, const char *str)
219  {  {
220   int bracket = -1;   int bracket = -1;
221   int escaped = 0;   int escaped = 0;
# Line 235  static int index_of_next_unescaped_regex Line 249  static int index_of_next_unescaped_regex
249  /*  /*
250   *  Returns the index of the third delimiter   *  Returns the index of the third delimiter
251   */   */
252  static int parse_regex_delim(char *cmdstr, char **match, char **replace)  static int parse_regex_delim(const char *cmdstr, char **match, char **replace)
253  {  {
254   char *cmdstr_ptr = cmdstr;   const char *cmdstr_ptr = cmdstr;
255   char delimiter;   char delimiter;
256   int idx = 0;   int idx = 0;
257    
# Line 262  static int parse_regex_delim(char *cmdst Line 276  static int parse_regex_delim(char *cmdst
276  /*  /*
277   * returns the index in the string just past where the address ends.   * returns the index in the string just past where the address ends.
278   */   */
279  static int get_address(char *my_str, int *linenum, regex_t ** regex)  static int get_address(const char *my_str, int *linenum, regex_t ** regex)
280  {  {
281   char *pos = my_str;   const char *pos = my_str;
282    
283   if (isdigit(*my_str)) {   if (isdigit(*my_str)) {
284   *linenum = strtol(my_str, &pos, 10);   *linenum = strtol(my_str, (char**)&pos, 10);
285   /* endstr shouldnt ever equal NULL */   /* endstr shouldnt ever equal NULL */
286   } else if (*my_str == '$') {   } else if (*my_str == '$') {
287   *linenum = -1;   *linenum = -1;
# Line 282  static int get_address(char *my_str, int Line 296  static int get_address(char *my_str, int
296   next = index_of_next_unescaped_regexp_delim(delimiter, ++pos);   next = index_of_next_unescaped_regexp_delim(delimiter, ++pos);
297   temp = copy_parsing_escapes(pos, next);   temp = copy_parsing_escapes(pos, next);
298   *regex = xmalloc(sizeof(regex_t));   *regex = xmalloc(sizeof(regex_t));
299   xregcomp(*regex, temp, bbg.regex_type|REG_NEWLINE);   xregcomp(*regex, temp, G.regex_type|REG_NEWLINE);
300   free(temp);   free(temp);
301   /* Move position to next character after last delimiter */   /* Move position to next character after last delimiter */
302   pos += (next+1);   pos += (next+1);
# Line 291  static int get_address(char *my_str, int Line 305  static int get_address(char *my_str, int
305  }  }
306    
307  /* Grab a filename.  Whitespace at start is skipped, then goes to EOL. */  /* Grab a filename.  Whitespace at start is skipped, then goes to EOL. */
308  static int parse_file_cmd(sed_cmd_t *sed_cmd, char *filecmdstr, char **retval)  static int parse_file_cmd(/*sed_cmd_t *sed_cmd,*/ const char *filecmdstr, char **retval)
309  {  {
310   int start = 0, idx, hack = 0;   int start = 0, idx, hack = 0;
311    
312   /* Skip whitespace, then grab filename to end of line */   /* Skip whitespace, then grab filename to end of line */
313   while (isspace(filecmdstr[start])) start++;   while (isspace(filecmdstr[start]))
314     start++;
315   idx = start;   idx = start;
316   while (filecmdstr[idx] && filecmdstr[idx] != '\n') idx++;   while (filecmdstr[idx] && filecmdstr[idx] != '\n')
317     idx++;
318    
319   /* If lines glued together, put backslash back. */   /* If lines glued together, put backslash back. */
320   if (filecmdstr[idx] == '\n') hack = 1;   if (filecmdstr[idx] == '\n')
321     hack = 1;
322   if (idx == start)   if (idx == start)
323   bb_error_msg_and_die("empty filename");   bb_error_msg_and_die("empty filename");
324   *retval = xstrndup(filecmdstr+start, idx-start+hack+1);   *retval = xstrndup(filecmdstr+start, idx-start+hack+1);
325   if (hack) (*retval)[idx] = '\\';   if (hack)
326     (*retval)[idx] = '\\';
327    
328   return idx;   return idx;
329  }  }
330    
331  static int parse_subst_cmd(sed_cmd_t *sed_cmd, char *substr)  static int parse_subst_cmd(sed_cmd_t *sed_cmd, const char *substr)
332  {  {
333   int cflags = bbg.regex_type;   int cflags = G.regex_type;
334   char *match;   char *match;
335   int idx = 0;   int idx;
336    
337   /*   /*
338   * A substitution command should look something like this:   * A substitution command should look something like this:
# Line 337  static int parse_subst_cmd(sed_cmd_t *se Line 355  static int parse_subst_cmd(sed_cmd_t *se
355   if (isdigit(substr[idx])) {   if (isdigit(substr[idx])) {
356   if (match[0] != '^') {   if (match[0] != '^') {
357   /* Match 0 treated as all, multiple matches we take the last one. */   /* Match 0 treated as all, multiple matches we take the last one. */
358   char *pos = substr + idx;   const char *pos = substr + idx;
359   /* FIXME: error check? */  /* FIXME: error check? */
360   sed_cmd->which_match = (unsigned short)strtol(substr+idx, &pos, 10);   sed_cmd->which_match = (unsigned)strtol(substr+idx, (char**) &pos, 10);
361   idx = pos - substr;   idx = pos - substr;
362   }   }
363   continue;   continue;
364   }   }
365   /* Skip spaces */   /* Skip spaces */
366   if (isspace(substr[idx])) continue;   if (isspace(substr[idx]))
367     continue;
368    
369   switch (substr[idx]) {   switch (substr[idx]) {
370   /* Replace all occurrences */   /* Replace all occurrences */
371   case 'g':   case 'g':
372   if (match[0] != '^') sed_cmd->which_match = 0;   if (match[0] != '^')
373     sed_cmd->which_match = 0;
374   break;   break;
375   /* Print pattern space */   /* Print pattern space */
376   case 'p':   case 'p':
# Line 360  static int parse_subst_cmd(sed_cmd_t *se Line 380  static int parse_subst_cmd(sed_cmd_t *se
380   case 'w':   case 'w':
381   {   {
382   char *temp;   char *temp;
383   idx += parse_file_cmd(sed_cmd, substr+idx, &temp);   idx += parse_file_cmd(/*sed_cmd,*/ substr+idx, &temp);
   
384   break;   break;
385   }   }
386   /* Ignore case (gnu exension) */   /* Ignore case (gnu exension) */
# Line 370  static int parse_subst_cmd(sed_cmd_t *se Line 389  static int parse_subst_cmd(sed_cmd_t *se
389   break;   break;
390   /* Comment */   /* Comment */
391   case '#':   case '#':
392   while (substr[++idx]) /*skip all*/;   // while (substr[++idx]) continue;
393     idx += strlen(substr + idx); // same
394   /* Fall through */   /* Fall through */
395   /* End of command */   /* End of command */
396   case ';':   case ';':
# Line 380  static int parse_subst_cmd(sed_cmd_t *se Line 400  static int parse_subst_cmd(sed_cmd_t *se
400   bb_error_msg_and_die("bad option in substitution expression");   bb_error_msg_and_die("bad option in substitution expression");
401   }   }
402   }   }
403  out:   out:
404   /* compile the match string into a regex */   /* compile the match string into a regex */
405   if (*match != '\0') {   if (*match != '\0') {
406   /* If match is empty, we use last regex used at runtime */   /* If match is empty, we use last regex used at runtime */
# Line 395  out: Line 415  out:
415  /*  /*
416   *  Process the commands arguments   *  Process the commands arguments
417   */   */
418  static char *parse_cmd_args(sed_cmd_t *sed_cmd, char *cmdstr)  static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr)
419  {  {
420   /* handle (s)ubstitution command */   /* handle (s)ubstitution command */
421   if (sed_cmd->cmd == 's')   if (sed_cmd->cmd == 's')
# Line 403  static char *parse_cmd_args(sed_cmd_t *s Line 423  static char *parse_cmd_args(sed_cmd_t *s
423   /* handle edit cmds: (a)ppend, (i)nsert, and (c)hange */   /* handle edit cmds: (a)ppend, (i)nsert, and (c)hange */
424   else if (strchr("aic", sed_cmd->cmd)) {   else if (strchr("aic", sed_cmd->cmd)) {
425   if ((sed_cmd->end_line || sed_cmd->end_match) && sed_cmd->cmd != 'c')   if ((sed_cmd->end_line || sed_cmd->end_match) && sed_cmd->cmd != 'c')
426   bb_error_msg_and_die   bb_error_msg_and_die("only a beginning address can be specified for edit commands");
  ("only a beginning address can be specified for edit commands");  
427   for (;;) {   for (;;) {
428   if (*cmdstr == '\n' || *cmdstr == '\\') {   if (*cmdstr == '\n' || *cmdstr == '\\') {
429   cmdstr++;   cmdstr++;
430   break;   break;
431   } else if (isspace(*cmdstr))   }
432   cmdstr++;   if (!isspace(*cmdstr))
  else  
433   break;   break;
434     cmdstr++;
435   }   }
436   sed_cmd->string = xstrdup(cmdstr);   sed_cmd->string = xstrdup(cmdstr);
437   parse_escapes(sed_cmd->string, sed_cmd->string, strlen(cmdstr), 0, 0);   /* "\anychar" -> "anychar" */
438     parse_escapes(sed_cmd->string, sed_cmd->string, strlen(cmdstr), '\0', '\0');
439   cmdstr += strlen(cmdstr);   cmdstr += strlen(cmdstr);
440   /* handle file cmds: (r)ead */   /* handle file cmds: (r)ead */
441   } else if (strchr("rw", sed_cmd->cmd)) {   } else if (strchr("rw", sed_cmd->cmd)) {
442   if (sed_cmd->end_line || sed_cmd->end_match)   if (sed_cmd->end_line || sed_cmd->end_match)
443   bb_error_msg_and_die("command only uses one address");   bb_error_msg_and_die("command only uses one address");
444   cmdstr += parse_file_cmd(sed_cmd, cmdstr, &sed_cmd->string);   cmdstr += parse_file_cmd(/*sed_cmd,*/ cmdstr, &sed_cmd->string);
445   if (sed_cmd->cmd == 'w')   if (sed_cmd->cmd == 'w') {
446   sed_cmd->file = xfopen(sed_cmd->string, "w");   sed_cmd->sw_file = xfopen_for_write(sed_cmd->string);
447     sed_cmd->sw_last_char = '\n';
448     }
449   /* handle branch commands */   /* handle branch commands */
450   } else if (strchr(":btT", sed_cmd->cmd)) {   } else if (strchr(":btT", sed_cmd->cmd)) {
451   int length;   int length;
# Line 467  static char *parse_cmd_args(sed_cmd_t *s Line 489  static char *parse_cmd_args(sed_cmd_t *s
489    
490  /* Parse address+command sets, skipping comment lines. */  /* Parse address+command sets, skipping comment lines. */
491    
492  static void add_cmd(char *cmdstr)  static void add_cmd(const char *cmdstr)
493  {  {
494   sed_cmd_t *sed_cmd;   sed_cmd_t *sed_cmd;
495   int temp;   unsigned len, n;
496    
497   /* Append this line to any unfinished line from last time. */   /* Append this line to any unfinished line from last time. */
498   if (bbg.add_cmd_line) {   if (G.add_cmd_line) {
499   cmdstr = xasprintf("%s\n%s", bbg.add_cmd_line, cmdstr);   char *tp = xasprintf("%s\n%s", G.add_cmd_line, cmdstr);
500   free(bbg.add_cmd_line);   free(G.add_cmd_line);
501   bbg.add_cmd_line = cmdstr;   cmdstr = G.add_cmd_line = tp;
502   }   }
503    
504   /* If this line ends with backslash, request next line. */   /* If this line ends with unescaped backslash, request next line. */
505   temp = strlen(cmdstr);   n = len = strlen(cmdstr);
506   if (temp && cmdstr[temp-1] == '\\') {   while (n && cmdstr[n-1] == '\\')
507   if (!bbg.add_cmd_line)   n--;
508   bbg.add_cmd_line = xstrdup(cmdstr);   if ((len - n) & 1) { /* if odd number of trailing backslashes */
509   bbg.add_cmd_line[temp-1] = 0;   if (!G.add_cmd_line)
510     G.add_cmd_line = xstrdup(cmdstr);
511     G.add_cmd_line[len-1] = '\0';
512   return;   return;
513   }   }
514    
# Line 500  static void add_cmd(char *cmdstr) Line 524  static void add_cmd(char *cmdstr)
524   if (*cmdstr == '#') {   if (*cmdstr == '#') {
525   /* "#n" is the same as using -n on the command line */   /* "#n" is the same as using -n on the command line */
526   if (cmdstr[1] == 'n')   if (cmdstr[1] == 'n')
527   bbg.be_quiet++;   G.be_quiet++;
528   cmdstr = strpbrk(cmdstr, "\n\r");   cmdstr = strpbrk(cmdstr, "\n\r");
529   if (!cmdstr) break;   if (!cmdstr) break;
530   continue;   continue;
# Line 543  static void add_cmd(char *cmdstr) Line 567  static void add_cmd(char *cmdstr)
567   /* last part (mandatory) will be a command */   /* last part (mandatory) will be a command */
568   if (!*cmdstr)   if (!*cmdstr)
569   bb_error_msg_and_die("missing command");   bb_error_msg_and_die("missing command");
570   sed_cmd->cmd = *(cmdstr++);   sed_cmd->cmd = *cmdstr++;
571   cmdstr = parse_cmd_args(sed_cmd, cmdstr);   cmdstr = parse_cmd_args(sed_cmd, cmdstr);
572    
573   /* Add the command to the command array */   /* Add the command to the command array */
574   bbg.sed_cmd_tail->next = sed_cmd;   G.sed_cmd_tail->next = sed_cmd;
575   bbg.sed_cmd_tail = bbg.sed_cmd_tail->next;   G.sed_cmd_tail = G.sed_cmd_tail->next;
576   }   }
577    
578   /* If we glued multiple lines together, free the memory. */   /* If we glued multiple lines together, free the memory. */
579   free(bbg.add_cmd_line);   free(G.add_cmd_line);
580   bbg.add_cmd_line = NULL;   G.add_cmd_line = NULL;
581  }  }
582    
583  /* Append to a string, reallocating memory as necessary. */  /* Append to a string, reallocating memory as necessary. */
# Line 562  static void add_cmd(char *cmdstr) Line 586  static void add_cmd(char *cmdstr)
586    
587  static void pipe_putc(char c)  static void pipe_putc(char c)
588  {  {
589   if (bbg.pipeline.idx == bbg.pipeline.len) {   if (G.pipeline.idx == G.pipeline.len) {
590   bbg.pipeline.buf = xrealloc(bbg.pipeline.buf,   G.pipeline.buf = xrealloc(G.pipeline.buf,
591   bbg.pipeline.len + PIPE_GROW);   G.pipeline.len + PIPE_GROW);
592   bbg.pipeline.len += PIPE_GROW;   G.pipeline.len += PIPE_GROW;
593   }   }
594   bbg.pipeline.buf[bbg.pipeline.idx++] = c;   G.pipeline.buf[G.pipeline.idx++] = c;
595  }  }
596    
597  static void do_subst_w_backrefs(char *line, char *replace)  static void do_subst_w_backrefs(char *line, char *replace)
598  {  {
599   int i,j;   int i, j;
600    
601   /* go through the replacement string */   /* go through the replacement string */
602   for (i = 0; replace[i]; i++) {   for (i = 0; replace[i]; i++) {
# Line 580  static void do_subst_w_backrefs(char *li Line 604  static void do_subst_w_backrefs(char *li
604   if (replace[i] == '\\') {   if (replace[i] == '\\') {
605   unsigned backref = replace[++i] - '0';   unsigned backref = replace[++i] - '0';
606   if (backref <= 9) {   if (backref <= 9) {
607   /* print out the text held in bbg.regmatch[backref] */   /* print out the text held in G.regmatch[backref] */
608   if (bbg.regmatch[backref].rm_so != -1) {   if (G.regmatch[backref].rm_so != -1) {
609   j = bbg.regmatch[backref].rm_so;   j = G.regmatch[backref].rm_so;
610   while (j < bbg.regmatch[backref].rm_eo)   while (j < G.regmatch[backref].rm_eo)
611   pipe_putc(line[j++]);   pipe_putc(line[j++]);
612   }   }
613   continue;   continue;
# Line 597  static void do_subst_w_backrefs(char *li Line 621  static void do_subst_w_backrefs(char *li
621   }   }
622   /* if we find an unescaped '&' print out the whole matched text. */   /* if we find an unescaped '&' print out the whole matched text. */
623   if (replace[i] == '&') {   if (replace[i] == '&') {
624   j = bbg.regmatch[0].rm_so;   j = G.regmatch[0].rm_so;
625   while (j < bbg.regmatch[0].rm_eo)   while (j < G.regmatch[0].rm_eo)
626   pipe_putc(line[j++]);   pipe_putc(line[j++]);
627   continue;   continue;
628   }   }
# Line 607  static void do_subst_w_backrefs(char *li Line 631  static void do_subst_w_backrefs(char *li
631   }   }
632  }  }
633    
634  static int do_subst_command(sed_cmd_t *sed_cmd, char **line)  static int do_subst_command(sed_cmd_t *sed_cmd, char **line_p)
635  {  {
636   char *oldline = *line;   char *line = *line_p;
637   int altered = 0;   int altered = 0;
638   int match_count = 0;   unsigned match_count = 0;
639   regex_t *current_regex;   regex_t *current_regex;
640    
641     current_regex = sed_cmd->sub_match;
642   /* Handle empty regex. */   /* Handle empty regex. */
643   if (sed_cmd->sub_match == NULL) {   if (!current_regex) {
644   current_regex = bbg.previous_regex_ptr;   current_regex = G.previous_regex_ptr;
645   if (!current_regex)   if (!current_regex)
646   bb_error_msg_and_die("no previous regexp");   bb_error_msg_and_die("no previous regexp");
647   } else   }
648   bbg.previous_regex_ptr = current_regex = sed_cmd->sub_match;   G.previous_regex_ptr = current_regex;
649    
650   /* Find the first match */   /* Find the first match */
651   if (REG_NOMATCH == regexec(current_regex, oldline, 10, bbg.regmatch, 0))   if (REG_NOMATCH == regexec(current_regex, line, 10, G.regmatch, 0))
652   return 0;   return 0;
653    
654   /* Initialize temporary output buffer. */   /* Initialize temporary output buffer. */
655   bbg.pipeline.buf = xmalloc(PIPE_GROW);   G.pipeline.buf = xmalloc(PIPE_GROW);
656   bbg.pipeline.len = PIPE_GROW;   G.pipeline.len = PIPE_GROW;
657   bbg.pipeline.idx = 0;   G.pipeline.idx = 0;
658    
659   /* Now loop through, substituting for matches */   /* Now loop through, substituting for matches */
660   do {   do {
# Line 639  static int do_subst_command(sed_cmd_t *s Line 664  static int do_subst_command(sed_cmd_t *s
664     echo " a.b" | busybox sed 's [^ .]* x g'     echo " a.b" | busybox sed 's [^ .]* x g'
665     The match_count check is so not to break     The match_count check is so not to break
666     echo "hi" | busybox sed 's/^/!/g' */     echo "hi" | busybox sed 's/^/!/g' */
667   if (!bbg.regmatch[0].rm_so && !bbg.regmatch[0].rm_eo && match_count) {   if (!G.regmatch[0].rm_so && !G.regmatch[0].rm_eo && match_count) {
668   pipe_putc(*oldline++);   pipe_putc(*line++);
669   continue;   continue;
670   }   }
671    
# Line 648  static int do_subst_command(sed_cmd_t *s Line 673  static int do_subst_command(sed_cmd_t *s
673    
674   /* If we aren't interested in this match, output old line to   /* If we aren't interested in this match, output old line to
675     end of match and continue */     end of match and continue */
676   if (sed_cmd->which_match && sed_cmd->which_match != match_count) {   if (sed_cmd->which_match
677   for (i = 0; i < bbg.regmatch[0].rm_eo; i++)   && (sed_cmd->which_match != match_count)
678   pipe_putc(*oldline++);   ) {
679     for (i = 0; i < G.regmatch[0].rm_eo; i++)
680     pipe_putc(*line++);
681   continue;   continue;
682   }   }
683    
684   /* print everything before the match */   /* print everything before the match */
685   for (i = 0; i < bbg.regmatch[0].rm_so; i++)   for (i = 0; i < G.regmatch[0].rm_so; i++)
686   pipe_putc(oldline[i]);   pipe_putc(line[i]);
687    
688   /* then print the substitution string */   /* then print the substitution string */
689   do_subst_w_backrefs(oldline, sed_cmd->string);   do_subst_w_backrefs(line, sed_cmd->string);
690    
691   /* advance past the match */   /* advance past the match */
692   oldline += bbg.regmatch[0].rm_eo;   line += G.regmatch[0].rm_eo;
693   /* flag that something has changed */   /* flag that something has changed */
694   altered++;   altered++;
695    
696   /* if we're not doing this globally, get out now */   /* if we're not doing this globally, get out now */
697   if (sed_cmd->which_match) break;   if (sed_cmd->which_match)
698   } while (*oldline && (regexec(current_regex, oldline, 10, bbg.regmatch, 0) != REG_NOMATCH));   break;
699    
700   /* Copy rest of string into output pipeline */  //maybe (G.regmatch[0].rm_eo ? REG_NOTBOL : 0) instead of unconditional REG_NOTBOL?
701     } while (*line && regexec(current_regex, line, 10, G.regmatch, REG_NOTBOL) != REG_NOMATCH);
702    
703   while (*oldline)   /* Copy rest of string into output pipeline */
704   pipe_putc(*oldline++);   while (1) {
705   pipe_putc(0);   char c = *line++;
706     pipe_putc(c);
707     if (c == '\0')
708     break;
709     }
710    
711   free(*line);   free(*line_p);
712   *line = bbg.pipeline.buf;   *line_p = G.pipeline.buf;
713   return altered;   return altered;
714  }  }
715    
# Line 686  static sed_cmd_t *branch_to(char *label) Line 718  static sed_cmd_t *branch_to(char *label)
718  {  {
719   sed_cmd_t *sed_cmd;   sed_cmd_t *sed_cmd;
720    
721   for (sed_cmd = bbg.sed_cmd_head.next; sed_cmd; sed_cmd = sed_cmd->next) {   for (sed_cmd = G.sed_cmd_head.next; sed_cmd; sed_cmd = sed_cmd->next) {
722   if (sed_cmd->cmd == ':' && sed_cmd->string && !strcmp(sed_cmd->string, label)) {   if (sed_cmd->cmd == ':' && sed_cmd->string && !strcmp(sed_cmd->string, label)) {
723   return sed_cmd;   return sed_cmd;
724   }   }
# Line 696  static sed_cmd_t *branch_to(char *label) Line 728  static sed_cmd_t *branch_to(char *label)
728    
729  static void append(char *s)  static void append(char *s)
730  {  {
731   llist_add_to_end(&bbg.append_head, xstrdup(s));   llist_add_to_end(&G.append_head, xstrdup(s));
732  }  }
733    
734  static void flush_append(void)  static void flush_append(void)
# Line 704  static void flush_append(void) Line 736  static void flush_append(void)
736   char *data;   char *data;
737    
738   /* Output appended lines. */   /* Output appended lines. */
739   while ((data = (char *)llist_pop(&bbg.append_head))) {   while ((data = (char *)llist_pop(&G.append_head))) {
740   fprintf(bbg.nonstdout, "%s\n", data);   fprintf(G.nonstdout, "%s\n", data);
741   free(data);   free(data);
742   }   }
743  }  }
744    
745  static void add_input_file(FILE *file)  static void add_input_file(FILE *file)
746  {  {
747   bbg.input_file_list = xrealloc(bbg.input_file_list,   G.input_file_list = xrealloc_vector(G.input_file_list, 2, G.input_file_count);
748   (bbg.input_file_count + 1) * sizeof(FILE *));   G.input_file_list[G.input_file_count++] = file;
  bbg.input_file_list[bbg.input_file_count++] = file;  
749  }  }
750    
751  /* Get next line of input from bbg.input_file_list, flushing append buffer and  /* Get next line of input from G.input_file_list, flushing append buffer and
752   * noting if we ran out of files without a newline on the last line we read.   * noting if we ran out of files without a newline on the last line we read.
753   */   */
754  static char *get_next_line(int *last_char)  enum {
755     NO_EOL_CHAR = 1,
756     LAST_IS_NUL = 2,
757    };
758    static char *get_next_line(char *gets_char)
759  {  {
760   char *temp = NULL;   char *temp = NULL;
761   int len, lc;   int len;
762     char gc;
763    
  lc = 0;  
764   flush_append();   flush_append();
765   while (bbg.current_input_file < bbg.input_file_count) {  
766     /* will be returned if last line in the file
767     * doesn't end with either '\n' or '\0' */
768     gc = NO_EOL_CHAR;
769     while (G.current_input_file < G.input_file_count) {
770     FILE *fp = G.input_file_list[G.current_input_file];
771   /* Read line up to a newline or NUL byte, inclusive,   /* Read line up to a newline or NUL byte, inclusive,
772   * return malloc'ed char[]. length of the chunk read   * return malloc'ed char[]. length of the chunk read
773   * is stored in len. NULL if EOF/error */   * is stored in len. NULL if EOF/error */
774   temp = bb_get_chunk_from_file(   temp = bb_get_chunk_from_file(fp, &len);
  bbg.input_file_list[bbg.current_input_file], &len);  
775   if (temp) {   if (temp) {
776   /* len > 0 here, it's ok to do temp[len-1] */   /* len > 0 here, it's ok to do temp[len-1] */
777   char c = temp[len-1];   char c = temp[len-1];
778   if (c == '\n' || c == '\0') {   if (c == '\n' || c == '\0') {
779   temp[len-1] = '\0';   temp[len-1] = '\0';
780   lc |= (unsigned char)c;   gc = c;
781   break;   if (c == '\0') {
782     int ch = fgetc(fp);
783     if (ch != EOF)
784     ungetc(ch, fp);
785     else
786     gc = LAST_IS_NUL;
787     }
788   }   }
789   /* will be returned if last line in the file   /* else we put NO_EOL_CHAR into *gets_char */
  * doesn't end with either '\n' or '\0' */  
  lc |= 0x100;  
790   break;   break;
791    
792     /* NB: I had the idea of peeking next file(s) and returning
793     * NO_EOL_CHAR only if it is the *last* non-empty
794     * input file. But there is a case where this won't work:
795     * file1: "a woo\nb woo"
796     * file2: "c no\nd no"
797     * sed -ne 's/woo/bang/p' input1 input2 => "a bang\nb bang"
798     * (note: *no* newline after "b bang"!) */
799   }   }
800   /* Close this file and advance to next one */   /* Close this file and advance to next one */
801   fclose(bbg.input_file_list[bbg.current_input_file++]);   fclose(fp);
802   /* "this is the first line from new input file" */   G.current_input_file++;
  lc |= 0x200;  
803   }   }
804   *last_char = lc;   *gets_char = gc;
805   return temp;   return temp;
806  }  }
807    
808  /* Output line of text. */  /* Output line of text. */
809  /* Note:  /* Note:
810   * The tricks with 0x200 and last_puts_char are there to emulate gnu sed.   * The tricks with NO_EOL_CHAR and last_puts_char are there to emulate gnu sed.
811   * Without them, we had this:   * Without them, we had this:
812   * echo -n thingy >z1   * echo -n thingy >z1
813   * echo -n again >z2   * echo -n again >z2
# Line 769  static char *get_next_line(int *last_cha Line 819  static char *get_next_line(int *last_cha
819   * bbox:   * bbox:
820   * 00000000  74 68 7a 6e 67 79 61 67  61 7a 6e                 |thzngyagazn|   * 00000000  74 68 7a 6e 67 79 61 67  61 7a 6e                 |thzngyagazn|
821   */   */
822    static void puts_maybe_newline(char *s, FILE *file, char *last_puts_char, char last_gets_char)
 static int puts_maybe_newline(char *s, FILE *file, int prev_last_char, int last_char)  
823  {  {
824   static char last_puts_char;   char lpc = *last_puts_char;
825    
826   /* Is this a first line from new file   /* Need to insert a '\n' between two files because first file's
827   * and old file didn't end with '\n'? */   * last line wasn't terminated? */
828   if ((last_char & 0x200) && last_puts_char != '\n') {   if (lpc != '\n' && lpc != '\0') {
829   fputc('\n', file);   fputc('\n', file);
830   last_puts_char = '\n';   lpc = '\n';
831   }   }
832   fputs(s, file);   fputs(s, file);
833   /* why 'x'? - just something which is not '\n' */  
834     /* 'x' - just something which is not '\n', '\0' or NO_EOL_CHAR */
835   if (s[0])   if (s[0])
836   last_puts_char = 'x';   lpc = 'x';
837   if (!(last_char & 0x100)) { /* had trailing '\n' or '\0'? */  
838   last_char &= 0xff;   /* had trailing '\0' and it was last char of file? */
839   fputc(last_char, file);   if (last_gets_char == LAST_IS_NUL) {
840   last_puts_char = last_char;   fputc('\0', file);
841     lpc = 'x'; /* */
842     } else
843     /* had trailing '\n' or '\0'? */
844     if (last_gets_char != NO_EOL_CHAR) {
845     fputc(last_gets_char, file);
846     lpc = last_gets_char;
847   }   }
848    
849   if (ferror(file)) {   if (ferror(file)) {
850   xfunc_error_retval = 4;  /* It's what gnu sed exits with... */   xfunc_error_retval = 4;  /* It's what gnu sed exits with... */
851   bb_error_msg_and_die(bb_msg_write_error);   bb_error_msg_and_die(bb_msg_write_error);
852   }   }
853     *last_puts_char = lpc;
  return last_char;  
854  }  }
855    
856  #define sed_puts(s, n) \  #define sed_puts(s, n) (puts_maybe_newline(s, G.nonstdout, &last_puts_char, n))
857   (prev_last_char = puts_maybe_newline(s, bbg.nonstdout, prev_last_char, n))  
858    static int beg_match(sed_cmd_t *sed_cmd, const char *pattern_space)
859    {
860     int retval = sed_cmd->beg_match && !regexec(sed_cmd->beg_match, pattern_space, 0, NULL, 0);
861     if (retval)
862     G.previous_regex_ptr = sed_cmd->beg_match;
863     return retval;
864    }
865    
866  /* Process all the lines in all the files */  /* Process all the lines in all the files */
867    
868  static void process_files(void)  static void process_files(void)
869  {  {
870   char *pattern_space, *next_line;   char *pattern_space, *next_line;
871   int linenum = 0, prev_last_char = 0;   int linenum = 0;
872   int last_char, next_last_char = 0;   char last_puts_char = '\n';
873     char last_gets_char, next_gets_char;
874   sed_cmd_t *sed_cmd;   sed_cmd_t *sed_cmd;
875   int substituted;   int substituted;
876    
877   /* Prime the pump */   /* Prime the pump */
878   next_line = get_next_line(&next_last_char);   next_line = get_next_line(&next_gets_char);
879    
880   /* go through every line in each file */   /* Go through every line in each file */
881  again:   again:
882   substituted = 0;   substituted = 0;
883    
884   /* Advance to next line.  Stop if out of lines. */   /* Advance to next line.  Stop if out of lines. */
885   pattern_space = next_line;   pattern_space = next_line;
886   if (!pattern_space) return;   if (!pattern_space)
887   last_char = next_last_char;   return;
888     last_gets_char = next_gets_char;
889    
890   /* Read one line in advance so we can act on the last line,   /* Read one line in advance so we can act on the last line,
891   * the '$' address */   * the '$' address */
892   next_line = get_next_line(&next_last_char);   next_line = get_next_line(&next_gets_char);
893   linenum++;   linenum++;
894  restart:  
895   /* for every line, go through all the commands */   /* For every line, go through all the commands */
896   for (sed_cmd = bbg.sed_cmd_head.next; sed_cmd; sed_cmd = sed_cmd->next) {   restart:
897     for (sed_cmd = G.sed_cmd_head.next; sed_cmd; sed_cmd = sed_cmd->next) {
898   int old_matched, matched;   int old_matched, matched;
899    
900   old_matched = sed_cmd->in_match;   old_matched = sed_cmd->in_match;
901    
902   /* Determine if this command matches this line: */   /* Determine if this command matches this line: */
903    
904     //bb_error_msg("match1:%d", sed_cmd->in_match);
905     //bb_error_msg("match2:%d", (!sed_cmd->beg_line && !sed_cmd->end_line
906     // && !sed_cmd->beg_match && !sed_cmd->end_match));
907     //bb_error_msg("match3:%d", (sed_cmd->beg_line > 0
908     // && (sed_cmd->end_line || sed_cmd->end_match
909     //    ? (sed_cmd->beg_line <= linenum)
910     //    : (sed_cmd->beg_line == linenum)
911     //    )
912     // )
913     //bb_error_msg("match4:%d", (beg_match(sed_cmd, pattern_space)));
914     //bb_error_msg("match5:%d", (sed_cmd->beg_line == -1 && next_line == NULL));
915    
916   /* Are we continuing a previous multi-line match? */   /* Are we continuing a previous multi-line match? */
917   sed_cmd->in_match = sed_cmd->in_match   sed_cmd->in_match = sed_cmd->in_match
918   /* Or is no range necessary? */   /* Or is no range necessary? */
919   || (!sed_cmd->beg_line && !sed_cmd->end_line   || (!sed_cmd->beg_line && !sed_cmd->end_line
920   && !sed_cmd->beg_match && !sed_cmd->end_match)   && !sed_cmd->beg_match && !sed_cmd->end_match)
921   /* Or did we match the start of a numerical range? */   /* Or did we match the start of a numerical range? */
922   || (sed_cmd->beg_line > 0 && (sed_cmd->beg_line == linenum))   || (sed_cmd->beg_line > 0
923        && (sed_cmd->end_line || sed_cmd->end_match
924      /* note: even if end is numeric and is < linenum too,
925       * GNU sed matches! We match too */
926     ? (sed_cmd->beg_line <= linenum)    /* N,end */
927     : (sed_cmd->beg_line == linenum)    /* N */
928     )
929        )
930   /* Or does this line match our begin address regex? */   /* Or does this line match our begin address regex? */
931   || (sed_cmd->beg_match &&   || (beg_match(sed_cmd, pattern_space))
     !regexec(sed_cmd->beg_match, pattern_space, 0, NULL, 0))  
932   /* Or did we match last line of input? */   /* Or did we match last line of input? */
933   || (sed_cmd->beg_line == -1 && next_line == NULL);   || (sed_cmd->beg_line == -1 && next_line == NULL);
934    
935   /* Snapshot the value */   /* Snapshot the value */
   
936   matched = sed_cmd->in_match;   matched = sed_cmd->in_match;
937    
938     //bb_error_msg("cmd:'%c' matched:%d beg_line:%d end_line:%d linenum:%d",
939     //sed_cmd->cmd, matched, sed_cmd->beg_line, sed_cmd->end_line, linenum);
940    
941   /* Is this line the end of the current match? */   /* Is this line the end of the current match? */
942    
943   if (matched) {   if (matched) {
944     /* once matched, "n,xxx" range is dead, disabling it */
945     if (sed_cmd->beg_line > 0
946     && !(option_mask32 & OPT_in_place) /* but not for -i */
947     ) {
948     sed_cmd->beg_line = -2;
949     }
950   sed_cmd->in_match = !(   sed_cmd->in_match = !(
951   /* has the ending line come, or is this a single address command? */   /* has the ending line come, or is this a single address command? */
952   (sed_cmd->end_line ?   (sed_cmd->end_line ?
# Line 867  restart: Line 958  restart:
958   /* or does this line matches our last address regex */   /* or does this line matches our last address regex */
959   || (sed_cmd->end_match && old_matched   || (sed_cmd->end_match && old_matched
960       && (regexec(sed_cmd->end_match,       && (regexec(sed_cmd->end_match,
961               pattern_space, 0, NULL, 0) == 0))                   pattern_space, 0, NULL, 0) == 0))
962   );   );
963   }   }
964    
965   /* Skip blocks of commands we didn't match. */   /* Skip blocks of commands we didn't match */
966   if (sed_cmd->cmd == '{') {   if (sed_cmd->cmd == '{') {
967   if (sed_cmd->invert ? matched : !matched)   if (sed_cmd->invert ? matched : !matched) {
968   while (sed_cmd && sed_cmd->cmd != '}')   unsigned nest_cnt = 0;
969     while (1) {
970     if (sed_cmd->cmd == '{')
971     nest_cnt++;
972     if (sed_cmd->cmd == '}') {
973     nest_cnt--;
974     if (nest_cnt == 0)
975     break;
976     }
977   sed_cmd = sed_cmd->next;   sed_cmd = sed_cmd->next;
978   if (!sed_cmd) bb_error_msg_and_die("unterminated {");   if (!sed_cmd)
979     bb_error_msg_and_die("unterminated {");
980     }
981     }
982   continue;   continue;
983   }   }
984    
985   /* Okay, so did this line match? */   /* Okay, so did this line match? */
986   if (sed_cmd->invert ? !matched : matched) {   if (sed_cmd->invert ? matched : !matched)
987   /* Update last used regex in case a blank substitute BRE is found */   continue; /* no */
  if (sed_cmd->beg_match) {  
  bbg.previous_regex_ptr = sed_cmd->beg_match;  
  }  
988    
989   /* actual sedding */   /* Update last used regex in case a blank substitute BRE is found */
990   switch (sed_cmd->cmd) {   if (sed_cmd->beg_match) {
991     G.previous_regex_ptr = sed_cmd->beg_match;
992     }
993    
994   /* Print line number */   /* actual sedding */
995   case '=':   //bb_error_msg("pattern_space:'%s' next_line:'%s' cmd:%c",
996   fprintf(bbg.nonstdout, "%d\n", linenum);   //pattern_space, next_line, sed_cmd->cmd);
997   break;   switch (sed_cmd->cmd) {
998    
999   /* Write the current pattern space up to the first newline */   /* Print line number */
1000   case 'P':   case '=':
1001   {   fprintf(G.nonstdout, "%d\n", linenum);
1002   char *tmp = strchr(pattern_space, '\n');   break;
   
  if (tmp) {  
  *tmp = '\0';  
  sed_puts(pattern_space, 1);  
  *tmp = '\n';  
  break;  
  }  
  /* Fall Through */  
  }  
1003    
1004   /* Write the current pattern space to output */   /* Write the current pattern space up to the first newline */
1005   case 'p':   case 'P':
1006   sed_puts(pattern_space, last_char);   {
1007     char *tmp = strchr(pattern_space, '\n');
1008     if (tmp) {
1009     *tmp = '\0';
1010     /* TODO: explain why '\n' below */
1011     sed_puts(pattern_space, '\n');
1012     *tmp = '\n';
1013   break;   break;
  /* Delete up through first newline */  
  case 'D':  
  {  
  char *tmp = strchr(pattern_space, '\n');  
   
  if (tmp) {  
  tmp = xstrdup(tmp+1);  
  free(pattern_space);  
  pattern_space = tmp;  
  goto restart;  
  }  
1014   }   }
1015   /* discard this line. */   /* Fall Through */
1016   case 'd':   }
  goto discard_line;  
   
  /* Substitute with regex */  
  case 's':  
  if (!do_subst_command(sed_cmd, &pattern_space))  
  break;  
  substituted |= 1;  
   
  /* handle p option */  
  if (sed_cmd->sub_p)  
  sed_puts(pattern_space, last_char);  
  /* handle w option */  
  if (sed_cmd->file)  
  sed_cmd->last_char = puts_maybe_newline(  
  pattern_space, sed_cmd->file,  
  sed_cmd->last_char, last_char);  
  break;  
1017    
1018   /* Append line to linked list to be printed later */   /* Write the current pattern space to output */
1019   case 'a':   case 'p':
1020   append(sed_cmd->string);   /* NB: we print this _before_ the last line
1021     * (of current file) is printed. Even if
1022     * that line is nonterminated, we print
1023     * '\n' here (gnu sed does the same) */
1024     sed_puts(pattern_space, '\n');
1025     break;
1026     /* Delete up through first newline */
1027     case 'D':
1028     {
1029     char *tmp = strchr(pattern_space, '\n');
1030     if (tmp) {
1031     overlapping_strcpy(pattern_space, tmp + 1);
1032     goto restart;
1033     }
1034     }
1035     /* discard this line. */
1036     case 'd':
1037     goto discard_line;
1038    
1039     /* Substitute with regex */
1040     case 's':
1041     if (!do_subst_command(sed_cmd, &pattern_space))
1042   break;   break;
1043     substituted |= 1;
1044    
1045   /* Insert text before this line */   /* handle p option */
1046   case 'i':   if (sed_cmd->sub_p)
1047   sed_puts(sed_cmd->string, 1);   sed_puts(pattern_space, last_gets_char);
1048   break;   /* handle w option */
1049     if (sed_cmd->sw_file)
1050     puts_maybe_newline(
1051     pattern_space, sed_cmd->sw_file,
1052     &sed_cmd->sw_last_char, last_gets_char);
1053     break;
1054    
1055   /* Cut and paste text (replace) */   /* Append line to linked list to be printed later */
1056   case 'c':   case 'a':
1057   /* Only triggers on last line of a matching range. */   append(sed_cmd->string);
1058   if (!sed_cmd->in_match)   break;
  sed_puts(sed_cmd->string, 0);  
  goto discard_line;  
   
  /* Read file, append contents to output */  
  case 'r':  
  {  
  FILE *rfile;  
   
  rfile = fopen(sed_cmd->string, "r");  
  if (rfile) {  
  char *line;  
   
  while ((line = xmalloc_getline(rfile))  
  != NULL)  
  append(line);  
  xprint_and_close_file(rfile);  
  }  
1059    
1060   break;   /* Insert text before this line */
1061     case 'i':
1062     sed_puts(sed_cmd->string, '\n');
1063     break;
1064    
1065     /* Cut and paste text (replace) */
1066     case 'c':
1067     /* Only triggers on last line of a matching range. */
1068     if (!sed_cmd->in_match)
1069     sed_puts(sed_cmd->string, '\n');
1070     goto discard_line;
1071    
1072     /* Read file, append contents to output */
1073     case 'r':
1074     {
1075     FILE *rfile;
1076     rfile = fopen_for_read(sed_cmd->string);
1077     if (rfile) {
1078     char *line;
1079    
1080     while ((line = xmalloc_fgetline(rfile))
1081     != NULL)
1082     append(line);
1083     xprint_and_close_file(rfile);
1084   }   }
1085    
1086   /* Write pattern space to file. */   break;
1087   case 'w':   }
  sed_cmd->last_char = puts_maybe_newline(  
  pattern_space, sed_cmd->file,  
  sed_cmd->last_char, last_char);  
  break;  
1088    
1089   /* Read next line from input */   /* Write pattern space to file. */
1090   case 'n':   case 'w':
1091   if (!bbg.be_quiet)   puts_maybe_newline(
1092   sed_puts(pattern_space, last_char);   pattern_space, sed_cmd->sw_file,
1093   if (next_line) {   &sed_cmd->sw_last_char, last_gets_char);
1094   free(pattern_space);   break;
  pattern_space = next_line;  
  last_char = next_last_char;  
  next_line = get_next_line(&next_last_char);  
  linenum++;  
  break;  
  }  
  /* fall through */  
1095    
1096   /* Quit.  End of script, end of input. */   /* Read next line from input */
1097   case 'q':   case 'n':
1098   /* Exit the outer while loop */   if (!G.be_quiet)
1099   free(next_line);   sed_puts(pattern_space, last_gets_char);
1100   next_line = NULL;   if (next_line) {
1101   goto discard_commands;   free(pattern_space);
1102     pattern_space = next_line;
1103   /* Append the next line to the current line */   last_gets_char = next_gets_char;
1104   case 'N':   next_line = get_next_line(&next_gets_char);
1105   {   substituted = 0;
  int len;  
  /* If no next line, jump to end of script and exit. */  
  if (next_line == NULL) {  
  /* Jump to end of script and exit */  
  free(next_line);  
  next_line = NULL;  
  goto discard_line;  
  /* append next_line, read new next_line. */  
  }  
  len = strlen(pattern_space);  
  pattern_space = realloc(pattern_space, len + strlen(next_line) + 2);  
  pattern_space[len] = '\n';  
  strcpy(pattern_space + len+1, next_line);  
  last_char = next_last_char;  
  next_line = get_next_line(&next_last_char);  
1106   linenum++;   linenum++;
1107   break;   break;
1108   }   }
1109     /* fall through */
1110    
1111   /* Test/branch if substitution occurred */   /* Quit.  End of script, end of input. */
1112   case 't':   case 'q':
1113   if (!substituted) break;   /* Exit the outer while loop */
1114   substituted = 0;   free(next_line);
1115   /* Fall through */   next_line = NULL;
1116   /* Test/branch if substitution didn't occur */   goto discard_commands;
  case 'T':  
  if (substituted) break;  
  /* Fall through */  
  /* Branch to label */  
  case 'b':  
  if (!sed_cmd->string) goto discard_commands;  
  else sed_cmd = branch_to(sed_cmd->string);  
  break;  
  /* Transliterate characters */  
  case 'y':  
  {  
  int i, j;  
   
  for (i = 0; pattern_space[i]; i++) {  
  for (j = 0; sed_cmd->string[j]; j += 2) {  
  if (pattern_space[i] == sed_cmd->string[j]) {  
  pattern_space[i] = sed_cmd->string[j + 1];  
  break;  
  }  
  }  
  }  
1117    
1118   break;   /* Append the next line to the current line */
1119     case 'N':
1120     {
1121     int len;
1122     /* If no next line, jump to end of script and exit. */
1123     /* http://www.gnu.org/software/sed/manual/sed.html:
1124     * "Most versions of sed exit without printing anything
1125     * when the N command is issued on the last line of
1126     * a file. GNU sed prints pattern space before exiting
1127     * unless of course the -n command switch has been
1128     * specified. This choice is by design."
1129     */
1130     if (next_line == NULL) {
1131     //goto discard_line;
1132     goto discard_commands; /* GNU behavior */
1133   }   }
1134   case 'g': /* Replace pattern space with hold space */   /* Append next_line, read new next_line. */
1135   free(pattern_space);   len = strlen(pattern_space);
1136   pattern_space = xstrdup(bbg.hold_space ? bbg.hold_space : "");   pattern_space = xrealloc(pattern_space, len + strlen(next_line) + 2);
1137   break;   pattern_space[len] = '\n';
1138   case 'G': /* Append newline and hold space to pattern space */   strcpy(pattern_space + len+1, next_line);
1139   {   last_gets_char = next_gets_char;
1140   int pattern_space_size = 2;   next_line = get_next_line(&next_gets_char);
1141   int hold_space_size = 0;   linenum++;
1142     break;
1143   if (pattern_space)   }
  pattern_space_size += strlen(pattern_space);  
  if (bbg.hold_space)  
  hold_space_size = strlen(bbg.hold_space);  
  pattern_space = xrealloc(pattern_space,  
  pattern_space_size + hold_space_size);  
  if (pattern_space_size == 2)  
  pattern_space[0] = 0;  
  strcat(pattern_space, "\n");  
  if (bbg.hold_space)  
  strcat(pattern_space, bbg.hold_space);  
  last_char = '\n';  
1144    
1145   break;   /* Test/branch if substitution occurred */
1146     case 't':
1147     if (!substituted) break;
1148     substituted = 0;
1149     /* Fall through */
1150     /* Test/branch if substitution didn't occur */
1151     case 'T':
1152     if (substituted) break;
1153     /* Fall through */
1154     /* Branch to label */
1155     case 'b':
1156     if (!sed_cmd->string) goto discard_commands;
1157     else sed_cmd = branch_to(sed_cmd->string);
1158     break;
1159     /* Transliterate characters */
1160     case 'y':
1161     {
1162     int i, j;
1163     for (i = 0; pattern_space[i]; i++) {
1164     for (j = 0; sed_cmd->string[j]; j += 2) {
1165     if (pattern_space[i] == sed_cmd->string[j]) {
1166     pattern_space[i] = sed_cmd->string[j + 1];
1167     break;
1168     }
1169     }
1170   }   }
  case 'h': /* Replace hold space with pattern space */  
  free(bbg.hold_space);  
  bbg.hold_space = xstrdup(pattern_space);  
  break;  
  case 'H': /* Append newline and pattern space to hold space */  
  {  
  int hold_space_size = 2;  
  int pattern_space_size = 0;  
   
  if (bbg.hold_space)  
  hold_space_size += strlen(bbg.hold_space);  
  if (pattern_space)  
  pattern_space_size = strlen(pattern_space);  
  bbg.hold_space = xrealloc(bbg.hold_space,  
  hold_space_size + pattern_space_size);  
   
  if (hold_space_size == 2)  
  *bbg.hold_space = 0;  
  strcat(bbg.hold_space, "\n");  
  if (pattern_space)  
  strcat(bbg.hold_space, pattern_space);  
1171    
1172   break;   break;
  }  
  case 'x': /* Exchange hold and pattern space */  
  {  
  char *tmp = pattern_space;  
  pattern_space = bbg.hold_space ? : xzalloc(1);  
  last_char = '\n';  
  bbg.hold_space = tmp;  
  break;  
  }  
  }  
1173   }   }
1174   }   case 'g': /* Replace pattern space with hold space */
1175     free(pattern_space);
1176     pattern_space = xstrdup(G.hold_space ? G.hold_space : "");
1177     break;
1178     case 'G': /* Append newline and hold space to pattern space */
1179     {
1180     int pattern_space_size = 2;
1181     int hold_space_size = 0;
1182    
1183     if (pattern_space)
1184     pattern_space_size += strlen(pattern_space);
1185     if (G.hold_space)
1186     hold_space_size = strlen(G.hold_space);
1187     pattern_space = xrealloc(pattern_space,
1188     pattern_space_size + hold_space_size);
1189     if (pattern_space_size == 2)
1190     pattern_space[0] = 0;
1191     strcat(pattern_space, "\n");
1192     if (G.hold_space)
1193     strcat(pattern_space, G.hold_space);
1194     last_gets_char = '\n';
1195    
1196     break;
1197     }
1198     case 'h': /* Replace hold space with pattern space */
1199     free(G.hold_space);
1200     G.hold_space = xstrdup(pattern_space);
1201     break;
1202     case 'H': /* Append newline and pattern space to hold space */
1203     {
1204     int hold_space_size = 2;
1205     int pattern_space_size = 0;
1206    
1207     if (G.hold_space)
1208     hold_space_size += strlen(G.hold_space);
1209     if (pattern_space)
1210     pattern_space_size = strlen(pattern_space);
1211     G.hold_space = xrealloc(G.hold_space,
1212     hold_space_size + pattern_space_size);
1213    
1214     if (hold_space_size == 2)
1215     *G.hold_space = 0;
1216     strcat(G.hold_space, "\n");
1217     if (pattern_space)
1218     strcat(G.hold_space, pattern_space);
1219    
1220     break;
1221     }
1222     case 'x': /* Exchange hold and pattern space */
1223     {
1224     char *tmp = pattern_space;
1225     pattern_space = G.hold_space ? G.hold_space : xzalloc(1);
1226     last_gets_char = '\n';
1227     G.hold_space = tmp;
1228     break;
1229     }
1230     } /* switch */
1231     } /* for each cmd */
1232    
1233   /*   /*
1234   * exit point from sedding...   * Exit point from sedding...
1235   */   */
1236  discard_commands:   discard_commands:
1237   /* we will print the line unless we were told to be quiet ('-n')   /* we will print the line unless we were told to be quiet ('-n')
1238     or if the line was suppressed (ala 'd'elete) */     or if the line was suppressed (ala 'd'elete) */
1239   if (!bbg.be_quiet) sed_puts(pattern_space, last_char);   if (!G.be_quiet)
1240     sed_puts(pattern_space, last_gets_char);
1241    
1242   /* Delete and such jump here. */   /* Delete and such jump here. */
1243  discard_line:   discard_line:
1244   flush_append();   flush_append();
1245   free(pattern_space);   free(pattern_space);
1246    
# Line 1137  discard_line: Line 1248  discard_line:
1248  }  }
1249    
1250  /* It is possible to have a command line argument with embedded  /* It is possible to have a command line argument with embedded
1251     newlines.  This counts as multiple command lines. */   * newlines.  This counts as multiple command lines.
1252     * However, newline can be escaped: 's/e/z\<newline>z/'
1253     * We check for this.
1254     */
1255    
1256  static void add_cmd_block(char *cmdstr)  static void add_cmd_block(char *cmdstr)
1257  {  {
1258   int go = 1;   char *sv, *eol;
  char *temp = xstrdup(cmdstr), *temp2 = temp;  
   
  while (go) {  
  int len = strcspn(temp2, "\n");  
  if (!temp2[len]) go = 0;  
  else temp2[len] = 0;  
  add_cmd(temp2);  
  temp2 += len+1;  
  }  
  free(temp);  
 }  
   
 static void add_cmds_link(llist_t *opt_e)  
 {  
  if (!opt_e) return;  
  add_cmds_link(opt_e->link);  
  add_cmd_block(opt_e->data);  
  free(opt_e);  
 }  
1259    
1260  static void add_files_link(llist_t *opt_f)   cmdstr = sv = xstrdup(cmdstr);
1261  {   do {
1262   char *line;   eol = strchr(cmdstr, '\n');
1263   FILE *cmdfile;   next:
1264   if (!opt_f) return;   if (eol) {
1265   add_files_link(opt_f->link);   /* Count preceding slashes */
1266   cmdfile = xfopen(opt_f->data, "r");   int slashes = 0;
1267   while ((line = xmalloc_getline(cmdfile)) != NULL) {   char *sl = eol;
1268   add_cmd(line);  
1269   free(line);   while (sl != cmdstr && *--sl == '\\')
1270   }   slashes++;
1271   xprint_and_close_file(cmdfile);   /* Odd number of preceding slashes - newline is escaped */
1272   free(opt_f);   if (slashes & 1) {
1273     overlapping_strcpy(eol - 1, eol);
1274     eol = strchr(eol, '\n');
1275     goto next;
1276     }
1277     *eol = '\0';
1278     }
1279     add_cmd(cmdstr);
1280     cmdstr = eol + 1;
1281     } while (eol);
1282     free(sv);
1283  }  }
1284    
1285  int sed_main(int argc, char **argv)  int sed_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
1286    int sed_main(int argc UNUSED_PARAM, char **argv)
1287  {  {
  enum {  
  OPT_in_place = 1 << 0,  
  };  
1288   unsigned opt;   unsigned opt;
1289   llist_t *opt_e, *opt_f;   llist_t *opt_e, *opt_f;
1290   int status = EXIT_SUCCESS;   int status = EXIT_SUCCESS;
1291    
1292   bbg.sed_cmd_tail = &bbg.sed_cmd_head;   INIT_G();
1293    
1294   /* destroy command strings on exit */   /* destroy command strings on exit */
1295   if (ENABLE_FEATURE_CLEAN_UP) atexit(sed_free_and_close_stuff);   if (ENABLE_FEATURE_CLEAN_UP) atexit(sed_free_and_close_stuff);
1296    
1297   /* Lie to autoconf when it starts asking stupid questions. */   /* Lie to autoconf when it starts asking stupid questions. */
1298   if (argc == 2 && !strcmp(argv[1], "--version")) {   if (argv[1] && !strcmp(argv[1], "--version")) {
1299   puts("This is not GNU sed version 4.0");   puts("This is not GNU sed version 4.0");
1300   return 0;   return 0;
1301   }   }
# Line 1201  int sed_main(int argc, char **argv) Line 1304  int sed_main(int argc, char **argv)
1304   opt_e = opt_f = NULL;   opt_e = opt_f = NULL;
1305   opt_complementary = "e::f::" /* can occur multiple times */   opt_complementary = "e::f::" /* can occur multiple times */
1306                      "nn"; /* count -n */                      "nn"; /* count -n */
1307   opt = getopt32(argc, argv, "irne:f:", &opt_e, &opt_f,   /* -i must be first, to match OPT_in_place definition */
1308      &bbg.be_quiet); /* counter for -n */   opt = getopt32(argv, "irne:f:", &opt_e, &opt_f,
1309   argc -= optind;      &G.be_quiet); /* counter for -n */
1310     //argc -= optind;
1311   argv += optind;   argv += optind;
1312   if (opt & OPT_in_place) { // -i   if (opt & OPT_in_place) { // -i
1313   atexit(cleanup_outname);   atexit(cleanup_outname);
1314   }   }
1315   if (opt & 0x2) bbg.regex_type |= REG_EXTENDED; // -r   if (opt & 0x2) G.regex_type |= REG_EXTENDED; // -r
1316   //if (opt & 0x4) bbg.be_quiet++; // -n   //if (opt & 0x4) G.be_quiet++; // -n
1317   if (opt & 0x8) { // -e   while (opt_e) { // -e
1318   /* getopt32 reverses order of arguments, handle it */   add_cmd_block(llist_pop(&opt_e));
1319   add_cmds_link(opt_e);   }
1320   }   while (opt_f) { // -f
1321   if (opt & 0x10) { // -f   char *line;
1322   /* getopt32 reverses order of arguments, handle it */   FILE *cmdfile;
1323   add_files_link(opt_f);   cmdfile = xfopen_for_read(llist_pop(&opt_f));
1324     while ((line = xmalloc_fgetline(cmdfile)) != NULL) {
1325     add_cmd(line);
1326     free(line);
1327     }
1328     fclose(cmdfile);
1329   }   }
1330   /* if we didn't get a pattern from -e or -f, use argv[0] */   /* if we didn't get a pattern from -e or -f, use argv[0] */
1331   if (!(opt & 0x18)) {   if (!(opt & 0x18)) {
1332   if (!argc)   if (!*argv)
1333   bb_show_usage();   bb_show_usage();
1334   add_cmd_block(*argv++);   add_cmd_block(*argv++);
  argc--;  
1335   }   }
1336   /* Flush any unfinished commands. */   /* Flush any unfinished commands. */
1337   add_cmd("");   add_cmd("");
1338    
1339   /* By default, we write to stdout */   /* By default, we write to stdout */
1340   bbg.nonstdout = stdout;   G.nonstdout = stdout;
1341    
1342   /* argv[0..(argc-1)] should be names of file to process. If no   /* argv[0..(argc-1)] should be names of file to process. If no
1343   * files were specified or '-' was specified, take input from stdin.   * files were specified or '-' was specified, take input from stdin.
# Line 1238  int sed_main(int argc, char **argv) Line 1346  int sed_main(int argc, char **argv)
1346   if (opt & OPT_in_place)   if (opt & OPT_in_place)
1347   bb_error_msg_and_die(bb_msg_requires_arg, "-i");   bb_error_msg_and_die(bb_msg_requires_arg, "-i");
1348   add_input_file(stdin);   add_input_file(stdin);
  process_files();  
1349   } else {   } else {
1350   int i;   int i;
1351   FILE *file;   FILE *file;
1352    
1353   for (i = 0; i < argc; i++) {   for (i = 0; argv[i]; i++) {
1354   struct stat statbuf;   struct stat statbuf;
1355   int nonstdoutfd;   int nonstdoutfd;
1356    
# Line 1262  int sed_main(int argc, char **argv) Line 1369  int sed_main(int argc, char **argv)
1369   continue;   continue;
1370   }   }
1371    
1372   bbg.outname = xasprintf("%sXXXXXX", argv[i]);   G.outname = xasprintf("%sXXXXXX", argv[i]);
1373   nonstdoutfd = mkstemp(bbg.outname);   nonstdoutfd = mkstemp(G.outname);
1374   if (-1 == nonstdoutfd)   if (-1 == nonstdoutfd)
1375   bb_error_msg_and_die("no temp file");   bb_perror_msg_and_die("can't create temp file %s", G.outname);
1376   bbg.nonstdout = fdopen(nonstdoutfd, "w");   G.nonstdout = xfdopen_for_write(nonstdoutfd);
   
  /* Set permissions of output file */  
1377    
1378     /* Set permissions/owner of output file */
1379   fstat(fileno(file), &statbuf);   fstat(fileno(file), &statbuf);
1380     /* chmod'ing AFTER chown would preserve suid/sgid bits,
1381     * but GNU sed 4.2.1 does not preserve them either */
1382   fchmod(nonstdoutfd, statbuf.st_mode);   fchmod(nonstdoutfd, statbuf.st_mode);
1383     fchown(nonstdoutfd, statbuf.st_uid, statbuf.st_gid);
1384   add_input_file(file);   add_input_file(file);
1385   process_files();   process_files();
1386   fclose(bbg.nonstdout);   fclose(G.nonstdout);
1387    
1388   bbg.nonstdout = stdout;   G.nonstdout = stdout;
1389   /* unlink(argv[i]); */   /* unlink(argv[i]); */
1390   // FIXME: error check / message?   xrename(G.outname, argv[i]);
1391   rename(bbg.outname, argv[i]);   free(G.outname);
1392   free(bbg.outname);   G.outname = NULL;
1393   bbg.outname = 0;   }
1394   }   /* Here, to handle "sed 'cmds' nonexistent_file" case we did:
1395   if (bbg.input_file_count > bbg.current_input_file)   * if (G.current_input_file >= G.input_file_count)
1396   process_files();   * return status;
1397     * but it's not needed since process_files() works correctly
1398     * in this case too. */
1399   }   }
1400     process_files();
1401    
1402   return status;   return status;
1403  }  }

Legend:
Removed from v.532  
changed lines
  Added in v.1126