Magellan Linux

Contents of /trunk/mkinitrd-magellan/busybox/editors/awk.c

Parent Directory | Revision Log


Revision 1123 - (show annotations) (download)
Wed Aug 18 21:56:57 2010 UTC (13 years, 9 months ago) by niro
File MIME type: text/plain
File size: 68598 byte(s)
-updated to busybox-1.17.1
1 /* vi: set sw=4 ts=4: */
2 /*
3 * awk implementation for busybox
4 *
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6 *
7 * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
8 */
9
10 #include "libbb.h"
11 #include "xregex.h"
12 #include <math.h>
13
14 /* This is a NOEXEC applet. Be very careful! */
15
16
17 /* If you comment out one of these below, it will be #defined later
18 * to perform debug printfs to stderr: */
19 #define debug_printf_walker(...) do {} while (0)
20
21 #ifndef debug_printf_walker
22 # define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
23 #endif
24
25
26
27 #define MAXVARFMT 240
28 #define MINNVBLOCK 64
29
30 /* variable flags */
31 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
32 #define VF_ARRAY 0x0002 /* 1 = it's an array */
33
34 #define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
35 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
36 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
37 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
38 #define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */
39 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
40 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
41
42 /* these flags are static, don't change them when value is changed */
43 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
44
/* One link in a chain of walker buffers. A var flagged VF_WALK holds such a
 * chain in x.walker; nvfree() releases it by following the prev pointers. */
45 typedef struct walker_list {
46 char *end; /* NOTE(review): presumably end of the data kept in wbuf - confirm */
47 char *cur; /* NOTE(review): presumably current read position in wbuf - confirm */
48 struct walker_list *prev; /* previously created link, NULL at the end of the chain */
49 char wbuf[1]; /* NOTE(review): looks like the start of a longer trailing buffer - confirm at the allocation site */
50 } walker_list;
51
52 /* Variable: carries a numeric and/or string value plus VF_* state flags */
53 typedef struct var_s {
54 unsigned type; /* flags */
55 double number; /* numeric value; valid when VF_NUMBER is set or after getvar_i() cached it */
56 char *string; /* string value or NULL; not freed by clrvar() when VF_FSTR is set */
/* Exactly one member of x is meaningful at a time (it is a union) */
57 union {
58 int aidx; /* func arg idx (for compilation stage) */
59 struct xhash_s *array; /* array ptr */
60 struct var_s *parent; /* for func args, ptr to actual parameter */
61 walker_list *walker; /* list of array elements (for..in) */
62 } x;
63 } var;
64
65 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
66 typedef struct chain_s {
67 struct node_s *first; /* first node of the chain */
68 struct node_s *last; /* last node of the chain */
69 const char *programname; /* NOTE(review): presumably the program source name used in diagnostics - confirm */
70 } chain;
71
72 /* Function: entry stored in the functions hash (see newfunc()) */
73 typedef struct func_s {
74 unsigned nargs; /* NOTE(review): presumably the number of declared parameters - confirm */
75 struct chain_s body; /* node chain making up the function body */
76 } func;
77
78 /* I/O stream: entry stored in the redirect streams hash (see newfile()) */
79 typedef struct rstream_s {
80 FILE *F; /* underlying stdio stream */
81 char *buffer; /* NOTE(review): presumably the read buffer for record input - confirm */
82 int adv; /* NOTE(review): buffer bookkeeping; exact meaning not visible here */
83 int size; /* NOTE(review): buffer bookkeeping; exact meaning not visible here */
84 int pos; /* NOTE(review): buffer bookkeeping; exact meaning not visible here */
85 smallint is_pipe; /* NOTE(review): presumably nonzero when F is a pipe - confirm */
86 } rstream;
87
/* One entry of an xhash bucket chain.
 * `data' must remain the first member: hash_find() stores the pointer returned
 * by hash_search() (which points at `data') in a hash_item pointer. */
88 typedef struct hash_item_s {
89 union {
90 struct var_s v; /* variable/array hash */
91 struct rstream_s rs; /* redirect streams hash */
92 struct func_s f; /* functions hash */
93 } data;
94 struct hash_item_s *next; /* next in chain */
95 char name[1]; /* really it's longer */
/* hash_find() allocates sizeof(hash_item) + strlen(name) + 1 bytes per entry */
96 } hash_item;
97
/* Chained hash table keyed by NUL-terminated names */
98 typedef struct xhash_s {
99 unsigned nel; /* num of elements */
100 unsigned csize; /* current hash size */
101 unsigned nprime; /* next hash size in PRIMES[] */
102 unsigned glen; /* summary length of item names */
/* glen counts strlen(name) + 1 for every stored item (see hash_find/hash_remove) */
103 struct hash_item_s **items; /* csize bucket heads, each a singly linked chain */
104 } xhash;
105
106 /* Tree node of the parsed program */
107 typedef struct node_s {
108 uint32_t info; /* operation class, flags and priority bits (OC_xxx, OF_xxx, P(n)) */
109 unsigned lineno; /* value of g_lineno when new_node() created the node */
/* left operand; which member is valid depends on info */
110 union {
111 struct node_s *n;
112 var *v;
113 int aidx;
114 char *new_progname;
115 regex_t *re;
116 } l;
/* right operand; which member is valid depends on info */
117 union {
118 struct node_s *n;
119 regex_t *ire;
120 func *f;
121 } r;
/* NOTE(review): parse_expr() stores the node this one is attached to in a.n - confirm other uses */
122 union {
123 struct node_s *n;
124 } a;
125 } node;
126
127 /* Block of temporary variables, managed by nvalloc()/nvfree() */
128 typedef struct nvblock_s {
129 int size; /* capacity of nv[] in variables */
130 var *pos; /* first unused variable in nv[] */
131 struct nvblock_s *prev; /* block allocated before this one, or NULL */
132 struct nvblock_s *next; /* block allocated after this one, or NULL */
133 var nv[]; /* storage for the variables themselves */
134 } nvblock;
135
/* A node bundled with storage for two compiled regular expressions.
 * NOTE(review): presumably re[0]/re[1] are the case-sensitive and
 * case-insensitive variants as set up by mk_re_node() - confirm at the users. */
136 typedef struct tsplitter_s {
137 node n;
138 regex_t re[2];
139 } tsplitter;
140
141 /* simple token classes */
142 /* Order and hex values are very important!!! See next_token() */
143 #define TC_SEQSTART 1 /* ( */
144 #define TC_SEQTERM (1 << 1) /* ) */
145 #define TC_REGEXP (1 << 2) /* /.../ */
146 #define TC_OUTRDR (1 << 3) /* | > >> */
147 #define TC_UOPPOST (1 << 4) /* unary postfix operator */
148 #define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
149 #define TC_BINOPX (1 << 6) /* two-opnd operator */
150 #define TC_IN (1 << 7)
151 #define TC_COMMA (1 << 8)
152 #define TC_PIPE (1 << 9) /* input redirection pipe */
153 #define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
154 #define TC_ARRTERM (1 << 11) /* ] */
155 #define TC_GRPSTART (1 << 12) /* { */
156 #define TC_GRPTERM (1 << 13) /* } */
157 #define TC_SEMICOL (1 << 14)
158 #define TC_NEWLINE (1 << 15)
159 #define TC_STATX (1 << 16) /* ctl statement (for, next...) */
160 #define TC_WHILE (1 << 17)
161 #define TC_ELSE (1 << 18)
162 #define TC_BUILTIN (1 << 19)
163 #define TC_GETLINE (1 << 20)
164 #define TC_FUNCDECL (1 << 21) /* `function' `func' */
165 #define TC_BEGIN (1 << 22)
166 #define TC_END (1 << 23)
167 #define TC_EOF (1 << 24)
168 #define TC_VARIABLE (1 << 25)
169 #define TC_ARRAY (1 << 26)
170 #define TC_FUNCTION (1 << 27)
171 #define TC_STRING (1 << 28)
172 #define TC_NUMBER (1 << 29)
173
174 #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
175
176 /* combined token classes */
177 #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
178 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
179 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
180 | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
181
182 #define TC_STATEMNT (TC_STATX | TC_WHILE)
183 #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
184
185 /* word tokens, cannot mean something else if not expected */
186 #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
187 | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
188
189 /* discard newlines after these */
190 #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
191 | TC_BINOP | TC_OPTERM)
192
193 /* what can expression begin with */
194 #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
195 /* what can group begin with */
196 #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
197
198 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
199 /* operator is inserted between them */
200 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
201 | TC_STRING | TC_NUMBER | TC_UOPPOST)
202 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
203
204 #define OF_RES1 0x010000
205 #define OF_RES2 0x020000
206 #define OF_STR1 0x040000
207 #define OF_STR2 0x080000
208 #define OF_NUM1 0x100000
209 #define OF_CHECKED 0x200000
210
211 /* combined operator flags */
212 #define xx 0
213 #define xV OF_RES2
214 #define xS (OF_RES2 | OF_STR2)
215 #define Vx OF_RES1
216 #define VV (OF_RES1 | OF_RES2)
217 #define Nx (OF_RES1 | OF_NUM1)
218 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
219 #define Sx (OF_RES1 | OF_STR1)
220 #define SV (OF_RES1 | OF_STR1 | OF_RES2)
221 #define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
222
223 #define OPCLSMASK 0xFF00
224 #define OPNMASK 0x007F
225
226 /* operator priority is a highest byte (even: r->l, odd: l->r grouping)
227 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
228 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
229 */
230 #define P(x) (x << 24)
231 #define PRIMASK 0x7F000000
232 #define PRIMASK2 0x7E000000
233
234 /* Operation classes */
235
236 #define SHIFT_TIL_THIS 0x0600
237 #define RECUR_FROM_THIS 0x1000
238
239 enum {
240 OC_DELETE = 0x0100, OC_EXEC = 0x0200, OC_NEWSOURCE = 0x0300,
241 OC_PRINT = 0x0400, OC_PRINTF = 0x0500, OC_WALKINIT = 0x0600,
242
243 OC_BR = 0x0700, OC_BREAK = 0x0800, OC_CONTINUE = 0x0900,
244 OC_EXIT = 0x0a00, OC_NEXT = 0x0b00, OC_NEXTFILE = 0x0c00,
245 OC_TEST = 0x0d00, OC_WALKNEXT = 0x0e00,
246
247 OC_BINARY = 0x1000, OC_BUILTIN = 0x1100, OC_COLON = 0x1200,
248 OC_COMMA = 0x1300, OC_COMPARE = 0x1400, OC_CONCAT = 0x1500,
249 OC_FBLTIN = 0x1600, OC_FIELD = 0x1700, OC_FNARG = 0x1800,
250 OC_FUNC = 0x1900, OC_GETLINE = 0x1a00, OC_IN = 0x1b00,
251 OC_LAND = 0x1c00, OC_LOR = 0x1d00, OC_MATCH = 0x1e00,
252 OC_MOVE = 0x1f00, OC_PGETLINE = 0x2000, OC_REGEXP = 0x2100,
253 OC_REPLACE = 0x2200, OC_RETURN = 0x2300, OC_SPRINTF = 0x2400,
254 OC_TERNARY = 0x2500, OC_UNARY = 0x2600, OC_VAR = 0x2700,
255 OC_DONE = 0x2800,
256
257 ST_IF = 0x3000, ST_DO = 0x3100, ST_FOR = 0x3200,
258 ST_WHILE = 0x3300
259 };
260
261 /* simple builtins */
262 enum {
263 F_in, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
264 F_ti, F_le, F_sy, F_ff, F_cl
265 };
266
267 /* builtins */
268 enum {
269 B_a2, B_ix, B_ma, B_sp, B_ss, B_ti, B_mt, B_lo, B_up,
270 B_ge, B_gs, B_su,
271 B_an, B_co, B_ls, B_or, B_rs, B_xo,
272 };
273
274 /* tokens and their corresponding info values */
275
276 #define NTC "\377" /* switch to next token class (tc<<1) */
277 #define NTCC '\377'
278
279 #define OC_B OC_BUILTIN
280
281 static const char tokenlist[] ALIGN1 =
282 "\1(" NTC
283 "\1)" NTC
284 "\1/" NTC /* REGEXP */
285 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
286 "\2++" "\2--" NTC /* UOPPOST */
287 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
288 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
289 "\2*=" "\2/=" "\2%=" "\2^="
290 "\1+" "\1-" "\3**=" "\2**"
291 "\1/" "\1%" "\1^" "\1*"
292 "\2!=" "\2>=" "\2<=" "\1>"
293 "\1<" "\2!~" "\1~" "\2&&"
294 "\2||" "\1?" "\1:" NTC
295 "\2in" NTC
296 "\1," NTC
297 "\1|" NTC
298 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
299 "\1]" NTC
300 "\1{" NTC
301 "\1}" NTC
302 "\1;" NTC
303 "\1\n" NTC
304 "\2if" "\2do" "\3for" "\5break" /* STATX */
305 "\10continue" "\6delete" "\5print"
306 "\6printf" "\4next" "\10nextfile"
307 "\6return" "\4exit" NTC
308 "\5while" NTC
309 "\4else" NTC
310
311 "\3and" "\5compl" "\6lshift" "\2or"
312 "\6rshift" "\3xor"
313 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
314 "\3cos" "\3exp" "\3int" "\3log"
315 "\4rand" "\3sin" "\4sqrt" "\5srand"
316 "\6gensub" "\4gsub" "\5index" "\6length"
317 "\5match" "\5split" "\7sprintf" "\3sub"
318 "\6substr" "\7systime" "\10strftime" "\6mktime"
319 "\7tolower" "\7toupper" NTC
320 "\7getline" NTC
321 "\4func" "\10function" NTC
322 "\5BEGIN" NTC
323 "\3END" "\0"
324 ;
325
326 static const uint32_t tokeninfo[] = {
327 0,
328 0,
329 OC_REGEXP,
330 xS|'a', xS|'w', xS|'|',
331 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
332 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
333 OC_FIELD|xV|P(5),
334 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
335 OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
336 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
337 OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
338 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
339 OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
340 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
341 OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
342 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
343 OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
344 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
345 OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
346 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
347 OC_COLON|xx|P(67)|':',
348 OC_IN|SV|P(49),
349 OC_COMMA|SS|P(80),
350 OC_PGETLINE|SV|P(37),
351 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
352 OC_UNARY|xV|P(19)|'!',
353 0,
354 0,
355 0,
356 0,
357 0,
358 ST_IF, ST_DO, ST_FOR, OC_BREAK,
359 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
360 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
361 OC_RETURN|Vx, OC_EXIT|Nx,
362 ST_WHILE,
363 0,
364
365 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
366 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
367 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
368 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
369 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
370 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
371 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
372 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
373 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
374 OC_GETLINE|SV|P(0),
375 0, 0,
376 0,
377 0
378 };
379
380 /* internal variable names and their initial values */
381 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
382 enum {
383 CONVFMT, OFMT, FS, OFS,
384 ORS, RS, RT, FILENAME,
385 SUBSEP, F0, ARGIND, ARGC,
386 ARGV, ERRNO, FNR, NR,
387 NF, IGNORECASE, ENVIRON, NUM_INTERNAL_VARS
388 };
389
390 static const char vNames[] ALIGN1 =
391 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
392 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
393 "SUBSEP\0" "$\0*" "ARGIND\0" "ARGC\0"
394 "ARGV\0" "ERRNO\0" "FNR\0" "NR\0"
395 "NF\0*" "IGNORECASE\0*" "ENVIRON\0" "\0";
396
397 static const char vValues[] ALIGN1 =
398 "%.6g\0" "%.6g\0" " \0" " \0"
399 "\n\0" "\n\0" "\0" "\0"
400 "\034\0" "\0" "\377";
401
402 /* hash size may grow to these values */
403 #define FIRST_PRIME 61
404 static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
405
406
407 /* Globals. Split in two parts so that first one is addressed
408 * with (mostly short) negative offsets.
409 * NB: it's unsafe to put members of type "double"
410 * into globals2 (gcc may fail to align them).
411 */
412 struct globals {
413 double t_double;
414 chain beginseq, mainseq, endseq;
415 chain *seq;
416 node *break_ptr, *continue_ptr;
417 rstream *iF;
418 xhash *vhash, *ahash, *fdhash, *fnhash;
419 const char *g_progname;
420 int g_lineno;
421 int nfields;
422 int maxfields; /* used in fsrealloc() only */
423 var *Fields;
424 nvblock *g_cb;
425 char *g_pos;
426 char *g_buf;
427 smallint icase;
428 smallint exiting;
429 smallint nextrec;
430 smallint nextfile;
431 smallint is_f0_split;
432 };
433 struct globals2 {
434 uint32_t t_info; /* often used */
435 uint32_t t_tclass;
436 char *t_string;
437 int t_lineno;
438 int t_rollback;
439
440 var *intvar[NUM_INTERNAL_VARS]; /* often used */
441
442 /* former statics from various functions */
443 char *split_f0__fstrings;
444
445 uint32_t next_token__save_tclass;
446 uint32_t next_token__save_info;
447 uint32_t next_token__ltclass;
448 smallint next_token__concat_inserted;
449
450 smallint next_input_file__files_happen;
451 rstream next_input_file__rsm;
452
453 var *evaluate__fnargs;
454 unsigned evaluate__seed;
455 regex_t evaluate__sreg;
456
457 var ptest__v;
458
459 tsplitter exec_builtin__tspl;
460
461 /* biggest and least used members go last */
462 tsplitter fsplitter, rsplitter;
463 };
464 #define G1 (ptr_to_globals[-1])
465 #define G (*(struct globals2 *)ptr_to_globals)
466 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
467 /*char G1size[sizeof(G1)]; - 0x74 */
468 /*char Gsize[sizeof(G)]; - 0x1c4 */
469 /* Trying to keep most of members accessible with short offsets: */
470 /*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
471 #define t_double (G1.t_double )
472 #define beginseq (G1.beginseq )
473 #define mainseq (G1.mainseq )
474 #define endseq (G1.endseq )
475 #define seq (G1.seq )
476 #define break_ptr (G1.break_ptr )
477 #define continue_ptr (G1.continue_ptr)
478 #define iF (G1.iF )
479 #define vhash (G1.vhash )
480 #define ahash (G1.ahash )
481 #define fdhash (G1.fdhash )
482 #define fnhash (G1.fnhash )
483 #define g_progname (G1.g_progname )
484 #define g_lineno (G1.g_lineno )
485 #define nfields (G1.nfields )
486 #define maxfields (G1.maxfields )
487 #define Fields (G1.Fields )
488 #define g_cb (G1.g_cb )
489 #define g_pos (G1.g_pos )
490 #define g_buf (G1.g_buf )
491 #define icase (G1.icase )
492 #define exiting (G1.exiting )
493 #define nextrec (G1.nextrec )
494 #define nextfile (G1.nextfile )
495 #define is_f0_split (G1.is_f0_split )
496 #define t_info (G.t_info )
497 #define t_tclass (G.t_tclass )
498 #define t_string (G.t_string )
499 #define t_lineno (G.t_lineno )
500 #define t_rollback (G.t_rollback )
501 #define intvar (G.intvar )
502 #define fsplitter (G.fsplitter )
503 #define rsplitter (G.rsplitter )
504 #define INIT_G() do { \
505 SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
506 G.next_token__ltclass = TC_OPTERM; \
507 G.evaluate__seed = 1; \
508 } while (0)
509
510
511 /* function prototypes */
512 static void handle_special(var *);
513 static node *parse_expr(uint32_t);
514 static void chain_group(void);
515 static var *evaluate(node *, var *);
516 static rstream *next_input_file(void);
517 static int fmt_num(char *, int, const char *, double, int);
518 static int awk_exit(int) NORETURN;
519
520 /* ---- error handling ---- */
521
522 static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
523 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
524 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
525 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
526 static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
527 static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
528 static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
529 static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
530 static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
531 #if !ENABLE_FEATURE_AWK_LIBM
532 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
533 #endif
534
535 static void zero_out_var(var *vp)
536 {
537 memset(vp, 0, sizeof(*vp));
538 }
539
540 static void syntax_error(const char *message) NORETURN;
541 static void syntax_error(const char *message)
542 {
543 bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
544 }
545
546 /* ---- hash stuff ---- */
547
/* Compute the hash value of a NUL-terminated name: for every character,
 * hash = hash * 63 + character (unsigned arithmetic, wraps on overflow).
 * The caller reduces the result modulo the table size. */
static unsigned hashidx(const char *name)
{
	unsigned h = 0;
	const char *p;

	for (p = name; *p != '\0'; p++)
		h = h * 63u + (unsigned)*p;
	return h;
}
556
557 /* create new hash */
558 static xhash *hash_init(void)
559 {
560 xhash *newhash;
561
562 newhash = xzalloc(sizeof(*newhash));
563 newhash->csize = FIRST_PRIME;
564 newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
565
566 return newhash;
567 }
568
569 /* find item in hash, return ptr to data, NULL if not found */
570 static void *hash_search(xhash *hash, const char *name)
571 {
572 hash_item *hi;
573
574 hi = hash->items[hashidx(name) % hash->csize];
575 while (hi) {
576 if (strcmp(hi->name, name) == 0)
577 return &hi->data;
578 hi = hi->next;
579 }
580 return NULL;
581 }
582
583 /* grow hash if it becomes too big */
584 static void hash_rebuild(xhash *hash)
585 {
586 unsigned newsize, i, idx;
587 hash_item **newitems, *hi, *thi;
588
589 if (hash->nprime == ARRAY_SIZE(PRIMES))
590 return;
591
592 newsize = PRIMES[hash->nprime++];
593 newitems = xzalloc(newsize * sizeof(newitems[0]));
594
595 for (i = 0; i < hash->csize; i++) {
596 hi = hash->items[i];
597 while (hi) {
598 thi = hi;
599 hi = thi->next;
600 idx = hashidx(thi->name) % newsize;
601 thi->next = newitems[idx];
602 newitems[idx] = thi;
603 }
604 }
605
606 free(hash->items);
607 hash->csize = newsize;
608 hash->items = newitems;
609 }
610
611 /* find item in hash, add it if necessary. Return ptr to data */
612 static void *hash_find(xhash *hash, const char *name)
613 {
614 hash_item *hi;
615 unsigned idx;
616 int l;
617
618 hi = hash_search(hash, name);
619 if (!hi) {
620 if (++hash->nel / hash->csize > 10)
621 hash_rebuild(hash);
622
623 l = strlen(name) + 1;
624 hi = xzalloc(sizeof(*hi) + l);
625 strcpy(hi->name, name);
626
627 idx = hashidx(name) % hash->csize;
628 hi->next = hash->items[idx];
629 hash->items[idx] = hi;
630 hash->glen += l;
631 }
632 return &hi->data;
633 }
634
635 #define findvar(hash, name) ((var*) hash_find((hash), (name)))
636 #define newvar(name) ((var*) hash_find(vhash, (name)))
637 #define newfile(name) ((rstream*)hash_find(fdhash, (name)))
638 #define newfunc(name) ((func*) hash_find(fnhash, (name)))
639
640 static void hash_remove(xhash *hash, const char *name)
641 {
642 hash_item *hi, **phi;
643
644 phi = &hash->items[hashidx(name) % hash->csize];
645 while (*phi) {
646 hi = *phi;
647 if (strcmp(hi->name, name) == 0) {
648 hash->glen -= (strlen(name) + 1);
649 hash->nel--;
650 *phi = hi->next;
651 free(hi);
652 break;
653 }
654 phi = &hi->next;
655 }
656 }
657
658 /* ------ some useful functions ------ */
659
660 static char *skip_spaces(char *p)
661 {
662 while (1) {
663 if (*p == '\\' && p[1] == '\n') {
664 p++;
665 t_lineno++;
666 } else if (*p != ' ' && *p != '\t') {
667 break;
668 }
669 p++;
670 }
671 return p;
672 }
673
674 /* returns old *s, advances *s past word and terminating NUL */
/* Return the word *s currently points to and advance *s to the byte that
 * follows the word's terminating NUL. */
static char *nextword(char **s)
{
	char *word = *s;

	*s = word + strlen(word) + 1;
	return word;
}
682
/* Fetch one character from *s and advance *s past it. A backslash is handed
 * to bb_process_escape_sequence(); when that call yields a backslash without
 * advancing *s, the character following the backslash is returned instead. */
static char nextchar(char **s)
{
	char ch = *(*s)++;

	if (ch == '\\') {
		char *after_bs = *s;

		ch = bb_process_escape_sequence((const char **)s);
		if (ch == '\\' && *s == after_bs)
			ch = *(*s)++;
	}
	return ch;
}
695
696 static ALWAYS_INLINE int isalnum_(int c)
697 {
698 return (isalnum(c) || c == '_');
699 }
700
/* Convert the number at *pp to double and advance *pp past the consumed text.
 *
 * With ENABLE_DESKTOP, text starting with "0x"/"0X" or "0<digit>" is first
 * tried as a hex or octal integer via strtoull(..., 0). The integer result is
 * only accepted if parsing did not stop at a digit or a '.': input such as
 * "009.5" or "00.5" is a decimal/floating number with leading zeros, and
 * accepting the octal prefix would silently truncate it. Such input is
 * re-parsed from the start with strtod().
 */
static double my_strtod(char **pp)
{
	char *cp = *pp;
#if ENABLE_DESKTOP
	if (cp[0] == '0') {
		char c = (cp[1] | 0x20);

		if (c == 'x' || isdigit((unsigned char)cp[1])) {
			unsigned long long ull = strtoull(cp, pp, 0);

			if (c == 'x')
				return ull;
			c = **pp;
			if (!isdigit((unsigned char)c) && c != '.')
				return ull;
			/* stopped at '8', '9' or '.': not an octal integer,
			 * fall through and let strtod() parse it again */
		}
	}
#endif
	return strtod(cp, pp);
}
712
713 /* -------- working with variables (set/get/copy/etc) -------- */
714
715 static xhash *iamarray(var *v)
716 {
717 var *a = v;
718
719 while (a->type & VF_CHILD)
720 a = a->x.parent;
721
722 if (!(a->type & VF_ARRAY)) {
723 a->type |= VF_ARRAY;
724 a->x.array = hash_init();
725 }
726 return a->x.array;
727 }
728
729 static void clear_array(xhash *array)
730 {
731 unsigned i;
732 hash_item *hi, *thi;
733
734 for (i = 0; i < array->csize; i++) {
735 hi = array->items[i];
736 while (hi) {
737 thi = hi;
738 hi = hi->next;
739 free(thi->data.v.string);
740 free(thi);
741 }
742 array->items[i] = NULL;
743 }
744 array->glen = array->nel = 0;
745 }
746
747 /* clear a variable */
748 static var *clrvar(var *v)
749 {
750 if (!(v->type & VF_FSTR))
751 free(v->string);
752
753 v->type &= VF_DONTTOUCH;
754 v->type |= VF_DIRTY;
755 v->string = NULL;
756 return v;
757 }
758
759 /* assign string value to variable */
760 static var *setvar_p(var *v, char *value)
761 {
762 clrvar(v);
763 v->string = value;
764 handle_special(v);
765 return v;
766 }
767
768 /* same as setvar_p but make a copy of string */
769 static var *setvar_s(var *v, const char *value)
770 {
771 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
772 }
773
774 /* same as setvar_s but sets USER flag */
775 static var *setvar_u(var *v, const char *value)
776 {
777 v = setvar_s(v, value);
778 v->type |= VF_USER;
779 return v;
780 }
781
782 /* set array element to user string */
783 static void setari_u(var *a, int idx, const char *s)
784 {
785 var *v;
786
787 v = findvar(iamarray(a), itoa(idx));
788 setvar_u(v, s);
789 }
790
791 /* assign numeric value to variable */
792 static var *setvar_i(var *v, double value)
793 {
794 clrvar(v);
795 v->type |= VF_NUMBER;
796 v->number = value;
797 handle_special(v);
798 return v;
799 }
800
801 static const char *getvar_s(var *v)
802 {
803 /* if v is numeric and has no cached string, convert it to string */
804 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
805 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
806 v->string = xstrdup(g_buf);
807 v->type |= VF_CACHED;
808 }
809 return (v->string == NULL) ? "" : v->string;
810 }
811
812 static double getvar_i(var *v)
813 {
814 char *s;
815
816 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
817 v->number = 0;
818 s = v->string;
819 if (s && *s) {
820 v->number = my_strtod(&s);
821 if (v->type & VF_USER) {
822 s = skip_spaces(s);
823 if (*s != '\0')
824 v->type &= ~VF_USER;
825 }
826 } else {
827 v->type &= ~VF_USER;
828 }
829 v->type |= VF_CACHED;
830 }
831 return v->number;
832 }
833
834 /* Used for operands of bitwise ops */
835 static unsigned long getvar_i_int(var *v)
836 {
837 double d = getvar_i(v);
838
839 /* Casting doubles to longs is undefined for values outside
840 * of target type range. Try to widen it as much as possible */
841 if (d >= 0)
842 return (unsigned long)d;
843 /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
844 return - (long) (unsigned long) (-d);
845 }
846
847 static var *copyvar(var *dest, const var *src)
848 {
849 if (dest != src) {
850 clrvar(dest);
851 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
852 dest->number = src->number;
853 if (src->string)
854 dest->string = xstrdup(src->string);
855 }
856 handle_special(dest);
857 return dest;
858 }
859
860 static var *incvar(var *v)
861 {
862 return setvar_i(v, getvar_i(v) + 1.0);
863 }
864
865 /* return true if v is number or numeric string */
866 static int is_numeric(var *v)
867 {
868 getvar_i(v);
869 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
870 }
871
872 /* return 1 when value of v corresponds to true, 0 otherwise */
873 static int istrue(var *v)
874 {
875 if (is_numeric(v))
876 return (v->number != 0);
877 return (v->string && v->string[0]);
878 }
879
880 /* temporary variables allocator. Last allocated should be first freed */
881 static var *nvalloc(int n)
882 {
883 nvblock *pb = NULL;
884 var *v, *r;
885 int size;
886
887 while (g_cb) {
888 pb = g_cb;
889 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
890 break;
891 g_cb = g_cb->next;
892 }
893
894 if (!g_cb) {
895 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
896 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
897 g_cb->size = size;
898 g_cb->pos = g_cb->nv;
899 g_cb->prev = pb;
900 /*g_cb->next = NULL; - xzalloc did it */
901 if (pb)
902 pb->next = g_cb;
903 }
904
905 v = r = g_cb->pos;
906 g_cb->pos += n;
907
908 while (v < g_cb->pos) {
909 v->type = 0;
910 v->string = NULL;
911 v++;
912 }
913
914 return r;
915 }
916
917 static void nvfree(var *v)
918 {
919 var *p;
920
921 if (v < g_cb->nv || v >= g_cb->pos)
922 syntax_error(EMSG_INTERNAL_ERROR);
923
924 for (p = v; p < g_cb->pos; p++) {
925 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
926 clear_array(iamarray(p));
927 free(p->x.array->items);
928 free(p->x.array);
929 }
930 if (p->type & VF_WALK) {
931 walker_list *n;
932 walker_list *w = p->x.walker;
933 debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
934 p->x.walker = NULL;
935 while (w) {
936 n = w->prev;
937 debug_printf_walker(" free(%p)\n", w);
938 free(w);
939 w = n;
940 }
941 }
942 clrvar(p);
943 }
944
945 g_cb->pos = v;
946 while (g_cb->prev && g_cb->pos == g_cb->nv) {
947 g_cb = g_cb->prev;
948 }
949 }
950
951 /* ------- awk program text parsing ------- */
952
953 /* Parse the next token at the global position g_pos. The token class goes to
954 * t_tclass, operator info to t_info, string/regexp/name text to t_string and
955 * numbers to t_double. A token whose class is not in `expected' is a syntax error. Returns the token class. */
956 static uint32_t next_token(uint32_t expected)
957 {
/* State kept between calls (former function statics, now in struct globals2) */
958 #define concat_inserted (G.next_token__concat_inserted)
959 #define save_tclass (G.next_token__save_tclass)
960 #define save_info (G.next_token__save_info)
961 /* Initialized to TC_OPTERM: */
962 #define ltclass (G.next_token__ltclass)
963
964 char *p, *s;
965 const char *tl;
966 uint32_t tc;
967 const uint32_t *ti;
968 int l;
969
/* After rollback_token(): deliver the previous token once more
 * (t_tclass and the other token globals are left as they are) */
970 if (t_rollback) {
971 t_rollback = FALSE;
972
/* The previous call returned an inserted concatenation operator;
 * now deliver the real token that was saved at that time */
973 } else if (concat_inserted) {
974 concat_inserted = FALSE;
975 t_tclass = save_tclass;
976 t_info = save_info;
977
978 } else {
979 p = g_pos;
980 readnext:
981 p = skip_spaces(p);
982 g_lineno = t_lineno;
/* A comment runs up to, but does not include, the end of line */
983 if (*p == '#')
984 while (*p != '\n' && *p != '\0')
985 p++;
986
987 if (*p == '\n')
988 t_lineno++;
989
990 if (*p == '\0') {
991 tc = TC_EOF;
992
993 } else if (*p == '\"') {
994 /* it's a string */
/* The string is unescaped in place: s writes, p reads, and
 * t_string points at the first character after the opening quote */
995 t_string = s = ++p;
996 while (*p != '\"') {
997 char *pp = p;
998 if (*p == '\0' || *p == '\n')
999 syntax_error(EMSG_UNEXP_EOS);
1000 *s++ = nextchar(&pp);
1001 p = pp;
1002 }
1003 p++;
1004 *s = '\0';
1005 tc = TC_STRING;
1006
/* '/' starts a regexp only when the caller expects one */
1007 } else if ((expected & TC_REGEXP) && *p == '/') {
1008 /* it's regexp */
/* Copied in place like a string. NOTE(review): the backslash
 * handling depends on bb_process_escape_sequence(), which is
 * not visible here - confirm before changing this loop */
1009 t_string = s = ++p;
1010 while (*p != '/') {
1011 if (*p == '\0' || *p == '\n')
1012 syntax_error(EMSG_UNEXP_EOS);
1013 *s = *p++;
1014 if (*s++ == '\\') {
1015 char *pp = p;
1016 s[-1] = bb_process_escape_sequence((const char **)&pp);
1017 if (*p == '\\')
1018 *s++ = '\\';
1019 if (pp == p)
1020 *s++ = *p++;
1021 else
1022 p = pp;
1023 }
1024 }
1025 p++;
1026 *s = '\0';
1027 tc = TC_REGEXP;
1028
1029 } else if (*p == '.' || isdigit(*p)) {
1030 /* it's a number */
1031 char *pp = p;
1032 t_double = my_strtod(&pp);
1033 p = pp;
/* a '.' directly after the converted number is rejected */
1034 if (*pp == '.')
1035 syntax_error(EMSG_UNEXP_TOKEN);
1036 tc = TC_NUMBER;
1037
1038 } else {
1039 /* search for something known */
/* tokenlist[] entries are a length byte followed by the token text;
 * an NTCC byte moves on to the next token class (tc <<= 1).
 * tokeninfo[] is walked in step with the token entries. */
1040 tl = tokenlist;
1041 tc = 0x00000001;
1042 ti = tokeninfo;
1043 while (*tl) {
1044 l = *tl++;
1045 if (l == NTCC) {
1046 tc <<= 1;
1047 continue;
1048 }
1049 /* if token class is expected, token
1050 * matches and it's not a longer word,
1051 * then this is what we are looking for
1052 */
/* word classes and newline are recognized even when not expected */
1053 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1054 && *tl == *p && strncmp(p, tl, l) == 0
1055 && !((tc & TC_WORD) && isalnum_(p[l]))
1056 ) {
1057 t_info = *ti;
1058 p += l;
1059 break;
1060 }
1061 ti++;
1062 tl += l;
1063 }
1064
/* *tl is the list terminator here only when nothing matched */
1065 if (!*tl) {
1066 /* it's a name (var/array/function),
1067 * otherwise it's something wrong
1068 */
1069 if (!isalnum_(*p))
1070 syntax_error(EMSG_UNEXP_TOKEN);
1071
/* Shift the name one byte to the left so that its terminating NUL
 * fits without overwriting the character that follows the name */
1072 t_string = --p;
1073 while (isalnum_(*++p)) {
1074 p[-1] = *p;
1075 }
1076 p[-1] = '\0';
1077 tc = TC_VARIABLE;
1078 /* also consume whitespace between functionname and bracket */
1079 if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
1080 p = skip_spaces(p);
/* '(' is left in the input for the caller; '[' is consumed here */
1081 if (*p == '(') {
1082 tc = TC_FUNCTION;
1083 } else {
1084 if (*p == '[') {
1085 p++;
1086 tc = TC_ARRAY;
1087 }
1088 }
1089 }
1090 }
1091 g_pos = p;
1092
1093 /* skipping newlines in some cases */
/* (when the previous token's class is in TC_NOTERM) */
1094 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1095 goto readnext;
1096
1097 /* insert concatenation operator when needed */
/* The real token is saved and handed out by the next call */
1098 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1099 concat_inserted = TRUE;
1100 save_tclass = tc;
1101 save_info = t_info;
1102 tc = TC_BINOP;
1103 t_info = OC_CONCAT | SS | P(35);
1104 }
1105
1106 t_tclass = tc;
1107 }
/* remember the class of the token being returned for the next call */
1108 ltclass = t_tclass;
1109
1110 /* Are we ready for this? */
1111 if (!(ltclass & expected))
1112 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1113 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1114
1115 return ltclass;
1116 #undef concat_inserted
1117 #undef save_tclass
1118 #undef save_info
1119 #undef ltclass
1120 }
1121
1122 static void rollback_token(void)
1123 {
1124 t_rollback = TRUE;
1125 }
1126
1127 static node *new_node(uint32_t info)
1128 {
1129 node *n;
1130
1131 n = xzalloc(sizeof(node));
1132 n->info = info;
1133 n->lineno = g_lineno;
1134 return n;
1135 }
1136
1137 static void mk_re_node(const char *s, node *n, regex_t *re)
1138 {
1139 n->info = OC_REGEXP;
1140 n->l.re = re;
1141 n->r.ire = re + 1;
1142 xregcomp(re, s, REG_EXTENDED);
1143 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1144 }
1145
1146 static node *condition(void)
1147 {
1148 next_token(TC_SEQSTART);
1149 return parse_expr(TC_SEQTERM);
1150 }
1151
1152 /* Parse an expression terminated by a token whose class is in iexp and
1153 * return a pointer to the built subtree. The terminator is eaten here. */
/* Implementation notes:
 *  - sn is a stack-allocated sentinel root; the finished tree is sn.r.n.
 *  - cn is the node most recently attached; each node's a.n link points
 *    back at the node it was attached under.
 *  - xtc is the set of token classes acceptable for the next token.
 *  - glptr remembers the latest getline node so that a following '<'
 *    can be parsed as its input redirection rather than a comparison.
 */
1154 static node *parse_expr(uint32_t iexp)
1155 {
1156 node sn;
1157 node *cn = &sn;
1158 node *vn, *glptr;
1159 uint32_t tc, xtc;
1160 var *v;
1161
/* presumably PRIMASK in the sentinel makes the priority climb below stop at it */
1162 sn.info = PRIMASK;
1163 sn.r.n = glptr = NULL;
1164 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1165
/* consume tokens until one of the requested terminator classes shows up */
1166 while (!((tc = next_token(xtc)) & iexp)) {
1167 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1168 /* input redirection (<) attached to glptr node */
1169 cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1170 cn->a.n = glptr;
1171 xtc = TC_OPERAND | TC_UOPPRE;
1172 glptr = NULL;
1173
1174 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1175 /* for binary and postfix-unary operators, jump back over
1176 * previous operators with higher priority */
1177 vn = cn;
1178 while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1179 || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
1180 ) {
1181 vn = vn->a.n;
1182 }
1183 if ((t_info & OPCLSMASK) == OC_TERNARY)
1184 t_info += P(6);
/* splice the new operator node in between vn and vn's former parent */
1185 cn = vn->a.n->r.n = new_node(t_info);
1186 cn->a.n = vn->a.n;
1187 if (tc & TC_BINOP) {
/* binary: what was parsed so far becomes the left operand */
1188 cn->l.n = vn;
1189 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1190 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1191 /* it's a pipe */
1192 next_token(TC_GETLINE);
1193 /* give maximum priority to this pipe */
1194 cn->info &= ~PRIMASK;
1195 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1196 }
1197 } else {
/* postfix unary: the operand is stored on the right side */
1198 cn->r.n = vn;
1199 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1200 }
1201 vn->a.n = cn;
1202
1203 } else {
1204 /* for operands and prefix-unary operators, attach them
1205 * to last node */
1206 vn = cn;
1207 cn = vn->r.n = new_node(t_info);
1208 cn->a.n = vn;
1209 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1210 if (tc & (TC_OPERAND | TC_REGEXP)) {
1211 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1212 /* one should be very careful with switch on tclass -
1213 * only simple tclasses should be used! */
1214 switch (tc) {
1215 case TC_VARIABLE:
1216 case TC_ARRAY:
1217 cn->info = OC_VAR;
/* a name found in ahash is referenced by argument index (OC_FNARG)
 * instead of by variable pointer */
1218 v = hash_search(ahash, t_string);
1219 if (v != NULL) {
1220 cn->info = OC_FNARG;
1221 cn->l.aidx = v->x.aidx;
1222 } else {
1223 cn->l.v = newvar(t_string);
1224 }
1225 if (tc & TC_ARRAY) {
/* array element: the subscript expression goes into r.n */
1226 cn->info |= xS;
1227 cn->r.n = parse_expr(TC_ARRTERM);
1228 }
1229 break;
1230
1231 case TC_NUMBER:
1232 case TC_STRING:
/* literal: stored in a freshly allocated anonymous var */
1233 cn->info = OC_VAR;
1234 v = cn->l.v = xzalloc(sizeof(var));
1235 if (tc & TC_NUMBER)
1236 setvar_i(v, t_double);
1237 else
1238 setvar_s(v, t_string);
1239 break;
1240
1241 case TC_REGEXP:
/* two regex_t slots: mk_re_node compiles a plain and an ICASE variant */
1242 mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1243 break;
1244
1245 case TC_FUNCTION:
1246 cn->info = OC_FUNC;
1247 cn->r.f = newfunc(t_string);
1248 cn->l.n = condition();
1249 break;
1250
1251 case TC_SEQSTART:
/* bracketed sub-expression: replace the node just created by its subtree */
1252 cn = vn->r.n = parse_expr(TC_SEQTERM);
1253 cn->a.n = vn;
1254 break;
1255
1256 case TC_GETLINE:
1257 glptr = cn;
1258 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1259 break;
1260
1261 case TC_BUILTIN:
1262 cn->l.n = condition();
1263 break;
1264 }
1265 }
1266 }
1267 }
1268 return sn.r.n;
1269 }
1270
1271 /* add node to chain. Return ptr to alloc'd node */
1272 static node *chain_node(uint32_t info)
1273 {
1274 node *n;
1275
1276 if (!seq->first)
1277 seq->first = seq->last = new_node(0);
1278
1279 if (seq->programname != g_progname) {
1280 seq->programname = g_progname;
1281 n = chain_node(OC_NEWSOURCE);
1282 n->l.new_progname = xstrdup(g_progname);
1283 }
1284
1285 n = seq->last;
1286 n->info = info;
1287 seq->last = n->a.n = new_node(OC_DONE);
1288
1289 return n;
1290 }
1291
1292 static void chain_expr(uint32_t info)
1293 {
1294 node *n;
1295
1296 n = chain_node(info);
1297 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1298 if (t_tclass & TC_GRPTERM)
1299 rollback_token();
1300 }
1301
1302 static node *chain_loop(node *nn)
1303 {
1304 node *n, *n2, *save_brk, *save_cont;
1305
1306 save_brk = break_ptr;
1307 save_cont = continue_ptr;
1308
1309 n = chain_node(OC_BR | Vx);
1310 continue_ptr = new_node(OC_EXEC);
1311 break_ptr = new_node(OC_EXEC);
1312 chain_group();
1313 n2 = chain_node(OC_EXEC | Vx);
1314 n2->l.n = nn;
1315 n2->a.n = n;
1316 continue_ptr->a.n = n2;
1317 break_ptr->a.n = n->r.n = seq->last;
1318
1319 continue_ptr = save_cont;
1320 break_ptr = save_brk;
1321
1322 return n;
1323 }
1324
1325 /* parse group and attach it to chain */
1326 static void chain_group(void)
1327 {
1328 uint32_t c;
1329 node *n, *n2, *n3;
1330
1331 do {
1332 c = next_token(TC_GRPSEQ);
1333 } while (c & TC_NEWLINE);
1334
1335 if (c & TC_GRPSTART) {
1336 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1337 if (t_tclass & TC_NEWLINE)
1338 continue;
1339 rollback_token();
1340 chain_group();
1341 }
1342 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1343 rollback_token();
1344 chain_expr(OC_EXEC | Vx);
1345 } else { /* TC_STATEMNT */
1346 switch (t_info & OPCLSMASK) {
1347 case ST_IF:
1348 n = chain_node(OC_BR | Vx);
1349 n->l.n = condition();
1350 chain_group();
1351 n2 = chain_node(OC_EXEC);
1352 n->r.n = seq->last;
1353 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1354 chain_group();
1355 n2->a.n = seq->last;
1356 } else {
1357 rollback_token();
1358 }
1359 break;
1360
1361 case ST_WHILE:
1362 n2 = condition();
1363 n = chain_loop(NULL);
1364 n->l.n = n2;
1365 break;
1366
1367 case ST_DO:
1368 n2 = chain_node(OC_EXEC);
1369 n = chain_loop(NULL);
1370 n2->a.n = n->a.n;
1371 next_token(TC_WHILE);
1372 n->l.n = condition();
1373 break;
1374
1375 case ST_FOR:
1376 next_token(TC_SEQSTART);
1377 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1378 if (t_tclass & TC_SEQTERM) { /* for-in */
1379 if ((n2->info & OPCLSMASK) != OC_IN)
1380 syntax_error(EMSG_UNEXP_TOKEN);
1381 n = chain_node(OC_WALKINIT | VV);
1382 n->l.n = n2->l.n;
1383 n->r.n = n2->r.n;
1384 n = chain_loop(NULL);
1385 n->info = OC_WALKNEXT | Vx;
1386 n->l.n = n2->l.n;
1387 } else { /* for (;;) */
1388 n = chain_node(OC_EXEC | Vx);
1389 n->l.n = n2;
1390 n2 = parse_expr(TC_SEMICOL);
1391 n3 = parse_expr(TC_SEQTERM);
1392 n = chain_loop(n3);
1393 n->l.n = n2;
1394 if (!n2)
1395 n->info = OC_EXEC;
1396 }
1397 break;
1398
1399 case OC_PRINT:
1400 case OC_PRINTF:
1401 n = chain_node(t_info);
1402 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1403 if (t_tclass & TC_OUTRDR) {
1404 n->info |= t_info;
1405 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1406 }
1407 if (t_tclass & TC_GRPTERM)
1408 rollback_token();
1409 break;
1410
1411 case OC_BREAK:
1412 n = chain_node(OC_EXEC);
1413 n->a.n = break_ptr;
1414 break;
1415
1416 case OC_CONTINUE:
1417 n = chain_node(OC_EXEC);
1418 n->a.n = continue_ptr;
1419 break;
1420
1421 /* delete, next, nextfile, return, exit */
1422 default:
1423 chain_expr(t_info);
1424 }
1425 }
1426 }
1427
1428 static void parse_program(char *p)
1429 {
1430 uint32_t tclass;
1431 node *cn;
1432 func *f;
1433 var *v;
1434
1435 g_pos = p;
1436 t_lineno = 1;
1437 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1438 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1439
1440 if (tclass & TC_OPTERM)
1441 continue;
1442
1443 seq = &mainseq;
1444 if (tclass & TC_BEGIN) {
1445 seq = &beginseq;
1446 chain_group();
1447
1448 } else if (tclass & TC_END) {
1449 seq = &endseq;
1450 chain_group();
1451
1452 } else if (tclass & TC_FUNCDECL) {
1453 next_token(TC_FUNCTION);
1454 g_pos++;
1455 f = newfunc(t_string);
1456 f->body.first = NULL;
1457 f->nargs = 0;
1458 while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1459 v = findvar(ahash, t_string);
1460 v->x.aidx = f->nargs++;
1461
1462 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1463 break;
1464 }
1465 seq = &f->body;
1466 chain_group();
1467 clear_array(ahash);
1468
1469 } else if (tclass & TC_OPSEQ) {
1470 rollback_token();
1471 cn = chain_node(OC_TEST);
1472 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1473 if (t_tclass & TC_GRPSTART) {
1474 rollback_token();
1475 chain_group();
1476 } else {
1477 chain_node(OC_PRINT);
1478 }
1479 cn->r.n = mainseq.last;
1480
1481 } else /* if (tclass & TC_GRPSTART) */ {
1482 rollback_token();
1483 chain_group();
1484 }
1485 }
1486 }
1487
1488
1489 /* -------- program execution part -------- */
1490
1491 static node *mk_splitter(const char *s, tsplitter *spl)
1492 {
1493 regex_t *re, *ire;
1494 node *n;
1495
1496 re = &spl->re[0];
1497 ire = &spl->re[1];
1498 n = &spl->n;
1499 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1500 regfree(re);
1501 regfree(ire); // TODO: nuke ire, use re+1?
1502 }
1503 if (strlen(s) > 1) {
1504 mk_re_node(s, n, re);
1505 } else {
1506 n->info = (uint32_t) *s;
1507 }
1508
1509 return n;
1510 }
1511
1512 /* use node as a regular expression. Supplied with node ptr and regex_t
1513 * storage space. Return ptr to regex (if result points to preg, it should
1514 * be later regfree'd manually
1515 */
1516 static regex_t *as_regex(node *op, regex_t *preg)
1517 {
1518 int cflags;
1519 var *v;
1520 const char *s;
1521
1522 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1523 return icase ? op->r.ire : op->l.re;
1524 }
1525 v = nvalloc(1);
1526 s = getvar_s(evaluate(op, v));
1527
1528 cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1529 /* Testcase where REG_EXTENDED fails (unpaired '{'):
1530 * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1531 * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1532 * (maybe gsub is not supposed to use REG_EXTENDED?).
1533 */
1534 if (regcomp(preg, s, cflags)) {
1535 cflags &= ~REG_EXTENDED;
1536 xregcomp(preg, s, cflags);
1537 }
1538 nvfree(v);
1539 return preg;
1540 }
1541
1542 /* gradually increasing buffer.
1543 * note that we reallocate even if n == old_size,
1544 * and thus there is at least one extra allocated byte.
1545 */
1546 static char* qrealloc(char *b, int n, int *size)
1547 {
1548 if (!b || n >= *size) {
1549 *size = n + (n>>1) + 80;
1550 b = xrealloc(b, *size);
1551 }
1552 return b;
1553 }
1554
1555 /* resize field storage space */
1556 static void fsrealloc(int size)
1557 {
1558 int i;
1559
1560 if (size >= maxfields) {
1561 i = maxfields;
1562 maxfields = size + 16;
1563 Fields = xrealloc(Fields, maxfields * sizeof(var));
1564 for (; i < maxfields; i++) {
1565 Fields[i].type = VF_SPECIAL;
1566 Fields[i].string = NULL;
1567 }
1568 }
1569
1570 if (size < nfields) {
1571 for (i = size; i < nfields; i++) {
1572 clrvar(Fields + i);
1573 }
1574 }
1575 nfields = size;
1576 }
1577
/* Split string s into fields as directed by splitter node spl.
 * *slist receives a newly allocated buffer in which the fields are laid
 * out one after another, each NUL-terminated; the caller owns the buffer.
 * Returns the number of fields.
 * Four modes, chosen from spl->info:
 *   - regex splitter (OC_REGEXP)
 *   - NUL "character": every character of s becomes its own field
 *   - a single character other than ' ' (both cases when icase is set)
 *   - ' ': fields are runs of non-whitespace
 */
1578 static int awk_split(const char *s, node *spl, char **slist)
1579 {
1580 int l, n = 0;
1581 char c[4];
1582 char *s1;
1583 regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1584
1585 /* in worst case, each char would be a separate field */
1586 *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1587 strcpy(s1, s);
1588
/* c[0..1]: separator char (two slots for the upper/lower case pair),
 * c+2: extra stop set for regex mode - "\n" when RS is the empty string */
1589 c[0] = c[1] = (char)spl->info;
1590 c[2] = c[3] = '\0';
1591 if (*getvar_s(intvar[RS]) == '\0')
1592 c[2] = '\n';
1593
1594 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1595 if (!*s)
1596 return n; /* "": zero fields */
1597 n++; /* at least one field will be there */
1598 do {
1599 l = strcspn(s, c+2); /* len till next NUL or \n */
1600 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1601 && pmatch[0].rm_so <= l
1602 ) {
1603 l = pmatch[0].rm_so;
/* match ending at offset 0 is empty: consume one char so the loop advances */
1604 if (pmatch[0].rm_eo == 0) {
1605 l++;
1606 pmatch[0].rm_eo++;
1607 }
1608 n++; /* we saw yet another delimiter */
1609 } else {
/* no usable match: field runs to the stop char, which is skipped if present */
1610 pmatch[0].rm_eo = l;
1611 if (s[l])
1612 pmatch[0].rm_eo++;
1613 }
1614 memcpy(s1, s, l);
1615 /* make sure we remove *all* of the separator chars */
1616 do {
1617 s1[l] = '\0';
1618 } while (++l < pmatch[0].rm_eo);
1619 nextword(&s1);
1620 s += pmatch[0].rm_eo;
1621 } while (*s);
1622 return n;
1623 }
1624 if (c[0] == '\0') { /* null split */
1625 while (*s) {
1626 *s1++ = *s++;
1627 *s1++ = '\0';
1628 n++;
1629 }
1630 return n;
1631 }
1632 if (c[0] != ' ') { /* single-character split */
1633 if (icase) {
1634 c[0] = toupper(c[0]);
1635 c[1] = tolower(c[1]);
1636 }
/* s1 still holds the copy of s made above: cut it in place */
1637 if (*s1)
1638 n++;
1639 while ((s1 = strpbrk(s1, c))) {
1640 *s1++ = '\0';
1641 n++;
1642 }
1643 return n;
1644 }
1645 /* space split */
1646 while (*s) {
1647 s = skip_whitespace(s);
1648 if (!*s)
1649 break;
1650 n++;
1651 while (*s && !isspace(*s))
1652 *s1++ = *s++;
1653 *s1++ = '\0';
1654 }
1655 return n;
1656 }
1657
1658 static void split_f0(void)
1659 {
1660 /* static char *fstrings; */
1661 #define fstrings (G.split_f0__fstrings)
1662
1663 int i, n;
1664 char *s;
1665
1666 if (is_f0_split)
1667 return;
1668
1669 is_f0_split = TRUE;
1670 free(fstrings);
1671 fsrealloc(0);
1672 n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1673 fsrealloc(n);
1674 s = fstrings;
1675 for (i = 0; i < n; i++) {
1676 Fields[i].string = nextword(&s);
1677 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1678 }
1679
1680 /* set NF manually to avoid side effects */
1681 clrvar(intvar[NF]);
1682 intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1683 intvar[NF]->number = nfields;
1684 #undef fstrings
1685 }
1686
1687 /* perform additional actions when some internal variables changed */
1688 static void handle_special(var *v)
1689 {
1690 int n;
1691 char *b;
1692 const char *sep, *s;
1693 int sl, l, len, i, bsize;
1694
1695 if (!(v->type & VF_SPECIAL))
1696 return;
1697
1698 if (v == intvar[NF]) {
1699 n = (int)getvar_i(v);
1700 fsrealloc(n);
1701
1702 /* recalculate $0 */
1703 sep = getvar_s(intvar[OFS]);
1704 sl = strlen(sep);
1705 b = NULL;
1706 len = 0;
1707 for (i = 0; i < n; i++) {
1708 s = getvar_s(&Fields[i]);
1709 l = strlen(s);
1710 if (b) {
1711 memcpy(b+len, sep, sl);
1712 len += sl;
1713 }
1714 b = qrealloc(b, len+l+sl, &bsize);
1715 memcpy(b+len, s, l);
1716 len += l;
1717 }
1718 if (b)
1719 b[len] = '\0';
1720 setvar_p(intvar[F0], b);
1721 is_f0_split = TRUE;
1722
1723 } else if (v == intvar[F0]) {
1724 is_f0_split = FALSE;
1725
1726 } else if (v == intvar[FS]) {
1727 mk_splitter(getvar_s(v), &fsplitter);
1728
1729 } else if (v == intvar[RS]) {
1730 mk_splitter(getvar_s(v), &rsplitter);
1731
1732 } else if (v == intvar[IGNORECASE]) {
1733 icase = istrue(v);
1734
1735 } else { /* $n */
1736 n = getvar_i(intvar[NF]);
1737 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1738 /* right here v is invalid. Just to note... */
1739 }
1740 }
1741
1742 /* step through func/builtin/etc arguments */
1743 static node *nextarg(node **pn)
1744 {
1745 node *n;
1746
1747 n = *pn;
1748 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1749 *pn = n->r.n;
1750 n = n->l.n;
1751 } else {
1752 *pn = NULL;
1753 }
1754 return n;
1755 }
1756
1757 static void hashwalk_init(var *v, xhash *array)
1758 {
1759 hash_item *hi;
1760 unsigned i;
1761 walker_list *w;
1762 walker_list *prev_walker;
1763
1764 if (v->type & VF_WALK) {
1765 prev_walker = v->x.walker;
1766 } else {
1767 v->type |= VF_WALK;
1768 prev_walker = NULL;
1769 }
1770 debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
1771
1772 w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
1773 debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
1774 w->cur = w->end = w->wbuf;
1775 w->prev = prev_walker;
1776 for (i = 0; i < array->csize; i++) {
1777 hi = array->items[i];
1778 while (hi) {
1779 strcpy(w->end, hi->name);
1780 nextword(&w->end);
1781 hi = hi->next;
1782 }
1783 }
1784 }
1785
1786 static int hashwalk_next(var *v)
1787 {
1788 walker_list *w = v->x.walker;
1789
1790 if (w->cur >= w->end) {
1791 walker_list *prev_walker = w->prev;
1792
1793 debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
1794 free(w);
1795 v->x.walker = prev_walker;
1796 return FALSE;
1797 }
1798
1799 setvar_s(v, nextword(&w->cur));
1800 return TRUE;
1801 }
1802
1803 /* evaluate node, return 1 when result is true, 0 otherwise */
1804 static int ptest(node *pattern)
1805 {
1806 /* ptest__v is "static": to save stack space? */
1807 return istrue(evaluate(pattern, &G.ptest__v));
1808 }
1809
1810 /* read next record from stream rsm into a variable v */
/* Returns 1 when a record was read, 0 at end of input, -1 on read error
 * (ERRNO is set from errno in that case). RT receives the text of the
 * record terminator that was matched.
 *
 * Buffer bookkeeping (loaded from / stored back to rsm):
 *   m       start of the allocated buffer        (rsm->buffer)
 *   a       offset of the unconsumed data in m   (rsm->adv)
 *   p       number of unconsumed bytes           (rsm->pos)
 *   b       m + a, i.e. the unconsumed data
 *   so, eo  start / end offset of the terminator within b
 *   rp      number of leading newlines skipped when RS is NUL-char
 *   pp      value of p before the latest read
 */
1811 static int awk_getline(rstream *rsm, var *v)
1812 {
1813 char *b;
1814 regmatch_t pmatch[2];
1815 int size, a, p, pp = 0;
1816 int fd, so, eo, r, rp;
1817 char c, *m, *s;
1818
1819 /* we're using our own buffer since we need access to accumulating
1820 * characters
1821 */
1822 fd = fileno(rsm->F);
1823 m = rsm->buffer;
1824 a = rsm->adv;
1825 p = rsm->pos;
1826 size = rsm->size;
1827 c = (char) rsplitter.n.info;
1828 rp = 0;
1829
1830 if (!m)
1831 m = qrealloc(m, 256, &size);
1832
/* look for a terminator in what is buffered; read more until one is found
 * or no further data arrives */
1833 do {
1834 b = m + a;
1835 so = eo = p;
1836 r = 1;
1837 if (p > 0) {
1838 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1839 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1840 b, 1, pmatch, 0) == 0) {
1841 so = pmatch[0].rm_so;
1842 eo = pmatch[0].rm_eo;
/* a match touching the end of the buffered data is not accepted yet
 * (presumably because more input could extend it) */
1843 if (b[eo] != '\0')
1844 break;
1845 }
1846 } else if (c != '\0') {
/* single-character RS: search only the newly read part (from pp) */
1847 s = strchr(b+pp, c);
1848 if (!s)
1849 s = memchr(b+pp, '\0', p - pp);
1850 if (s) {
1851 so = eo = s-b;
1852 eo++;
1853 break;
1854 }
1855 } else {
/* NUL-char RS: skip leading newlines, terminator is a run of >= 2 newlines */
1856 while (b[rp] == '\n')
1857 rp++;
1858 s = strstr(b+rp, "\n\n");
1859 if (s) {
1860 so = eo = s-b;
1861 while (b[eo] == '\n')
1862 eo++;
1863 if (b[eo] != '\0')
1864 break;
1865 }
1866 }
1867 }
1868
/* move the unconsumed data (and its NUL) to the start of the buffer */
1869 if (a > 0) {
1870 memmove(m, m+a, p+1);
1871 b = m;
1872 a = 0;
1873 }
1874
1875 m = qrealloc(m, a+p+128, &size);
1876 b = m + a;
1877 pp = p;
1878 p += safe_read(fd, b+p, size-p-1);
/* p went down, i.e. the read returned a negative value: discard data, flag error */
1879 if (p < pp) {
1880 p = 0;
1881 r = 0;
1882 setvar_i(intvar[ERRNO], errno);
1883 }
1884 b[p] = '\0';
1885
1886 } while (p > pp);
1887
1888 if (p == 0) {
/* nothing buffered: r becomes 0 (end of input) or -1 (after a read error) */
1889 r--;
1890 } else {
/* temporarily NUL-terminate the record, then the terminator, to copy them out */
1891 c = b[so]; b[so] = '\0';
1892 setvar_s(v, b+rp);
1893 v->type |= VF_USER;
1894 b[so] = c;
1895 c = b[eo]; b[eo] = '\0';
1896 setvar_s(intvar[RT], b+so);
1897 b[eo] = c;
1898 }
1899
/* remember where the next record starts */
1900 rsm->buffer = m;
1901 rsm->adv = a + eo;
1902 rsm->pos = p - eo;
1903 rsm->size = size;
1904
1905 return r;
1906 }
1907
1908 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1909 {
1910 int r = 0;
1911 char c;
1912 const char *s = format;
1913
1914 if (int_as_int && n == (int)n) {
1915 r = snprintf(b, size, "%d", (int)n);
1916 } else {
1917 do { c = *s; } while (c && *++s);
1918 if (strchr("diouxX", c)) {
1919 r = snprintf(b, size, format, (int)n);
1920 } else if (strchr("eEfgG", c)) {
1921 r = snprintf(b, size, format, n);
1922 } else {
1923 syntax_error(EMSG_INV_FMT);
1924 }
1925 }
1926 return r;
1927 }
1928
1929 /* formatted output into an allocated buffer, return ptr to buffer */
1930 static char *awk_printf(node *n)
1931 {
1932 char *b = NULL;
1933 char *fmt, *s, *f;
1934 const char *s1;
1935 int i, j, incr, bsize;
1936 char c, c1;
1937 var *v, *arg;
1938
1939 v = nvalloc(1);
1940 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1941
1942 i = 0;
1943 while (*f) {
1944 s = f;
1945 while (*f && (*f != '%' || *++f == '%'))
1946 f++;
1947 while (*f && !isalpha(*f)) {
1948 if (*f == '*')
1949 syntax_error("%*x formats are not supported");
1950 f++;
1951 }
1952
1953 incr = (f - s) + MAXVARFMT;
1954 b = qrealloc(b, incr + i, &bsize);
1955 c = *f;
1956 if (c != '\0')
1957 f++;
1958 c1 = *f;
1959 *f = '\0';
1960 arg = evaluate(nextarg(&n), v);
1961
1962 j = i;
1963 if (c == 'c' || !c) {
1964 i += sprintf(b+i, s, is_numeric(arg) ?
1965 (char)getvar_i(arg) : *getvar_s(arg));
1966 } else if (c == 's') {
1967 s1 = getvar_s(arg);
1968 b = qrealloc(b, incr+i+strlen(s1), &bsize);
1969 i += sprintf(b+i, s, s1);
1970 } else {
1971 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1972 }
1973 *f = c1;
1974
1975 /* if there was an error while sprintf, return value is negative */
1976 if (i < j)
1977 i = j;
1978 }
1979
1980 free(fmt);
1981 nvfree(v);
1982 b = xrealloc(b, i + 1);
1983 b[i] = '\0';
1984 return b;
1985 }
1986
1987 /* Common substitution routine.
1988 * Replace (nm)'th substring of (src) that matches (rn) with (repl),
1989 * store result into (dest), return number of substitutions.
1990 * If nm = 0, replace all matches.
1991 * If src or dst is NULL, use $0.
1992 * If subexp != 0, enable subexpression matching (\1-\9).
1993 */
/* Working variables:
 *   sp      current scan position in the source string
 *   resbuf  result being assembled, residx = bytes stored so far
 *   nbs     number of consecutive backslashes seen just before the
 *           current replacement character
 */
1994 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
1995 {
1996 char *resbuf;
1997 const char *sp;
1998 int match_no, residx, replen, resbufsize;
1999 int regexec_flags;
2000 regmatch_t pmatch[10];
2001 regex_t sreg, *regex;
2002
2003 resbuf = NULL;
2004 residx = 0;
2005 match_no = 0;
2006 regexec_flags = 0;
/* regex == &sreg means it was compiled just for this call (freed at the end) */
2007 regex = as_regex(rn, &sreg);
2008 sp = getvar_s(src ? src : intvar[F0]);
2009 replen = strlen(repl);
2010 while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
2011 int so = pmatch[0].rm_so;
2012 int eo = pmatch[0].rm_eo;
2013
2014 //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
/* copy everything up to the end of the match; a replaced match is cut off again below */
2015 resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
2016 memcpy(resbuf + residx, sp, eo);
2017 residx += eo;
2018 if (++match_no >= nm) {
2019 const char *s;
2020 int nbs;
2021
2022 /* replace */
2023 residx -= (eo - so);
2024 nbs = 0;
2025 for (s = repl; *s; s++) {
2026 char c = resbuf[residx++] = *s;
2027 if (c == '\\') {
2028 nbs++;
2029 continue;
2030 }
2031 if (c == '&' || (subexp && c >= '0' && c <= '9')) {
2032 int j;
/* drop the special char and part of the backslashes stored before it */
2033 residx -= ((nbs + 3) >> 1);
2034 j = 0;
2035 if (c != '&') {
2036 j = c - '0';
2037 nbs++;
2038 }
/* odd count: the char is escaped and stays literal; even: insert matched text j */
2039 if (nbs % 2) {
2040 resbuf[residx++] = c;
2041 } else {
2042 int n = pmatch[j].rm_eo - pmatch[j].rm_so;
2043 resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
2044 memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
2045 residx += n;
2046 }
2047 }
2048 nbs = 0;
2049 }
2050 }
2051
/* later searches start in mid-string: do not let '^' match there */
2052 regexec_flags = REG_NOTBOL;
2053 sp += eo;
/* the one requested match has been replaced: stop searching */
2054 if (match_no == nm)
2055 break;
2056 if (eo == so) {
2057 /* Empty match (e.g. "b*" will match anywhere).
2058 * Advance by one char. */
2059 //BUG (bug 1333):
2060 //gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
2061 //... and will erroneously match "b" even though it is NOT at the word start.
2062 //we need REG_NOTBOW but it does not exist...
2063 //TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
2064 //it should be able to do it correctly.
2065 /* Subtle: this is safe only because
2066 * qrealloc allocated at least one extra byte */
2067 resbuf[residx] = *sp;
2068 if (*sp == '\0')
2069 goto ret;
2070 sp++;
2071 residx++;
2072 }
2073 }
2074
/* append the unmatched remainder of the source */
2075 resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
2076 strcpy(resbuf + residx, sp);
2077 ret:
2078 //bb_error_msg("end sp:'%s'%p", sp,sp);
2079 setvar_p(dest ? dest : intvar[F0], resbuf);
2080 if (regex == &sreg)
2081 regfree(regex);
2082 return match_no;
2083 }
2084
2085 static NOINLINE int do_mktime(const char *ds)
2086 {
2087 struct tm then;
2088 int count;
2089
2090 /*memset(&then, 0, sizeof(then)); - not needed */
2091 then.tm_isdst = -1; /* default is unknown */
2092
2093 /* manpage of mktime says these fields are ints,
2094 * so we can sscanf stuff directly into them */
2095 count = sscanf(ds, "%u %u %u %u %u %u %d",
2096 &then.tm_year, &then.tm_mon, &then.tm_mday,
2097 &then.tm_hour, &then.tm_min, &then.tm_sec,
2098 &then.tm_isdst);
2099
2100 if (count < 6
2101 || (unsigned)then.tm_mon < 1
2102 || (unsigned)then.tm_year < 1900
2103 ) {
2104 return -1;
2105 }
2106
2107 then.tm_mon -= 1;
2108 then.tm_year -= 1900;
2109
2110 return mktime(&then);
2111 }
2112
/* Execute the builtin function described by node op, store its result in
 * res and return res.
 * Up to four arguments are collected from op->l.n:
 *   an[i]  argument node
 *   av[i]  evaluated argument (temporaries live in tv[])
 *   as[i]  string value of the argument
 * Bits of op->info decide, per argument, whether av[i]/as[i] get filled
 * in (masks 0x09000000 / 0x08000000, shifted right by one for each
 * argument); info >> 30 is the minimum argument count; info & OPNMASK
 * selects the builtin.
 */
2113 static NOINLINE var *exec_builtin(node *op, var *res)
2114 {
2115 #define tspl (G.exec_builtin__tspl)
2116
2117 var *tv;
2118 node *an[4];
2119 var *av[4];
2120 const char *as[4];
2121 regmatch_t pmatch[2];
2122 regex_t sreg, *re;
2123 node *spl;
2124 uint32_t isr, info;
2125 int nargs;
2126 time_t tt;
2127 int i, l, ll, n;
2128
2129 tv = nvalloc(4);
2130 isr = info = op->info;
2131 op = op->l.n;
2132
/* av[2]/av[3] are handed to awk_sub even when absent, so they must be NULL then */
2133 av[2] = av[3] = NULL;
2134 for (i = 0; i < 4 && op; i++) {
2135 an[i] = nextarg(&op);
2136 if (isr & 0x09000000)
2137 av[i] = evaluate(an[i], &tv[i]);
2138 if (isr & 0x08000000)
2139 as[i] = getvar_s(av[i]);
2140 isr >>= 1;
2141 }
2142
2143 nargs = i;
2144 if ((uint32_t)nargs < (info >> 30))
2145 syntax_error(EMSG_TOO_FEW_ARGS);
2146
2147 info &= OPNMASK;
2148 switch (info) {
2149
/* atan2 (only with FEATURE_AWK_LIBM) */
2150 case B_a2:
2151 #if ENABLE_FEATURE_AWK_LIBM
2152 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2153 #else
2154 syntax_error(EMSG_NO_MATH);
2155 #endif
2156 break;
2157
/* split as[0] into array av[1]; optional 3rd argument is the separator */
2158 case B_sp: {
2159 char *s, *s1;
2160
2161 if (nargs > 2) {
2162 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2163 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2164 } else {
2165 spl = &fsplitter.n;
2166 }
2167
2168 n = awk_split(as[0], spl, &s);
2169 s1 = s;
2170 clear_array(iamarray(av[1]));
2171 for (i = 1; i <= n; i++)
2172 setari_u(av[1], i, nextword(&s));
2173 free(s1);
2174 setvar_i(res, n);
2175 break;
2176 }
2177
/* substring of as[0]: 1-based start av[1] (clamped to [0, length] after
 * conversion to 0-based), optional length av[2] (negative treated as 0) */
2178 case B_ss: {
2179 char *s;
2180
2181 l = strlen(as[0]);
2182 i = getvar_i(av[1]) - 1;
2183 if (i > l)
2184 i = l;
2185 if (i < 0)
2186 i = 0;
2187 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2188 if (n < 0)
2189 n = 0;
2190 s = xstrndup(as[0]+i, n);
2191 setvar_p(res, s);
2192 break;
2193 }
2194
2195 /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2196 * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2197 case B_an:
2198 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2199 break;
2200
2201 case B_co:
2202 setvar_i(res, ~getvar_i_int(av[0]));
2203 break;
2204
2205 case B_ls:
2206 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2207 break;
2208
2209 case B_or:
2210 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2211 break;
2212
2213 case B_rs:
2214 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2215 break;
2216
2217 case B_xo:
2218 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
2219 break;
2220
/* lower/upper-case a copy of as[0]; only ASCII letters are touched */
2221 case B_lo:
2222 case B_up: {
2223 char *s, *s1;
2224 s1 = s = xstrdup(as[0]);
2225 while (*s1) {
2226 //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
2227 if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2228 *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
2229 s1++;
2230 }
2231 setvar_p(res, s);
2232 break;
2233 }
2234
/* 1-based position of as[1] within as[0], 0 if not found
 * (case-insensitive search when icase is set) */
2235 case B_ix:
2236 n = 0;
2237 ll = strlen(as[1]);
2238 l = strlen(as[0]) - ll;
2239 if (ll > 0 && l >= 0) {
2240 if (!icase) {
2241 char *s = strstr(as[0], as[1]);
2242 if (s)
2243 n = (s - as[0]) + 1;
2244 } else {
2245 /* this piece of code is terribly slow and
2246 * really should be rewritten
2247 */
2248 for (i = 0; i <= l; i++) {
2249 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2250 n = i+1;
2251 break;
2252 }
2253 }
2254 }
2255 }
2256 setvar_i(res, n);
2257 break;
2258
/* strftime: optional format as[0], optional timestamp av[1] (default: now);
 * output is limited to MAXVARFMT bytes of g_buf */
2259 case B_ti:
2260 if (nargs > 1)
2261 tt = getvar_i(av[1]);
2262 else
2263 time(&tt);
2264 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2265 i = strftime(g_buf, MAXVARFMT,
2266 ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2267 localtime(&tt));
2268 g_buf[i] = '\0';
2269 setvar_s(res, g_buf);
2270 break;
2271
/* mktime on a date spec string, see do_mktime() */
2272 case B_mt:
2273 setvar_i(res, do_mktime(as[0]));
2274 break;
2275
/* match as[0] against regex an[1]: sets RSTART and RLENGTH, returns RSTART
 * (1-based on success; RSTART = 0 and RLENGTH = -1 when there is no match) */
2276 case B_ma:
2277 re = as_regex(an[1], &sreg);
2278 n = regexec(re, as[0], 1, pmatch, 0);
2279 if (n == 0) {
2280 pmatch[0].rm_so++;
2281 pmatch[0].rm_eo++;
2282 } else {
2283 pmatch[0].rm_so = 0;
2284 pmatch[0].rm_eo = -1;
2285 }
2286 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2287 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2288 setvar_i(res, pmatch[0].rm_so);
2289 if (re == &sreg)
2290 regfree(re);
2291 break;
2292
/* substitution with subexpression support: replaces match number av[2] in
 * av[3] (or $0) and stores the resulting string in res */
2293 case B_ge:
2294 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2295 break;
2296
/* replace all matches in av[2] (or $0) in place; result is the count */
2297 case B_gs:
2298 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2299 break;
2300
/* replace the first match in av[2] (or $0) in place; result is the count */
2301 case B_su:
2302 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2303 break;
2304 }
2305
2306 nvfree(tv);
2307 return res;
2308 #undef tspl
2309 }
2310
2311 /*
2312 * Evaluate node - the heart of the program. Supplied with subtree
2313 * and place where to store result. returns ptr to result.
2314 */
2315 #define XC(n) ((n) >> 8)
2316
2317 static var *evaluate(node *op, var *res)
2318 {
2319 /* This procedure is recursive so we should count every byte */
2320 #define fnargs (G.evaluate__fnargs)
2321 /* seed is initialized to 1 */
2322 #define seed (G.evaluate__seed)
2323 #define sreg (G.evaluate__sreg)
2324
2325 var *v1;
2326
2327 if (!op)
2328 return setvar_s(res, NULL);
2329
2330 v1 = nvalloc(2);
2331
2332 while (op) {
2333 struct {
2334 var *v;
2335 const char *s;
2336 } L = L; /* for compiler */
2337 struct {
2338 var *v;
2339 const char *s;
2340 } R = R;
2341 double L_d = L_d;
2342 uint32_t opinfo;
2343 int opn;
2344 node *op1;
2345
2346 opinfo = op->info;
2347 opn = (opinfo & OPNMASK);
2348 g_lineno = op->lineno;
2349 op1 = op->l.n;
2350
2351 /* execute inevitable things */
2352 if (opinfo & OF_RES1)
2353 L.v = evaluate(op1, v1);
2354 if (opinfo & OF_RES2)
2355 R.v = evaluate(op->r.n, v1+1);
2356 if (opinfo & OF_STR1)
2357 L.s = getvar_s(L.v);
2358 if (opinfo & OF_STR2)
2359 R.s = getvar_s(R.v);
2360 if (opinfo & OF_NUM1)
2361 L_d = getvar_i(L.v);
2362
2363 switch (XC(opinfo & OPCLSMASK)) {
2364
2365 /* -- iterative node type -- */
2366
2367 /* test pattern */
2368 case XC( OC_TEST ):
2369 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2370 /* it's range pattern */
2371 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2372 op->info |= OF_CHECKED;
2373 if (ptest(op1->r.n))
2374 op->info &= ~OF_CHECKED;
2375 op = op->a.n;
2376 } else {
2377 op = op->r.n;
2378 }
2379 } else {
2380 op = ptest(op1) ? op->a.n : op->r.n;
2381 }
2382 break;
2383
2384 /* just evaluate an expression, also used as unconditional jump */
2385 case XC( OC_EXEC ):
2386 break;
2387
2388 /* branch, used in if-else and various loops */
2389 case XC( OC_BR ):
2390 op = istrue(L.v) ? op->a.n : op->r.n;
2391 break;
2392
2393 /* initialize for-in loop */
2394 case XC( OC_WALKINIT ):
2395 hashwalk_init(L.v, iamarray(R.v));
2396 break;
2397
2398 /* get next array item */
2399 case XC( OC_WALKNEXT ):
2400 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2401 break;
2402
2403 case XC( OC_PRINT ):
2404 case XC( OC_PRINTF ): {
2405 FILE *F = stdout;
2406
2407 if (op->r.n) {
2408 rstream *rsm = newfile(R.s);
2409 if (!rsm->F) {
2410 if (opn == '|') {
2411 rsm->F = popen(R.s, "w");
2412 if (rsm->F == NULL)
2413 bb_perror_msg_and_die("popen");
2414 rsm->is_pipe = 1;
2415 } else {
2416 rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2417 }
2418 }
2419 F = rsm->F;
2420 }
2421
2422 if ((opinfo & OPCLSMASK) == OC_PRINT) {
2423 if (!op1) {
2424 fputs(getvar_s(intvar[F0]), F);
2425 } else {
2426 while (op1) {
2427 var *v = evaluate(nextarg(&op1), v1);
2428 if (v->type & VF_NUMBER) {
2429 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2430 getvar_i(v), TRUE);
2431 fputs(g_buf, F);
2432 } else {
2433 fputs(getvar_s(v), F);
2434 }
2435
2436 if (op1)
2437 fputs(getvar_s(intvar[OFS]), F);
2438 }
2439 }
2440 fputs(getvar_s(intvar[ORS]), F);
2441
2442 } else { /* OC_PRINTF */
2443 char *s = awk_printf(op1);
2444 fputs(s, F);
2445 free(s);
2446 }
2447 fflush(F);
2448 break;
2449 }
2450
2451 case XC( OC_DELETE ): {
2452 uint32_t info = op1->info & OPCLSMASK;
2453 var *v;
2454
2455 if (info == OC_VAR) {
2456 v = op1->l.v;
2457 } else if (info == OC_FNARG) {
2458 v = &fnargs[op1->l.aidx];
2459 } else {
2460 syntax_error(EMSG_NOT_ARRAY);
2461 }
2462
2463 if (op1->r.n) {
2464 const char *s;
2465 clrvar(L.v);
2466 s = getvar_s(evaluate(op1->r.n, v1));
2467 hash_remove(iamarray(v), s);
2468 } else {
2469 clear_array(iamarray(v));
2470 }
2471 break;
2472 }
2473
2474 case XC( OC_NEWSOURCE ):
2475 g_progname = op->l.new_progname;
2476 break;
2477
2478 case XC( OC_RETURN ):
2479 copyvar(res, L.v);
2480 break;
2481
2482 case XC( OC_NEXTFILE ):
2483 nextfile = TRUE;
2484 case XC( OC_NEXT ):
2485 nextrec = TRUE;
2486 case XC( OC_DONE ):
2487 clrvar(res);
2488 break;
2489
2490 case XC( OC_EXIT ):
2491 awk_exit(L_d);
2492
2493 /* -- recursive node type -- */
2494
2495 case XC( OC_VAR ):
2496 L.v = op->l.v;
2497 if (L.v == intvar[NF])
2498 split_f0();
2499 goto v_cont;
2500
2501 case XC( OC_FNARG ):
2502 L.v = &fnargs[op->l.aidx];
2503 v_cont:
2504 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2505 break;
2506
2507 case XC( OC_IN ):
2508 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2509 break;
2510
2511 case XC( OC_REGEXP ):
2512 op1 = op;
2513 L.s = getvar_s(intvar[F0]);
2514 goto re_cont;
2515
2516 case XC( OC_MATCH ):
2517 op1 = op->r.n;
2518 re_cont:
2519 {
2520 regex_t *re = as_regex(op1, &sreg);
2521 int i = regexec(re, L.s, 0, NULL, 0);
2522 if (re == &sreg)
2523 regfree(re);
2524 setvar_i(res, (i == 0) ^ (opn == '!'));
2525 }
2526 break;
2527
2528 case XC( OC_MOVE ):
2529 /* if source is a temporary string, jusk relink it to dest */
2530 //Disabled: if R.v is numeric but happens to have cached R.v->string,
2531 //then L.v ends up being a string, which is wrong
2532 // if (R.v == v1+1 && R.v->string) {
2533 // res = setvar_p(L.v, R.v->string);
2534 // R.v->string = NULL;
2535 // } else {
2536 res = copyvar(L.v, R.v);
2537 // }
2538 break;
2539
2540 case XC( OC_TERNARY ):
2541 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2542 syntax_error(EMSG_POSSIBLE_ERROR);
2543 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2544 break;
2545
2546 case XC( OC_FUNC ): {
2547 var *vbeg, *v;
2548 const char *sv_progname;
2549
2550 if (!op->r.f->body.first)
2551 syntax_error(EMSG_UNDEF_FUNC);
2552
2553 vbeg = v = nvalloc(op->r.f->nargs + 1);
2554 while (op1) {
2555 var *arg = evaluate(nextarg(&op1), v1);
2556 copyvar(v, arg);
2557 v->type |= VF_CHILD;
2558 v->x.parent = arg;
2559 if (++v - vbeg >= op->r.f->nargs)
2560 break;
2561 }
2562
2563 v = fnargs;
2564 fnargs = vbeg;
2565 sv_progname = g_progname;
2566
2567 res = evaluate(op->r.f->body.first, res);
2568
2569 g_progname = sv_progname;
2570 nvfree(fnargs);
2571 fnargs = v;
2572
2573 break;
2574 }
2575
2576 case XC( OC_GETLINE ):
2577 case XC( OC_PGETLINE ): {
2578 rstream *rsm;
2579 int i;
2580
2581 if (op1) {
2582 rsm = newfile(L.s);
2583 if (!rsm->F) {
2584 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2585 rsm->F = popen(L.s, "r");
2586 rsm->is_pipe = TRUE;
2587 } else {
2588 rsm->F = fopen_for_read(L.s); /* not xfopen! */
2589 }
2590 }
2591 } else {
2592 if (!iF)
2593 iF = next_input_file();
2594 rsm = iF;
2595 }
2596
2597 if (!rsm->F) {
2598 setvar_i(intvar[ERRNO], errno);
2599 setvar_i(res, -1);
2600 break;
2601 }
2602
2603 if (!op->r.n)
2604 R.v = intvar[F0];
2605
2606 i = awk_getline(rsm, R.v);
2607 if (i > 0 && !op1) {
2608 incvar(intvar[FNR]);
2609 incvar(intvar[NR]);
2610 }
2611 setvar_i(res, i);
2612 break;
2613 }
2614
2615 /* simple builtins */
2616 case XC( OC_FBLTIN ): {
2617 int i;
2618 rstream *rsm;
2619 double R_d = R_d; /* for compiler */
2620
2621 switch (opn) {
2622 case F_in:
2623 R_d = (int)L_d;
2624 break;
2625
2626 case F_rn:
2627 R_d = (double)rand() / (double)RAND_MAX;
2628 break;
2629 #if ENABLE_FEATURE_AWK_LIBM
2630 case F_co:
2631 R_d = cos(L_d);
2632 break;
2633
2634 case F_ex:
2635 R_d = exp(L_d);
2636 break;
2637
2638 case F_lg:
2639 R_d = log(L_d);
2640 break;
2641
2642 case F_si:
2643 R_d = sin(L_d);
2644 break;
2645
2646 case F_sq:
2647 R_d = sqrt(L_d);
2648 break;
2649 #else
2650 case F_co:
2651 case F_ex:
2652 case F_lg:
2653 case F_si:
2654 case F_sq:
2655 syntax_error(EMSG_NO_MATH);
2656 break;
2657 #endif
2658 case F_sr:
2659 R_d = (double)seed;
2660 seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
2661 srand(seed);
2662 break;
2663
2664 case F_ti:
2665 R_d = time(NULL);
2666 break;
2667
2668 case F_le:
2669 if (!op1)
2670 L.s = getvar_s(intvar[F0]);
2671 R_d = strlen(L.s);
2672 break;
2673
2674 case F_sy:
2675 fflush_all();
2676 R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2677 ? (system(L.s) >> 8) : 0;
2678 break;
2679
2680 case F_ff:
2681 if (!op1) {
2682 fflush(stdout);
2683 } else if (L.s && *L.s) {
2684 rsm = newfile(L.s);
2685 fflush(rsm->F);
2686 } else {
2687 fflush_all();
2688 }
2689 break;
2690
2691 case F_cl:
2692 i = 0;
2693 rsm = (rstream *)hash_search(fdhash, L.s);
2694 if (rsm) {
2695 i = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
2696 free(rsm->buffer);
2697 hash_remove(fdhash, L.s);
2698 }
2699 if (i != 0)
2700 setvar_i(intvar[ERRNO], errno);
2701 R_d = (double)i;
2702 break;
2703 }
2704 setvar_i(res, R_d);
2705 break;
2706 }
2707
2708 case XC( OC_BUILTIN ):
2709 res = exec_builtin(op, res);
2710 break;
2711
2712 case XC( OC_SPRINTF ):
2713 setvar_p(res, awk_printf(op1));
2714 break;
2715
2716 case XC( OC_UNARY ): {
2717 double Ld, R_d;
2718
2719 Ld = R_d = getvar_i(R.v);
2720 switch (opn) {
2721 case 'P':
2722 Ld = ++R_d;
2723 goto r_op_change;
2724 case 'p':
2725 R_d++;
2726 goto r_op_change;
2727 case 'M':
2728 Ld = --R_d;
2729 goto r_op_change;
2730 case 'm':
2731 R_d--;
2732 r_op_change:
2733 setvar_i(R.v, R_d);
2734 break;
2735 case '!':
2736 Ld = !istrue(R.v);
2737 break;
2738 case '-':
2739 Ld = -R_d;
2740 break;
2741 }
2742 setvar_i(res, Ld);
2743 break;
2744 }
2745
2746 case XC( OC_FIELD ): {
2747 int i = (int)getvar_i(R.v);
2748 if (i == 0) {
2749 res = intvar[F0];
2750 } else {
2751 split_f0();
2752 if (i > nfields)
2753 fsrealloc(i);
2754 res = &Fields[i - 1];
2755 }
2756 break;
2757 }
2758
2759 /* concatenation (" ") and index joining (",") */
2760 case XC( OC_CONCAT ):
2761 case XC( OC_COMMA ): {
2762 const char *sep = "";
2763 if ((opinfo & OPCLSMASK) == OC_COMMA)
2764 sep = getvar_s(intvar[SUBSEP]);
2765 setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
2766 break;
2767 }
2768
2769 case XC( OC_LAND ):
2770 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2771 break;
2772
2773 case XC( OC_LOR ):
2774 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2775 break;
2776
2777 case XC( OC_BINARY ):
2778 case XC( OC_REPLACE ): {
2779 double R_d = getvar_i(R.v);
2780 switch (opn) {
2781 case '+':
2782 L_d += R_d;
2783 break;
2784 case '-':
2785 L_d -= R_d;
2786 break;
2787 case '*':
2788 L_d *= R_d;
2789 break;
2790 case '/':
2791 if (R_d == 0)
2792 syntax_error(EMSG_DIV_BY_ZERO);
2793 L_d /= R_d;
2794 break;
2795 case '&':
2796 #if ENABLE_FEATURE_AWK_LIBM
2797 L_d = pow(L_d, R_d);
2798 #else
2799 syntax_error(EMSG_NO_MATH);
2800 #endif
2801 break;
2802 case '%':
2803 if (R_d == 0)
2804 syntax_error(EMSG_DIV_BY_ZERO);
2805 L_d -= (int)(L_d / R_d) * R_d;
2806 break;
2807 }
2808 res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
2809 break;
2810 }
2811
2812 case XC( OC_COMPARE ): {
2813 int i = i; /* for compiler */
2814 double Ld;
2815
2816 if (is_numeric(L.v) && is_numeric(R.v)) {
2817 Ld = getvar_i(L.v) - getvar_i(R.v);
2818 } else {
2819 const char *l = getvar_s(L.v);
2820 const char *r = getvar_s(R.v);
2821 Ld = icase ? strcasecmp(l, r) : strcmp(l, r);
2822 }
2823 switch (opn & 0xfe) {
2824 case 0:
2825 i = (Ld > 0);
2826 break;
2827 case 2:
2828 i = (Ld >= 0);
2829 break;
2830 case 4:
2831 i = (Ld == 0);
2832 break;
2833 }
2834 setvar_i(res, (i == 0) ^ (opn & 1));
2835 break;
2836 }
2837
2838 default:
2839 syntax_error(EMSG_POSSIBLE_ERROR);
2840 }
2841 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2842 op = op->a.n;
2843 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2844 break;
2845 if (nextrec)
2846 break;
2847 } /* while (op) */
2848
2849 nvfree(v1);
2850 return res;
2851 #undef fnargs
2852 #undef seed
2853 #undef sreg
2854 }
2855
2856
2857 /* -------- main & co. -------- */
2858
2859 static int awk_exit(int r)
2860 {
2861 var tv;
2862 unsigned i;
2863 hash_item *hi;
2864
2865 zero_out_var(&tv);
2866
2867 if (!exiting) {
2868 exiting = TRUE;
2869 nextrec = FALSE;
2870 evaluate(endseq.first, &tv);
2871 }
2872
2873 /* waiting for children */
2874 for (i = 0; i < fdhash->csize; i++) {
2875 hi = fdhash->items[i];
2876 while (hi) {
2877 if (hi->data.rs.F && hi->data.rs.is_pipe)
2878 pclose(hi->data.rs.F);
2879 hi = hi->next;
2880 }
2881 }
2882
2883 exit(r);
2884 }
2885
2886 /* if expr looks like "var=value", perform assignment and return 1,
2887 * otherwise return 0 */
2888 static int is_assignment(const char *expr)
2889 {
2890 char *exprc, *s, *s0, *s1;
2891
2892 exprc = xstrdup(expr);
2893 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2894 free(exprc);
2895 return FALSE;
2896 }
2897
2898 *s++ = '\0';
2899 s0 = s1 = s;
2900 while (*s)
2901 *s1++ = nextchar(&s);
2902
2903 *s1 = '\0';
2904 setvar_u(newvar(exprc), s0);
2905 free(exprc);
2906 return TRUE;
2907 }
2908
2909 /* switch to next input file */
2910 static rstream *next_input_file(void)
2911 {
2912 #define rsm (G.next_input_file__rsm)
2913 #define files_happen (G.next_input_file__files_happen)
2914
2915 FILE *F = NULL;
2916 const char *fname, *ind;
2917
2918 if (rsm.F)
2919 fclose(rsm.F);
2920 rsm.F = NULL;
2921 rsm.pos = rsm.adv = 0;
2922
2923 do {
2924 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2925 if (files_happen)
2926 return NULL;
2927 fname = "-";
2928 F = stdin;
2929 } else {
2930 ind = getvar_s(incvar(intvar[ARGIND]));
2931 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2932 if (fname && *fname && !is_assignment(fname))
2933 F = xfopen_stdin(fname);
2934 }
2935 } while (!F);
2936
2937 files_happen = TRUE;
2938 setvar_s(intvar[FILENAME], fname);
2939 rsm.F = F;
2940 return &rsm;
2941 #undef rsm
2942 #undef files_happen
2943 }
2944
2945 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
2946 int awk_main(int argc, char **argv)
2947 {
2948 unsigned opt;
2949 char *opt_F, *opt_W;
2950 llist_t *list_v = NULL;
2951 llist_t *list_f = NULL;
2952 int i, j;
2953 var *v;
2954 var tv;
2955 char **envp;
2956 char *vnames = (char *)vNames; /* cheat */
2957 char *vvalues = (char *)vValues;
2958
2959 INIT_G();
2960
2961 /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2962 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2963 if (ENABLE_LOCALE_SUPPORT)
2964 setlocale(LC_NUMERIC, "C");
2965
2966 zero_out_var(&tv);
2967
2968 /* allocate global buffer */
2969 g_buf = xmalloc(MAXVARFMT + 1);
2970
2971 vhash = hash_init();
2972 ahash = hash_init();
2973 fdhash = hash_init();
2974 fnhash = hash_init();
2975
2976 /* initialize variables */
2977 for (i = 0; *vnames; i++) {
2978 intvar[i] = v = newvar(nextword(&vnames));
2979 if (*vvalues != '\377')
2980 setvar_s(v, nextword(&vvalues));
2981 else
2982 setvar_i(v, 0);
2983
2984 if (*vnames == '*') {
2985 v->type |= VF_SPECIAL;
2986 vnames++;
2987 }
2988 }
2989
2990 handle_special(intvar[FS]);
2991 handle_special(intvar[RS]);
2992
2993 newfile("/dev/stdin")->F = stdin;
2994 newfile("/dev/stdout")->F = stdout;
2995 newfile("/dev/stderr")->F = stderr;
2996
2997 /* Huh, people report that sometimes environ is NULL. Oh well. */
2998 if (environ) for (envp = environ; *envp; envp++) {
2999 /* environ is writable, thus we don't strdup it needlessly */
3000 char *s = *envp;
3001 char *s1 = strchr(s, '=');
3002 if (s1) {
3003 *s1 = '\0';
3004 /* Both findvar and setvar_u take const char*
3005 * as 2nd arg -> environment is not trashed */
3006 setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3007 *s1 = '=';
3008 }
3009 }
3010 opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
3011 opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
3012 argv += optind;
3013 argc -= optind;
3014 if (opt & 0x1)
3015 setvar_s(intvar[FS], opt_F); // -F
3016 while (list_v) { /* -v */
3017 if (!is_assignment(llist_pop(&list_v)))
3018 bb_show_usage();
3019 }
3020 if (list_f) { /* -f */
3021 do {
3022 char *s = NULL;
3023 FILE *from_file;
3024
3025 g_progname = llist_pop(&list_f);
3026 from_file = xfopen_stdin(g_progname);
3027 /* one byte is reserved for some trick in next_token */
3028 for (i = j = 1; j > 0; i += j) {
3029 s = xrealloc(s, i + 4096);
3030 j = fread(s + i, 1, 4094, from_file);
3031 }
3032 s[i] = '\0';
3033 fclose(from_file);
3034 parse_program(s + 1);
3035 free(s);
3036 } while (list_f);
3037 argc++;
3038 } else { // no -f: take program from 1st parameter
3039 if (!argc)
3040 bb_show_usage();
3041 g_progname = "cmd. line";
3042 parse_program(*argv++);
3043 }
3044 if (opt & 0x8) // -W
3045 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
3046
3047 /* fill in ARGV array */
3048 setvar_i(intvar[ARGC], argc);
3049 setari_u(intvar[ARGV], 0, "awk");
3050 i = 0;
3051 while (*argv)
3052 setari_u(intvar[ARGV], ++i, *argv++);
3053
3054 evaluate(beginseq.first, &tv);
3055 if (!mainseq.first && !endseq.first)
3056 awk_exit(EXIT_SUCCESS);
3057
3058 /* input file could already be opened in BEGIN block */
3059 if (!iF)
3060 iF = next_input_file();
3061
3062 /* passing through input files */
3063 while (iF) {
3064 nextfile = FALSE;
3065 setvar_i(intvar[FNR], 0);
3066
3067 while ((i = awk_getline(iF, intvar[F0])) > 0) {
3068 nextrec = FALSE;
3069 incvar(intvar[NR]);
3070 incvar(intvar[FNR]);
3071 evaluate(mainseq.first, &tv);
3072
3073 if (nextfile)
3074 break;
3075 }
3076
3077 if (i < 0)
3078 syntax_error(strerror(errno));
3079
3080 iF = next_input_file();
3081 }
3082
3083 awk_exit(EXIT_SUCCESS);
3084 /*return 0;*/
3085 }