Magellan Linux

Contents of /trunk/mkinitrd-magellan/busybox/editors/awk.c

Parent Directory | Revision Log


Revision 984 - (show annotations) (download)
Sun May 30 11:32:42 2010 UTC (13 years, 11 months ago) by niro
File MIME type: text/plain
File size: 66134 byte(s)
-updated to busybox-1.16.1 and enabled blkid/uuid support in default config
1 /* vi: set sw=4 ts=4: */
2 /*
3 * awk implementation for busybox
4 *
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6 *
7 * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
8 */
9
10 #include "libbb.h"
11 #include "xregex.h"
12 #include <math.h>
13
14 /* This is a NOEXEC applet. Be very careful! */
15
16
/* Size limits. MAXVARFMT is the buffer size handed to fmt_num() when
 * getvar_s() converts a number to its string form; MINNVBLOCK is the
 * minimum number of var slots in a block allocated by nvalloc(). */
17 #define MAXVARFMT 240
18 #define MINNVBLOCK 64
19
20 /* variable flags (bits kept in var_s.type) */
21 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
22 #define VF_ARRAY 0x0002 /* 1 = it's an array */
23
24 #define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
25 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
26 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
27 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
28 #define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */
29 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
30 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
31
32 /* these flags are static, don't change them when value is changed */
/* (clrvar() masks var_s.type with this set, so only these bits survive
 * a value reset) */
33 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
34
35 /* Variable */
/* A single awk value. 'type' holds VF_* flag bits; 'number' and 'string'
 * hold the numeric and string forms (string may be NULL, which getvar_s()
 * reports as ""). The union 'x' is interpreted according to the flags:
 * VF_ARRAY -> x.array, VF_CHILD -> x.parent, VF_WALK -> x.walker. */
36 typedef struct var_s {
37 unsigned type; /* flags */
38 double number;
39 char *string;
40 union {
41 int aidx; /* func arg idx (for compilation stage) */
42 struct xhash_s *array; /* array ptr */
43 struct var_s *parent; /* for func args, ptr to actual parameter */
44 char **walker; /* list of array elements (for..in) */
45 } x;
46 } var;
47
48 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
/* 'first'/'last' delimit the linked sequence built by chain_node();
 * 'programname' records the g_progname value last seen by chain_node(),
 * which emits an OC_NEWSOURCE node whenever it changes. */
49 typedef struct chain_s {
50 struct node_s *first;
51 struct node_s *last;
52 const char *programname;
53 } chain;
54
55 /* Function */
/* Entry stored in the function hash (see newfunc()): an argument count
 * plus the node chain holding the function body. */
56 typedef struct func_s {
57 unsigned nargs;
58 struct chain_s body;
59 } func;
60
61 /* I/O stream */
/* Entry stored in the redirect-stream hash (see newfile()). The code that
 * uses adv/size/pos is outside this part of the file; presumably they
 * track the read position within 'buffer' -- TODO confirm. */
62 typedef struct rstream_s {
63 FILE *F;
64 char *buffer;
65 int adv;
66 int size;
67 int pos;
68 smallint is_pipe;
69 } rstream;
70
/* One entry of an xhash bucket chain. 'data' must stay the first member:
 * hash_search() returns &data and hash_find() treats that pointer as the
 * hash_item itself. The item is allocated with extra room after 'name'
 * so that the full key string fits (see hash_find()). */
71 typedef struct hash_item_s {
72 union {
73 struct var_s v; /* variable/array hash */
74 struct rstream_s rs; /* redirect streams hash */
75 struct func_s f; /* functions hash */
76 } data;
77 struct hash_item_s *next; /* next in chain */
78 char name[1]; /* really it's longer */
79 } hash_item;
80
/* Chained hash table keyed by string. hash_init() starts it with
 * FIRST_PRIME buckets; hash_rebuild() grows it through PRIMES[]. */
81 typedef struct xhash_s {
82 unsigned nel; /* num of elements */
83 unsigned csize; /* current hash size */
84 unsigned nprime; /* next hash size in PRIMES[] */
85 unsigned glen; /* summary length of item names */
/* (glen counts each name's length plus its terminating NUL) */
86 struct hash_item_s **items;
87 } xhash;
88
89 /* Tree node */
/* 'info' combines an operation class (OPCLSMASK), operand flags (OF_*),
 * a priority (PRIMASK) and a low opcode field (OPNMASK). 'l' and 'r' are
 * the left/right operands, each a union whose active member depends on
 * the operation. 'a' is used by parse_expr() as a link to the parent
 * node and by chain_node() as a link to the next node in a chain. */
90 typedef struct node_s {
91 uint32_t info;
92 unsigned lineno;
93 union {
94 struct node_s *n;
95 var *v;
96 int i;
97 char *s;
98 regex_t *re;
99 } l;
100 union {
101 struct node_s *n;
102 regex_t *ire;
103 func *f;
104 int argno;
105 } r;
106 union {
107 struct node_s *n;
108 } a;
109 } node;
110
111 /* Block of temporary variables */
/* Blocks form a doubly linked list (prev/next). 'size' is the number of
 * slots in nv[]; 'pos' points at the first unused slot. Managed by
 * nvalloc()/nvfree(). */
112 typedef struct nvblock_s {
113 int size;
114 var *pos;
115 struct nvblock_s *prev;
116 struct nvblock_s *next;
117 var nv[];
118 } nvblock;
119
/* A node bundled with storage for two compiled regexes. The pair layout
 * matches what mk_re_node() expects (re[0] and re[1]); the users of this
 * type are outside this part of the file. */
120 typedef struct tsplitter_s {
121 node n;
122 regex_t re[2];
123 } tsplitter;
124
125 /* simple token classes */
126 /* Order and hex values are very important!!! See next_token() */
/* Each simple class is a single bit. next_token() walks tokenlist[]
 * starting with class bit 1 and shifts the bit left at every NTC marker,
 * so the bit positions here must follow the group order in tokenlist[]. */
127 #define TC_SEQSTART 1 /* ( */
128 #define TC_SEQTERM (1 << 1) /* ) */
129 #define TC_REGEXP (1 << 2) /* /.../ */
130 #define TC_OUTRDR (1 << 3) /* | > >> */
131 #define TC_UOPPOST (1 << 4) /* unary postfix operator */
132 #define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
133 #define TC_BINOPX (1 << 6) /* two-opnd operator */
134 #define TC_IN (1 << 7)
135 #define TC_COMMA (1 << 8)
136 #define TC_PIPE (1 << 9) /* input redirection pipe */
137 #define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
138 #define TC_ARRTERM (1 << 11) /* ] */
139 #define TC_GRPSTART (1 << 12) /* { */
140 #define TC_GRPTERM (1 << 13) /* } */
141 #define TC_SEMICOL (1 << 14)
142 #define TC_NEWLINE (1 << 15)
143 #define TC_STATX (1 << 16) /* ctl statement (for, next...) */
144 #define TC_WHILE (1 << 17)
145 #define TC_ELSE (1 << 18)
146 #define TC_BUILTIN (1 << 19)
147 #define TC_GETLINE (1 << 20)
148 #define TC_FUNCDECL (1 << 21) /* `function' `func' */
149 #define TC_BEGIN (1 << 22)
150 #define TC_END (1 << 23)
151 #define TC_EOF (1 << 24)
152 #define TC_VARIABLE (1 << 25)
153 #define TC_ARRAY (1 << 26)
154 #define TC_FUNCTION (1 << 27)
155 #define TC_STRING (1 << 28)
156 #define TC_NUMBER (1 << 29)
157
158 #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
159
160 /* combined token classes */
161 #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
162 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
163 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
164 | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
165
166 #define TC_STATEMNT (TC_STATX | TC_WHILE)
167 #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
168
169 /* word tokens, cannot mean something else if not expected */
170 #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
171 | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
172
173 /* discard newlines after these */
174 #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
175 | TC_BINOP | TC_OPTERM)
176
177 /* what can expression begin with */
178 #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
179 /* what can group begin with */
180 #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
181
182 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
183 /* operator is inserted between them */
/* (next_token() additionally requires that a binary operator is expected) */
184 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
185 | TC_STRING | TC_NUMBER | TC_UOPPOST)
186 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
187
/* Operand-handling flags stored in node.info (bits 16..21). The two-letter
 * combinations below describe the left and right operand respectively:
 * x = no flag, V = OF_RESn, S = OF_RESn + OF_STRn, N = OF_RES1 + OF_NUM1. */
188 #define OF_RES1 0x010000
189 #define OF_RES2 0x020000
190 #define OF_STR1 0x040000
191 #define OF_STR2 0x080000
192 #define OF_NUM1 0x100000
193 #define OF_CHECKED 0x200000
194
195 /* combined operator flags */
196 #define xx 0
197 #define xV OF_RES2
198 #define xS (OF_RES2 | OF_STR2)
199 #define Vx OF_RES1
200 #define VV (OF_RES1 | OF_RES2)
201 #define Nx (OF_RES1 | OF_NUM1)
202 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
203 #define Sx (OF_RES1 | OF_STR1)
204 #define SV (OF_RES1 | OF_STR1 | OF_RES2)
205 #define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
206
/* Masks selecting the operation class (OC_* / ST_*) and the low opcode
 * field of node.info. */
207 #define OPCLSMASK 0xFF00
208 #define OPNMASK 0x007F
209
210 /* operator priority is a highest byte (even: r->l, odd: l->r grouping)
211 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
212 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
213 */
/* NOTE: P() does not parenthesize its argument; every use visible in this
 * file passes a plain literal. PRIMASK2 is PRIMASK with the lowest
 * priority bit cleared (parse_expr() compares against both). */
214 #define P(x) (x << 24)
215 #define PRIMASK 0x7F000000
216 #define PRIMASK2 0x7E000000
217
218 /* Operation classes */
219
/* NOTE(review): these two values coincide with OC_WALKINIT and OC_BINARY
 * in the enum that follows; their users are outside this part of the
 * file. */
220 #define SHIFT_TIL_THIS 0x0600
221 #define RECUR_FROM_THIS 0x1000
222
/* Operation classes. Each value occupies the OPCLSMASK byte of node.info.
 * ST_* values are the classes attached to the if/do/for/while keywords in
 * tokeninfo[]. */
223 enum {
224 OC_DELETE = 0x0100, OC_EXEC = 0x0200, OC_NEWSOURCE = 0x0300,
225 OC_PRINT = 0x0400, OC_PRINTF = 0x0500, OC_WALKINIT = 0x0600,
226
227 OC_BR = 0x0700, OC_BREAK = 0x0800, OC_CONTINUE = 0x0900,
228 OC_EXIT = 0x0a00, OC_NEXT = 0x0b00, OC_NEXTFILE = 0x0c00,
229 OC_TEST = 0x0d00, OC_WALKNEXT = 0x0e00,
230
231 OC_BINARY = 0x1000, OC_BUILTIN = 0x1100, OC_COLON = 0x1200,
232 OC_COMMA = 0x1300, OC_COMPARE = 0x1400, OC_CONCAT = 0x1500,
233 OC_FBLTIN = 0x1600, OC_FIELD = 0x1700, OC_FNARG = 0x1800,
234 OC_FUNC = 0x1900, OC_GETLINE = 0x1a00, OC_IN = 0x1b00,
235 OC_LAND = 0x1c00, OC_LOR = 0x1d00, OC_MATCH = 0x1e00,
236 OC_MOVE = 0x1f00, OC_PGETLINE = 0x2000, OC_REGEXP = 0x2100,
237 OC_REPLACE = 0x2200, OC_RETURN = 0x2300, OC_SPRINTF = 0x2400,
238 OC_TERNARY = 0x2500, OC_UNARY = 0x2600, OC_VAR = 0x2700,
239 OC_DONE = 0x2800,
240
241 ST_IF = 0x3000, ST_DO = 0x3100, ST_FOR = 0x3200,
242 ST_WHILE = 0x3300
243 };
244
245 /* simple builtins */
/* Opcode values combined with OC_FBLTIN in tokeninfo[] (e.g. F_co for
 * "cos", F_le for "length", F_cl for "close"). */
246 enum {
247 F_in, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
248 F_ti, F_le, F_sy, F_ff, F_cl
249 };
250
251 /* builtins */
/* Opcode values combined with OC_BUILTIN in tokeninfo[] (e.g. B_a2 for
 * "atan2", B_ss for "substr", B_an for "and"). */
252 enum {
253 B_a2, B_ix, B_ma, B_sp, B_ss, B_ti, B_mt, B_lo, B_up,
254 B_ge, B_gs, B_su,
255 B_an, B_co, B_ls, B_or, B_rs, B_xo,
256 };
257
258 /* tokens and their corresponding info values */
259
260 #define NTC "\377" /* switch to next token class (tc<<1) */
261 #define NTCC '\377'
262
263 #define OC_B OC_BUILTIN
264
/* Token table scanned by next_token(). Every entry is a length byte
 * (written as an octal escape) followed by that many characters of token
 * text. An NTC byte ends the current token class; the classes appear in
 * the same order as the TC_* bits. The scan stops at the final "\0".
 * tokeninfo[] holds one info value per entry, in the same order. */
265 static const char tokenlist[] ALIGN1 =
266 "\1(" NTC
267 "\1)" NTC
268 "\1/" NTC /* REGEXP */
269 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
270 "\2++" "\2--" NTC /* UOPPOST */
271 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
272 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
273 "\2*=" "\2/=" "\2%=" "\2^="
274 "\1+" "\1-" "\3**=" "\2**"
275 "\1/" "\1%" "\1^" "\1*"
276 "\2!=" "\2>=" "\2<=" "\1>"
277 "\1<" "\2!~" "\1~" "\2&&"
278 "\2||" "\1?" "\1:" NTC
279 "\2in" NTC
280 "\1," NTC
281 "\1|" NTC
282 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
283 "\1]" NTC
284 "\1{" NTC
285 "\1}" NTC
286 "\1;" NTC
287 "\1\n" NTC
288 "\2if" "\2do" "\3for" "\5break" /* STATX */
289 "\10continue" "\6delete" "\5print"
290 "\6printf" "\4next" "\10nextfile"
291 "\6return" "\4exit" NTC
292 "\5while" NTC
293 "\4else" NTC
294
295 "\3and" "\5compl" "\6lshift" "\2or"
296 "\6rshift" "\3xor"
297 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
298 "\3cos" "\3exp" "\3int" "\3log"
299 "\4rand" "\3sin" "\4sqrt" "\5srand"
300 "\6gensub" "\4gsub" "\5index" "\6length"
301 "\5match" "\5split" "\7sprintf" "\3sub"
302 "\6substr" "\7systime" "\10strftime" "\6mktime"
303 "\7tolower" "\7toupper" NTC
304 "\7getline" NTC
305 "\4func" "\10function" NTC
306 "\5BEGIN" NTC
307 "\3END" "\0"
308 ;
309
/* Info word for each tokenlist[] entry, in the same order (NTC markers do
 * not consume an entry: next_token() advances its index only for real
 * tokens). A word combines an operation class, operand flags, a priority
 * P(n) and, for some operators, a low opcode byte. Tokens that carry no
 * info have 0. */
310 static const uint32_t tokeninfo[] = {
311 0,
312 0,
313 OC_REGEXP,
314 xS|'a', xS|'w', xS|'|',
315 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
316 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
317 OC_FIELD|xV|P(5),
318 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
319 OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
320 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
321 OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
322 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
323 OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
324 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
325 OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
326 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
327 OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
328 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
329 OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
330 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
331 OC_COLON|xx|P(67)|':',
332 OC_IN|SV|P(49),
333 OC_COMMA|SS|P(80),
334 OC_PGETLINE|SV|P(37),
335 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
336 OC_UNARY|xV|P(19)|'!',
337 0,
338 0,
339 0,
340 0,
341 0,
342 ST_IF, ST_DO, ST_FOR, OC_BREAK,
343 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
344 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
345 OC_RETURN|Vx, OC_EXIT|Nx,
346 ST_WHILE,
347 0,
348
349 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
350 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
351 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
352 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
353 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
354 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
355 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
356 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
357 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
358 OC_GETLINE|SV|P(0),
359 0, 0,
360 0,
361 0
362 };
363
364 /* internal variable names and their initial values */
365 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
/* The enum below indexes intvar[]; vNames[] lists the names in the same
 * order, each NUL-terminated, with a '*' prefix on the name of a SPECIAL
 * variable (the '*' physically follows the previous name's NUL). */
366 enum {
367 CONVFMT, OFMT, FS, OFS,
368 ORS, RS, RT, FILENAME,
369 SUBSEP, F0, ARGIND, ARGC,
370 ARGV, ERRNO, FNR, NR,
371 NF, IGNORECASE, ENVIRON, NUM_INTERNAL_VARS
372 };
373
374 static const char vNames[] ALIGN1 =
375 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
376 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
377 "SUBSEP\0" "$\0*" "ARGIND\0" "ARGC\0"
378 "ARGV\0" "ERRNO\0" "FNR\0" "NR\0"
379 "NF\0*" "IGNORECASE\0*" "ENVIRON\0" "\0";
380
/* Initial string values for the leading internal variables, in enum order
 * (CONVFMT .. F0). The trailing "\377" presumably marks where string
 * initial values end -- the consumer is outside this part of the file,
 * TODO confirm. */
381 static const char vValues[] ALIGN1 =
382 "%.6g\0" "%.6g\0" " \0" " \0"
383 "\n\0" "\n\0" "\0" "\0"
384 "\034\0" "\0" "\377";
385
386 /* hash size may grow to these values */
/* hash_init() starts every table at FIRST_PRIME buckets; hash_rebuild()
 * then steps through PRIMES[] and stops growing after the last entry. */
387 #define FIRST_PRIME 61
388 static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
389
390
391 /* Globals. Split in two parts so that first one is addressed
392 * with (mostly short) negative offsets.
393 * NB: it's unsafe to put members of type "double"
394 * into globals2 (gcc may fail to align them).
395 */
/* Layout: INIT_G() allocates sizeof(G1)+sizeof(G) zeroed bytes in one
 * piece and points ptr_to_globals just past the first part, so
 * "struct globals" sits immediately below the pointer (G1 is
 * ptr_to_globals[-1]) and "struct globals2" starts at the pointer (G). */
396 struct globals {
397 double t_double;
398 chain beginseq, mainseq, endseq;
399 chain *seq;
400 node *break_ptr, *continue_ptr;
401 rstream *iF;
402 xhash *vhash, *ahash, *fdhash, *fnhash;
403 const char *g_progname;
404 int g_lineno;
405 int nfields;
406 int maxfields; /* used in fsrealloc() only */
407 var *Fields;
408 nvblock *g_cb;
409 char *g_pos;
410 char *g_buf;
411 smallint icase;
412 smallint exiting;
413 smallint nextrec;
414 smallint nextfile;
415 smallint is_f0_split;
416 };
417 struct globals2 {
418 uint32_t t_info; /* often used */
419 uint32_t t_tclass;
420 char *t_string;
421 int t_lineno;
422 int t_rollback;
423
424 var *intvar[NUM_INTERNAL_VARS]; /* often used */
425
426 /* former statics from various functions */
427 char *split_f0__fstrings;
428
429 uint32_t next_token__save_tclass;
430 uint32_t next_token__save_info;
431 uint32_t next_token__ltclass;
432 smallint next_token__concat_inserted;
433
434 smallint next_input_file__files_happen;
435 rstream next_input_file__rsm;
436
437 var *evaluate__fnargs;
438 unsigned evaluate__seed;
439 regex_t evaluate__sreg;
440
441 var ptest__v;
442
443 tsplitter exec_builtin__tspl;
444
445 /* biggest and least used members go last */
446 tsplitter fsplitter, rsplitter;
447 };
448 #define G1 (ptr_to_globals[-1])
449 #define G (*(struct globals2 *)ptr_to_globals)
450 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
451 /*char G1size[sizeof(G1)]; - 0x74 */
452 /*char Gsize[sizeof(G)]; - 0x1c4 */
453 /* Trying to keep most of members accessible with short offsets: */
454 /*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
/* Short names for the most used members, so the rest of the file can
 * refer to them as if they were ordinary global variables. */
455 #define t_double (G1.t_double )
456 #define beginseq (G1.beginseq )
457 #define mainseq (G1.mainseq )
458 #define endseq (G1.endseq )
459 #define seq (G1.seq )
460 #define break_ptr (G1.break_ptr )
461 #define continue_ptr (G1.continue_ptr)
462 #define iF (G1.iF )
463 #define vhash (G1.vhash )
464 #define ahash (G1.ahash )
465 #define fdhash (G1.fdhash )
466 #define fnhash (G1.fnhash )
467 #define g_progname (G1.g_progname )
468 #define g_lineno (G1.g_lineno )
469 #define nfields (G1.nfields )
470 #define maxfields (G1.maxfields )
471 #define Fields (G1.Fields )
472 #define g_cb (G1.g_cb )
473 #define g_pos (G1.g_pos )
474 #define g_buf (G1.g_buf )
475 #define icase (G1.icase )
476 #define exiting (G1.exiting )
477 #define nextrec (G1.nextrec )
478 #define nextfile (G1.nextfile )
479 #define is_f0_split (G1.is_f0_split )
480 #define t_info (G.t_info )
481 #define t_tclass (G.t_tclass )
482 #define t_string (G.t_string )
483 #define t_lineno (G.t_lineno )
484 #define t_rollback (G.t_rollback )
485 #define intvar (G.intvar )
486 #define fsplitter (G.fsplitter )
487 #define rsplitter (G.rsplitter )
/* Allocate and zero both global parts, then set the two members that need
 * a non-zero start value: the "previous token class" used by next_token()
 * and the random seed. */
488 #define INIT_G() do { \
489 SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
490 G.next_token__ltclass = TC_OPTERM; \
491 G.evaluate__seed = 1; \
492 } while (0)
493
494
495 /* function prototypes */
/* Forward declarations for functions that are called before their
 * definitions appear in this file. */
496 static void handle_special(var *);
497 static node *parse_expr(uint32_t);
498 static void chain_group(void);
499 static var *evaluate(node *, var *);
500 static rstream *next_input_file(void);
501 static int fmt_num(char *, int, const char *, double, int);
502 static int awk_exit(int) NORETURN;
503
504 /* ---- error handling ---- */
505
/* Message texts passed to syntax_error(), which prefixes them with the
 * program name and line number. */
506 static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
507 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
508 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
509 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
510 static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
511 static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
512 static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
513 static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
514 static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
/* Only defined when the math-library feature is disabled. */
515 #if !ENABLE_FEATURE_AWK_LIBM
516 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
517 #endif
518
519 static void zero_out_var(var *vp)
520 {
521 memset(vp, 0, sizeof(*vp));
522 }
523
524 static void syntax_error(const char *message) NORETURN;
525 static void syntax_error(const char *message)
526 {
527 bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
528 }
529
530 /* ---- hash stuff ---- */
531
/* Compute the string hash used by the xhash tables: for each character,
 * hash = ch + hash * 63 (written as a shift and a subtraction), with
 * unsigned wrap-around. The empty string hashes to 0. */
static unsigned hashidx(const char *name)
{
	unsigned h = 0;
	const char *cp;

	for (cp = name; *cp != '\0'; cp++)
		h = *cp + (h << 6) - h;
	return h;
}
540
541 /* create new hash */
542 static xhash *hash_init(void)
543 {
544 xhash *newhash;
545
546 newhash = xzalloc(sizeof(*newhash));
547 newhash->csize = FIRST_PRIME;
548 newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
549
550 return newhash;
551 }
552
553 /* find item in hash, return ptr to data, NULL if not found */
554 static void *hash_search(xhash *hash, const char *name)
555 {
556 hash_item *hi;
557
558 hi = hash->items[hashidx(name) % hash->csize];
559 while (hi) {
560 if (strcmp(hi->name, name) == 0)
561 return &(hi->data);
562 hi = hi->next;
563 }
564 return NULL;
565 }
566
567 /* grow hash if it becomes too big */
568 static void hash_rebuild(xhash *hash)
569 {
570 unsigned newsize, i, idx;
571 hash_item **newitems, *hi, *thi;
572
573 if (hash->nprime == ARRAY_SIZE(PRIMES))
574 return;
575
576 newsize = PRIMES[hash->nprime++];
577 newitems = xzalloc(newsize * sizeof(newitems[0]));
578
579 for (i = 0; i < hash->csize; i++) {
580 hi = hash->items[i];
581 while (hi) {
582 thi = hi;
583 hi = thi->next;
584 idx = hashidx(thi->name) % newsize;
585 thi->next = newitems[idx];
586 newitems[idx] = thi;
587 }
588 }
589
590 free(hash->items);
591 hash->csize = newsize;
592 hash->items = newitems;
593 }
594
595 /* find item in hash, add it if necessary. Return ptr to data */
596 static void *hash_find(xhash *hash, const char *name)
597 {
598 hash_item *hi;
599 unsigned idx;
600 int l;
601
602 hi = hash_search(hash, name);
603 if (!hi) {
604 if (++hash->nel / hash->csize > 10)
605 hash_rebuild(hash);
606
607 l = strlen(name) + 1;
608 hi = xzalloc(sizeof(*hi) + l);
609 strcpy(hi->name, name);
610
611 idx = hashidx(name) % hash->csize;
612 hi->next = hash->items[idx];
613 hash->items[idx] = hi;
614 hash->glen += l;
615 }
616 return &(hi->data);
617 }
618
/* Typed wrappers around hash_find(): each returns the (possibly newly
 * created) entry cast to the payload type of the table it targets --
 * an explicit table for findvar, vhash for variables, fdhash for
 * streams, fnhash for functions. */
619 #define findvar(hash, name) ((var*) hash_find((hash), (name)))
620 #define newvar(name) ((var*) hash_find(vhash, (name)))
621 #define newfile(name) ((rstream*)hash_find(fdhash, (name)))
622 #define newfunc(name) ((func*) hash_find(fnhash, (name)))
623
624 static void hash_remove(xhash *hash, const char *name)
625 {
626 hash_item *hi, **phi;
627
628 phi = &(hash->items[hashidx(name) % hash->csize]);
629 while (*phi) {
630 hi = *phi;
631 if (strcmp(hi->name, name) == 0) {
632 hash->glen -= (strlen(name) + 1);
633 hash->nel--;
634 *phi = hi->next;
635 free(hi);
636 break;
637 }
638 phi = &(hi->next);
639 }
640 }
641
642 /* ------ some useful functions ------ */
643
644 static void skip_spaces(char **s)
645 {
646 char *p = *s;
647
648 while (1) {
649 if (*p == '\\' && p[1] == '\n') {
650 p++;
651 t_lineno++;
652 } else if (*p != ' ' && *p != '\t') {
653 break;
654 }
655 p++;
656 }
657 *s = p;
658 }
659
/* Return the NUL-terminated string at *s and move *s to the byte just
 * after its terminating NUL. */
static char *nextword(char **s)
{
	char *start = *s;
	char *end = start;

	while (*end != '\0')
		end++;
	*s = end + 1;
	return start;
}
667
/* Fetch the next character from *s and advance *s. A backslash is handed
 * to bb_process_escape_sequence(); if that returns a backslash without
 * consuming anything, the character following the backslash is returned
 * as is. */
static char nextchar(char **s)
{
	char ch;
	char *after;

	ch = **s;
	(*s)++;
	after = *s;
	if (ch == '\\') {
		ch = bb_process_escape_sequence((const char**)s);
		if (ch == '\\' && *s == after) {
			ch = **s;
			(*s)++;
		}
	}
	return ch;
}
680
681 static ALWAYS_INLINE int isalnum_(int c)
682 {
683 return (isalnum(c) || c == '_');
684 }
685
/* Convert the number at *pp to a double and advance *pp past the text
 * that was consumed.
 *
 * With ENABLE_DESKTOP, text that starts with '0' followed by 'x'/'X' or a
 * digit is first tried as a hexadecimal or octal integer. If the octal
 * parse stops on another digit or on '.', the text is not a valid octal
 * integer (e.g. "08" or "009.5"), so it is re-parsed as an ordinary
 * decimal/floating number instead of being cut short.
 */
static double my_strtod(char **pp)
{
	char *cp = *pp;

#if ENABLE_DESKTOP
	if (cp[0] == '0') {
		char c = (cp[1] | 0x20);

		if (c == 'x' || isdigit((unsigned char)cp[1])) {
			unsigned long long ull = strtoull(cp, pp, 0);

			if (c == 'x')
				return ull;
			c = **pp;
			if (!isdigit((unsigned char)c) && c != '.')
				return ull;
			/* else: stopped on '8', '9' or '.', so this is a
			 * decimal or floating number -- let strtod parse it
			 * from the start */
		}
	}
#endif
	return strtod(cp, pp);
}
697
698 /* -------- working with variables (set/get/copy/etc) -------- */
699
700 static xhash *iamarray(var *v)
701 {
702 var *a = v;
703
704 while (a->type & VF_CHILD)
705 a = a->x.parent;
706
707 if (!(a->type & VF_ARRAY)) {
708 a->type |= VF_ARRAY;
709 a->x.array = hash_init();
710 }
711 return a->x.array;
712 }
713
714 static void clear_array(xhash *array)
715 {
716 unsigned i;
717 hash_item *hi, *thi;
718
719 for (i = 0; i < array->csize; i++) {
720 hi = array->items[i];
721 while (hi) {
722 thi = hi;
723 hi = hi->next;
724 free(thi->data.v.string);
725 free(thi);
726 }
727 array->items[i] = NULL;
728 }
729 array->glen = array->nel = 0;
730 }
731
732 /* clear a variable */
733 static var *clrvar(var *v)
734 {
735 if (!(v->type & VF_FSTR))
736 free(v->string);
737
738 v->type &= VF_DONTTOUCH;
739 v->type |= VF_DIRTY;
740 v->string = NULL;
741 return v;
742 }
743
744 /* assign string value to variable */
745 static var *setvar_p(var *v, char *value)
746 {
747 clrvar(v);
748 v->string = value;
749 handle_special(v);
750 return v;
751 }
752
753 /* same as setvar_p but make a copy of string */
754 static var *setvar_s(var *v, const char *value)
755 {
756 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
757 }
758
759 /* same as setvar_s but sets USER flag */
760 static var *setvar_u(var *v, const char *value)
761 {
762 v = setvar_s(v, value);
763 v->type |= VF_USER;
764 return v;
765 }
766
767 /* set array element to user string */
768 static void setari_u(var *a, int idx, const char *s)
769 {
770 var *v;
771
772 v = findvar(iamarray(a), itoa(idx));
773 setvar_u(v, s);
774 }
775
776 /* assign numeric value to variable */
777 static var *setvar_i(var *v, double value)
778 {
779 clrvar(v);
780 v->type |= VF_NUMBER;
781 v->number = value;
782 handle_special(v);
783 return v;
784 }
785
786 static const char *getvar_s(var *v)
787 {
788 /* if v is numeric and has no cached string, convert it to string */
789 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
790 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
791 v->string = xstrdup(g_buf);
792 v->type |= VF_CACHED;
793 }
794 return (v->string == NULL) ? "" : v->string;
795 }
796
797 static double getvar_i(var *v)
798 {
799 char *s;
800
801 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
802 v->number = 0;
803 s = v->string;
804 if (s && *s) {
805 v->number = my_strtod(&s);
806 if (v->type & VF_USER) {
807 skip_spaces(&s);
808 if (*s != '\0')
809 v->type &= ~VF_USER;
810 }
811 } else {
812 v->type &= ~VF_USER;
813 }
814 v->type |= VF_CACHED;
815 }
816 return v->number;
817 }
818
819 /* Used for operands of bitwise ops */
820 static unsigned long getvar_i_int(var *v)
821 {
822 double d = getvar_i(v);
823
824 /* Casting doubles to longs is undefined for values outside
825 * of target type range. Try to widen it as much as possible */
826 if (d >= 0)
827 return (unsigned long)d;
828 /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
829 return - (long) (unsigned long) (-d);
830 }
831
832 static var *copyvar(var *dest, const var *src)
833 {
834 if (dest != src) {
835 clrvar(dest);
836 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
837 dest->number = src->number;
838 if (src->string)
839 dest->string = xstrdup(src->string);
840 }
841 handle_special(dest);
842 return dest;
843 }
844
845 static var *incvar(var *v)
846 {
847 return setvar_i(v, getvar_i(v) + 1.0);
848 }
849
850 /* return true if v is number or numeric string */
851 static int is_numeric(var *v)
852 {
853 getvar_i(v);
854 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
855 }
856
857 /* return 1 when value of v corresponds to true, 0 otherwise */
858 static int istrue(var *v)
859 {
860 if (is_numeric(v))
861 return (v->number != 0);
862 return (v->string && v->string[0]);
863 }
864
865 /* temporary variables allocator. Last allocated should be first freed */
866 static var *nvalloc(int n)
867 {
868 nvblock *pb = NULL;
869 var *v, *r;
870 int size;
871
872 while (g_cb) {
873 pb = g_cb;
874 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
875 break;
876 g_cb = g_cb->next;
877 }
878
879 if (!g_cb) {
880 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
881 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
882 g_cb->size = size;
883 g_cb->pos = g_cb->nv;
884 g_cb->prev = pb;
885 /*g_cb->next = NULL; - xzalloc did it */
886 if (pb)
887 pb->next = g_cb;
888 }
889
890 v = r = g_cb->pos;
891 g_cb->pos += n;
892
893 while (v < g_cb->pos) {
894 v->type = 0;
895 v->string = NULL;
896 v++;
897 }
898
899 return r;
900 }
901
902 static void nvfree(var *v)
903 {
904 var *p;
905
906 if (v < g_cb->nv || v >= g_cb->pos)
907 syntax_error(EMSG_INTERNAL_ERROR);
908
909 for (p = v; p < g_cb->pos; p++) {
910 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
911 clear_array(iamarray(p));
912 free(p->x.array->items);
913 free(p->x.array);
914 }
915 if (p->type & VF_WALK)
916 free(p->x.walker);
917
918 clrvar(p);
919 }
920
921 g_cb->pos = v;
922 while (g_cb->prev && g_cb->pos == g_cb->nv) {
923 g_cb = g_cb->prev;
924 }
925 }
926
927 /* ------- awk program text parsing ------- */
928
929 /* Parse next token pointed by global pos, place results into the t_*
930 * globals (t_tclass, t_info, t_string, t_double).
931 * If token isn't expected, report a syntax error. Return token class.
 *
 * 'expected' is a mask of TC_* classes acceptable at this point. It also
 * steers the scan itself: '/' starts a regexp only when TC_REGEXP is
 * expected, and table tokens are matched only if their class is expected,
 * is a word class (TC_WORD) or is TC_NEWLINE.
 *
 * The program text is modified in place: string and regexp bodies are
 * unescaped into the source buffer and identifiers are shifted left by
 * one character to make room for a terminating NUL.
 */
932 static uint32_t next_token(uint32_t expected)
933 {
934 #define concat_inserted (G.next_token__concat_inserted)
935 #define save_tclass (G.next_token__save_tclass)
936 #define save_info (G.next_token__save_info)
937 /* Initialized to TC_OPTERM: */
938 #define ltclass (G.next_token__ltclass)
939
940 char *p, *pp, *s;
941 const char *tl;
942 uint32_t tc;
943 const uint32_t *ti;
944 int l;
945
/* rollback_token() was called: hand out the previous token again
 * (t_tclass and friends are left untouched) */
946 if (t_rollback) {
947 t_rollback = FALSE;
948
/* the previous call returned a synthetic concatenation operator; now
 * deliver the real token that was saved at that time */
949 } else if (concat_inserted) {
950 concat_inserted = FALSE;
951 t_tclass = save_tclass;
952 t_info = save_info;
953
954 } else {
955 p = g_pos;
956 readnext:
957 skip_spaces(&p);
958 g_lineno = t_lineno;
/* skip a '#' comment up to (not including) the end of line */
959 if (*p == '#')
960 while (*p != '\n' && *p != '\0')
961 p++;
962
963 if (*p == '\n')
964 t_lineno++;
965
966 if (*p == '\0') {
967 tc = TC_EOF;
968
969 } else if (*p == '\"') {
970 /* it's a string */
/* unescape in place: s is the write position, p the read position */
971 t_string = s = ++p;
972 while (*p != '\"') {
973 if (*p == '\0' || *p == '\n')
974 syntax_error(EMSG_UNEXP_EOS);
975 *(s++) = nextchar(&p);
976 }
977 p++;
978 *s = '\0';
979 tc = TC_STRING;
980
981 } else if ((expected & TC_REGEXP) && *p == '/') {
982 /* it's regexp */
/* copied in place like a string; a backslash is passed through
 * bb_process_escape_sequence(), with an escaped backslash kept as
 * two backslashes and an unrecognized escape kept verbatim */
983 t_string = s = ++p;
984 while (*p != '/') {
985 if (*p == '\0' || *p == '\n')
986 syntax_error(EMSG_UNEXP_EOS);
987 *s = *p++;
988 if (*s++ == '\\') {
989 pp = p;
990 *(s-1) = bb_process_escape_sequence((const char **)&p);
991 if (*pp == '\\')
992 *s++ = '\\';
993 if (p == pp)
994 *s++ = *p++;
995 }
996 }
997 p++;
998 *s = '\0';
999 tc = TC_REGEXP;
1000
1001 } else if (*p == '.' || isdigit(*p)) {
1002 /* it's a number */
1003 t_double = my_strtod(&p);
/* a '.' right after the parsed number is rejected */
1004 if (*p == '.')
1005 syntax_error(EMSG_UNEXP_TOKEN);
1006 tc = TC_NUMBER;
1007
1008 } else {
1009 /* search for something known */
/* walk tokenlist[]: tc is the class bit of the current group and is
 * shifted at each NTC marker; ti steps through tokeninfo[] in
 * parallel, one entry per token */
1010 tl = tokenlist;
1011 tc = 0x00000001;
1012 ti = tokeninfo;
1013 while (*tl) {
1014 l = *(tl++);
1015 if (l == NTCC) {
1016 tc <<= 1;
1017 continue;
1018 }
1019 /* if token class is expected, token
1020 * matches and it's not a longer word,
1021 * then this is what we are looking for
1022 */
1023 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1024 && *tl == *p && strncmp(p, tl, l) == 0
1025 && !((tc & TC_WORD) && isalnum_(p[l]))
1026 ) {
1027 t_info = *ti;
1028 p += l;
1029 break;
1030 }
1031 ti++;
1032 tl += l;
1033 }
1034
/* reached the end of the table without a match */
1035 if (!*tl) {
1036 /* it's a name (var/array/function),
1037 * otherwise it's something wrong
1038 */
1039 if (!isalnum_(*p))
1040 syntax_error(EMSG_UNEXP_TOKEN);
1041
/* move the identifier one byte to the left so that a NUL can be
 * written after it without destroying the next character */
1042 t_string = --p;
1043 while (isalnum_(*(++p))) {
1044 *(p-1) = *p;
1045 }
1046 *(p-1) = '\0';
1047 tc = TC_VARIABLE;
1048 /* also consume whitespace between functionname and bracket */
1049 if (!(expected & TC_VARIABLE))
1050 skip_spaces(&p);
/* '(' is left in the input; '[' is consumed here */
1051 if (*p == '(') {
1052 tc = TC_FUNCTION;
1053 } else {
1054 if (*p == '[') {
1055 p++;
1056 tc = TC_ARRAY;
1057 }
1058 }
1059 }
1060 }
1061 g_pos = p;
1062
1063 /* skipping newlines in some cases */
/* (when the previous token's class is in TC_NOTERM) */
1064 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1065 goto readnext;
1066
1067 /* insert concatenation operator when needed */
/* the real token is saved and returned by the next call */
1068 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1069 concat_inserted = TRUE;
1070 save_tclass = tc;
1071 save_info = t_info;
1072 tc = TC_BINOP;
1073 t_info = OC_CONCAT | SS | P(35);
1074 }
1075
1076 t_tclass = tc;
1077 }
1078 ltclass = t_tclass;
1079
1080 /* Are we ready for this? */
1081 if (!(ltclass & expected))
1082 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1083 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1084
1085 return ltclass;
1086 #undef concat_inserted
1087 #undef save_tclass
1088 #undef save_info
1089 #undef ltclass
1090 }
1091
/* Push the current token back: the next call to next_token() returns it
 * again instead of scanning further. */
1092 static void rollback_token(void)
1093 {
1094 t_rollback = TRUE;
1095 }
1096
1097 static node *new_node(uint32_t info)
1098 {
1099 node *n;
1100
1101 n = xzalloc(sizeof(node));
1102 n->info = info;
1103 n->lineno = g_lineno;
1104 return n;
1105 }
1106
1107 static node *mk_re_node(const char *s, node *n, regex_t *re)
1108 {
1109 n->info = OC_REGEXP;
1110 n->l.re = re;
1111 n->r.ire = re + 1;
1112 xregcomp(re, s, REG_EXTENDED);
1113 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1114
1115 return n;
1116 }
1117
1118 static node *condition(void)
1119 {
1120 next_token(TC_SEQSTART);
1121 return parse_expr(TC_SEQTERM);
1122 }
1123
1124 /* parse expression terminated by given argument, return ptr
1125 * to built subtree. Terminator is eaten by parse_expr */
/* 'iexp' is the mask of token classes that end the expression.
 *
 * The tree is built incrementally. 'sn' is a stack-local sentinel root
 * whose r.n link ends up pointing at the finished subtree; 'cn' is the
 * most recently added node; every node's a.n link points to its parent
 * while the tree is under construction. 'xtc' is the set of token classes
 * acceptable next and is recomputed after every token. 'glptr' remembers
 * a just-seen getline node so that a following '<' can be attached to it
 * as input redirection.
 */
1126 static node *parse_expr(uint32_t iexp)
1127 {
1128 node sn;
1129 node *cn = &sn;
1130 node *vn, *glptr;
1131 uint32_t tc, xtc;
1132 var *v;
1133
/* NOTE(review): only sn.info and sn.r.n are initialized here; the
 * priority walk below reads vn->a.n->info and relies on stopping before
 * it would follow sn.a.n -- confirm. */
1134 sn.info = PRIMASK;
1135 sn.r.n = glptr = NULL;
1136 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1137
1138 while (!((tc = next_token(xtc)) & iexp)) {
/* this t_info value is the tokeninfo[] entry of the '<' operator */
1139 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1140 /* input redirection (<) attached to glptr node */
1141 cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1142 cn->a.n = glptr;
1143 xtc = TC_OPERAND | TC_UOPPRE;
1144 glptr = NULL;
1145
1146 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1147 /* for binary and postfix-unary operators, jump back over
1148 * previous operators with higher priority */
1149 vn = cn;
1150 while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1151 || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
1152 ) {
1153 vn = vn->a.n;
1154 }
/* the '?' node is stored with its priority raised by 6 */
1155 if ((t_info & OPCLSMASK) == OC_TERNARY)
1156 t_info += P(6);
/* splice the new operator node between vn and vn's parent */
1157 cn = vn->a.n->r.n = new_node(t_info);
1158 cn->a.n = vn->a.n;
1159 if (tc & TC_BINOP) {
1160 cn->l.n = vn;
1161 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1162 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1163 /* it's a pipe */
1164 next_token(TC_GETLINE);
1165 /* give maximum priority to this pipe */
1166 cn->info &= ~PRIMASK;
1167 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1168 }
1169 } else {
/* postfix operator: its operand goes into the right link */
1170 cn->r.n = vn;
1171 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1172 }
1173 vn->a.n = cn;
1174
1175 } else {
1176 /* for operands and prefix-unary operators, attach them
1177 * to last node */
1178 vn = cn;
1179 cn = vn->r.n = new_node(t_info);
1180 cn->a.n = vn;
1181 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1182 if (tc & (TC_OPERAND | TC_REGEXP)) {
1183 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1184 /* one should be very careful with switch on tclass -
1185 * only simple tclasses should be used! */
1186 switch (tc) {
1187 case TC_VARIABLE:
1188 case TC_ARRAY:
1189 cn->info = OC_VAR;
/* a name found in ahash is a function argument and is referenced by
 * index; any other name is looked up or created in vhash */
1190 v = hash_search(ahash, t_string);
1191 if (v != NULL) {
1192 cn->info = OC_FNARG;
1193 cn->l.i = v->x.aidx;
1194 } else {
1195 cn->l.v = newvar(t_string);
1196 }
1197 if (tc & TC_ARRAY) {
1198 cn->info |= xS;
/* the subscript expression runs up to the closing ']' */
1199 cn->r.n = parse_expr(TC_ARRTERM);
1200 }
1201 break;
1202
1203 case TC_NUMBER:
1204 case TC_STRING:
/* constants get their own freshly allocated var */
1205 cn->info = OC_VAR;
1206 v = cn->l.v = xzalloc(sizeof(var));
1207 if (tc & TC_NUMBER)
1208 setvar_i(v, t_double);
1209 else
1210 setvar_s(v, t_string);
1211 break;
1212
1213 case TC_REGEXP:
1214 mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1215 break;
1216
1217 case TC_FUNCTION:
1218 cn->info = OC_FUNC;
1219 cn->r.f = newfunc(t_string);
1220 cn->l.n = condition();
1221 break;
1222
1223 case TC_SEQSTART:
/* parenthesized subexpression: the node just created is replaced
 * by the parsed subtree */
1224 cn = vn->r.n = parse_expr(TC_SEQTERM);
1225 cn->a.n = vn;
1226 break;
1227
1228 case TC_GETLINE:
1229 glptr = cn;
1230 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1231 break;
1232
1233 case TC_BUILTIN:
1234 cn->l.n = condition();
1235 break;
1236 }
1237 }
1238 }
1239 }
1240 return sn.r.n;
1241 }
1242
1243 /* add node to chain. Return ptr to alloc'd node */
1244 static node *chain_node(uint32_t info)
1245 {
1246 node *n;
1247
1248 if (!seq->first)
1249 seq->first = seq->last = new_node(0);
1250
1251 if (seq->programname != g_progname) {
1252 seq->programname = g_progname;
1253 n = chain_node(OC_NEWSOURCE);
1254 n->l.s = xstrdup(g_progname);
1255 }
1256
1257 n = seq->last;
1258 n->info = info;
1259 seq->last = n->a.n = new_node(OC_DONE);
1260
1261 return n;
1262 }
1263
1264 static void chain_expr(uint32_t info)
1265 {
1266 node *n;
1267
1268 n = chain_node(info);
1269 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1270 if (t_tclass & TC_GRPTERM)
1271 rollback_token();
1272 }
1273
1274 static node *chain_loop(node *nn)
1275 {
1276 node *n, *n2, *save_brk, *save_cont;
1277
1278 save_brk = break_ptr;
1279 save_cont = continue_ptr;
1280
1281 n = chain_node(OC_BR | Vx);
1282 continue_ptr = new_node(OC_EXEC);
1283 break_ptr = new_node(OC_EXEC);
1284 chain_group();
1285 n2 = chain_node(OC_EXEC | Vx);
1286 n2->l.n = nn;
1287 n2->a.n = n;
1288 continue_ptr->a.n = n2;
1289 break_ptr->a.n = n->r.n = seq->last;
1290
1291 continue_ptr = save_cont;
1292 break_ptr = save_brk;
1293
1294 return n;
1295 }
1296
1297 /* parse group and attach it to chain */
1298 static void chain_group(void)
1299 {
1300 uint32_t c;
1301 node *n, *n2, *n3;
1302
1303 do {
1304 c = next_token(TC_GRPSEQ);
1305 } while (c & TC_NEWLINE);
1306
1307 if (c & TC_GRPSTART) {
1308 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1309 if (t_tclass & TC_NEWLINE) continue;
1310 rollback_token();
1311 chain_group();
1312 }
1313 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1314 rollback_token();
1315 chain_expr(OC_EXEC | Vx);
1316 } else { /* TC_STATEMNT */
1317 switch (t_info & OPCLSMASK) {
1318 case ST_IF:
1319 n = chain_node(OC_BR | Vx);
1320 n->l.n = condition();
1321 chain_group();
1322 n2 = chain_node(OC_EXEC);
1323 n->r.n = seq->last;
1324 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1325 chain_group();
1326 n2->a.n = seq->last;
1327 } else {
1328 rollback_token();
1329 }
1330 break;
1331
1332 case ST_WHILE:
1333 n2 = condition();
1334 n = chain_loop(NULL);
1335 n->l.n = n2;
1336 break;
1337
1338 case ST_DO:
1339 n2 = chain_node(OC_EXEC);
1340 n = chain_loop(NULL);
1341 n2->a.n = n->a.n;
1342 next_token(TC_WHILE);
1343 n->l.n = condition();
1344 break;
1345
1346 case ST_FOR:
1347 next_token(TC_SEQSTART);
1348 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1349 if (t_tclass & TC_SEQTERM) { /* for-in */
1350 if ((n2->info & OPCLSMASK) != OC_IN)
1351 syntax_error(EMSG_UNEXP_TOKEN);
1352 n = chain_node(OC_WALKINIT | VV);
1353 n->l.n = n2->l.n;
1354 n->r.n = n2->r.n;
1355 n = chain_loop(NULL);
1356 n->info = OC_WALKNEXT | Vx;
1357 n->l.n = n2->l.n;
1358 } else { /* for (;;) */
1359 n = chain_node(OC_EXEC | Vx);
1360 n->l.n = n2;
1361 n2 = parse_expr(TC_SEMICOL);
1362 n3 = parse_expr(TC_SEQTERM);
1363 n = chain_loop(n3);
1364 n->l.n = n2;
1365 if (!n2)
1366 n->info = OC_EXEC;
1367 }
1368 break;
1369
1370 case OC_PRINT:
1371 case OC_PRINTF:
1372 n = chain_node(t_info);
1373 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1374 if (t_tclass & TC_OUTRDR) {
1375 n->info |= t_info;
1376 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1377 }
1378 if (t_tclass & TC_GRPTERM)
1379 rollback_token();
1380 break;
1381
1382 case OC_BREAK:
1383 n = chain_node(OC_EXEC);
1384 n->a.n = break_ptr;
1385 break;
1386
1387 case OC_CONTINUE:
1388 n = chain_node(OC_EXEC);
1389 n->a.n = continue_ptr;
1390 break;
1391
1392 /* delete, next, nextfile, return, exit */
1393 default:
1394 chain_expr(t_info);
1395 }
1396 }
1397 }
1398
1399 static void parse_program(char *p)
1400 {
1401 uint32_t tclass;
1402 node *cn;
1403 func *f;
1404 var *v;
1405
1406 g_pos = p;
1407 t_lineno = 1;
1408 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1409 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1410
1411 if (tclass & TC_OPTERM)
1412 continue;
1413
1414 seq = &mainseq;
1415 if (tclass & TC_BEGIN) {
1416 seq = &beginseq;
1417 chain_group();
1418
1419 } else if (tclass & TC_END) {
1420 seq = &endseq;
1421 chain_group();
1422
1423 } else if (tclass & TC_FUNCDECL) {
1424 next_token(TC_FUNCTION);
1425 g_pos++;
1426 f = newfunc(t_string);
1427 f->body.first = NULL;
1428 f->nargs = 0;
1429 while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1430 v = findvar(ahash, t_string);
1431 v->x.aidx = (f->nargs)++;
1432
1433 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1434 break;
1435 }
1436 seq = &(f->body);
1437 chain_group();
1438 clear_array(ahash);
1439
1440 } else if (tclass & TC_OPSEQ) {
1441 rollback_token();
1442 cn = chain_node(OC_TEST);
1443 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1444 if (t_tclass & TC_GRPSTART) {
1445 rollback_token();
1446 chain_group();
1447 } else {
1448 chain_node(OC_PRINT);
1449 }
1450 cn->r.n = mainseq.last;
1451
1452 } else /* if (tclass & TC_GRPSTART) */ {
1453 rollback_token();
1454 chain_group();
1455 }
1456 }
1457 }
1458
1459
1460 /* -------- program execution part -------- */
1461
1462 static node *mk_splitter(const char *s, tsplitter *spl)
1463 {
1464 regex_t *re, *ire;
1465 node *n;
1466
1467 re = &spl->re[0];
1468 ire = &spl->re[1];
1469 n = &spl->n;
1470 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1471 regfree(re);
1472 regfree(ire); // TODO: nuke ire, use re+1?
1473 }
1474 if (strlen(s) > 1) {
1475 mk_re_node(s, n, re);
1476 } else {
1477 n->info = (uint32_t) *s;
1478 }
1479
1480 return n;
1481 }
1482
1483 /* use node as a regular expression. Supplied with node ptr and regex_t
1484 * storage space. Return ptr to regex (if result points to preg, it should
1485 * be later regfree'd manually
1486 */
1487 static regex_t *as_regex(node *op, regex_t *preg)
1488 {
1489 int cflags;
1490 var *v;
1491 const char *s;
1492
1493 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1494 return icase ? op->r.ire : op->l.re;
1495 }
1496 v = nvalloc(1);
1497 s = getvar_s(evaluate(op, v));
1498
1499 cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1500 /* Testcase where REG_EXTENDED fails (unpaired '{'):
1501 * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1502 * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1503 * (maybe gsub is not supposed to use REG_EXTENDED?).
1504 */
1505 if (regcomp(preg, s, cflags)) {
1506 cflags &= ~REG_EXTENDED;
1507 xregcomp(preg, s, cflags);
1508 }
1509 nvfree(v);
1510 return preg;
1511 }
1512
/* Gradually growing buffer: make sure *b can hold more than n bytes.
 * Nothing happens while the buffer exists and n is below *size;
 * otherwise it is reallocated to n + n/2 + 80 bytes and *size updated. */
static void qrealloc(char **b, int n, int *size)
{
	if (*b != NULL && n < *size)
		return;

	*size = n + (n >> 1) + 80;
	*b = xrealloc(*b, *size);
}
1521
1522 /* resize field storage space */
1523 static void fsrealloc(int size)
1524 {
1525 int i;
1526
1527 if (size >= maxfields) {
1528 i = maxfields;
1529 maxfields = size + 16;
1530 Fields = xrealloc(Fields, maxfields * sizeof(var));
1531 for (; i < maxfields; i++) {
1532 Fields[i].type = VF_SPECIAL;
1533 Fields[i].string = NULL;
1534 }
1535 }
1536
1537 if (size < nfields) {
1538 for (i = size; i < nfields; i++) {
1539 clrvar(Fields + i);
1540 }
1541 }
1542 nfields = size;
1543 }
1544
/*
 * Split string s into fields according to splitter node spl.
 *
 * A new buffer is allocated and stored in *slist; on return it holds the
 * fields as consecutive NUL-terminated strings. Returns the field count.
 *
 * The splitting mode is taken from spl->info:
 *   - OC_REGEXP class: split on regexp matches;
 *   - character '\0':  every input character becomes its own field;
 *   - character ' ':   split on runs of whitespace, ignoring leading and
 *                      trailing whitespace;
 *   - any other char:  split on that single character (both cases of it
 *                      when icase is set).
 * When RS is an empty string, '\n' additionally acts as a separator in
 * the regexp and single-character modes.
 */
1545 static int awk_split(const char *s, node *spl, char **slist)
1546 {
1547 int l, n = 0;
1548 char c[4];
1549 char *s1;
1550 regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1551
1552 /* in worst case, each char would be a separate field */
1553 *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1554 strcpy(s1, s);
1555
/* c[0..1]: separator char (two slots, one per case if icase);
 * c[2]: optional extra '\n' separator; c[3]: terminator */
1556 c[0] = c[1] = (char)spl->info;
1557 c[2] = c[3] = '\0';
1558 if (*getvar_s(intvar[RS]) == '\0')
1559 c[2] = '\n';
1560
1561 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1562 if (!*s)
1563 return n; /* "": zero fields */
1564 n++; /* at least one field will be there */
1565 do {
1566 l = strcspn(s, c+2); /* len till next NUL or \n */
1567 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1568 && pmatch[0].rm_so <= l
1569 ) {
1570 l = pmatch[0].rm_so;
/* a match ending at offset 0 is empty: consume one character so
 * that the scan always advances */
1571 if (pmatch[0].rm_eo == 0) {
1572 l++;
1573 pmatch[0].rm_eo++;
1574 }
1575 n++; /* we saw yet another delimiter */
1576 } else {
/* no usable match: the field runs up to the NUL or '\n' found above */
1577 pmatch[0].rm_eo = l;
1578 if (s[l])
1579 pmatch[0].rm_eo++;
1580 }
1581 memcpy(s1, s, l);
1582 /* make sure we remove *all* of the separator chars */
1583 do {
1584 s1[l] = '\0';
1585 } while (++l < pmatch[0].rm_eo);
1586 nextword(&s1);
1587 s += pmatch[0].rm_eo;
1588 } while (*s);
1589 return n;
1590 }
1591 if (c[0] == '\0') { /* null split */
1592 while (*s) {
1593 *s1++ = *s++;
1594 *s1++ = '\0';
1595 n++;
1596 }
1597 return n;
1598 }
1599 if (c[0] != ' ') { /* single-character split */
1600 if (icase) {
1601 c[0] = toupper(c[0]);
1602 c[1] = tolower(c[1]);
1603 }
/* s1 already holds a copy of s: overwrite each separator with NUL */
1604 if (*s1) n++;
1605 while ((s1 = strpbrk(s1, c))) {
1606 *s1++ = '\0';
1607 n++;
1608 }
1609 return n;
1610 }
1611 /* space split */
1612 while (*s) {
1613 s = skip_whitespace(s);
1614 if (!*s) break;
1615 n++;
1616 while (*s && !isspace(*s))
1617 *s1++ = *s++;
1618 *s1++ = '\0';
1619 }
1620 return n;
1621 }
1622
1623 static void split_f0(void)
1624 {
1625 /* static char *fstrings; */
1626 #define fstrings (G.split_f0__fstrings)
1627
1628 int i, n;
1629 char *s;
1630
1631 if (is_f0_split)
1632 return;
1633
1634 is_f0_split = TRUE;
1635 free(fstrings);
1636 fsrealloc(0);
1637 n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1638 fsrealloc(n);
1639 s = fstrings;
1640 for (i = 0; i < n; i++) {
1641 Fields[i].string = nextword(&s);
1642 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1643 }
1644
1645 /* set NF manually to avoid side effects */
1646 clrvar(intvar[NF]);
1647 intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1648 intvar[NF]->number = nfields;
1649 #undef fstrings
1650 }
1651
1652 /* perform additional actions when some internal variables changed */
1653 static void handle_special(var *v)
1654 {
1655 int n;
1656 char *b;
1657 const char *sep, *s;
1658 int sl, l, len, i, bsize;
1659
1660 if (!(v->type & VF_SPECIAL))
1661 return;
1662
1663 if (v == intvar[NF]) {
1664 n = (int)getvar_i(v);
1665 fsrealloc(n);
1666
1667 /* recalculate $0 */
1668 sep = getvar_s(intvar[OFS]);
1669 sl = strlen(sep);
1670 b = NULL;
1671 len = 0;
1672 for (i = 0; i < n; i++) {
1673 s = getvar_s(&Fields[i]);
1674 l = strlen(s);
1675 if (b) {
1676 memcpy(b+len, sep, sl);
1677 len += sl;
1678 }
1679 qrealloc(&b, len+l+sl, &bsize);
1680 memcpy(b+len, s, l);
1681 len += l;
1682 }
1683 if (b)
1684 b[len] = '\0';
1685 setvar_p(intvar[F0], b);
1686 is_f0_split = TRUE;
1687
1688 } else if (v == intvar[F0]) {
1689 is_f0_split = FALSE;
1690
1691 } else if (v == intvar[FS]) {
1692 mk_splitter(getvar_s(v), &fsplitter);
1693
1694 } else if (v == intvar[RS]) {
1695 mk_splitter(getvar_s(v), &rsplitter);
1696
1697 } else if (v == intvar[IGNORECASE]) {
1698 icase = istrue(v);
1699
1700 } else { /* $n */
1701 n = getvar_i(intvar[NF]);
1702 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1703 /* right here v is invalid. Just to note... */
1704 }
1705 }
1706
1707 /* step through func/builtin/etc arguments */
1708 static node *nextarg(node **pn)
1709 {
1710 node *n;
1711
1712 n = *pn;
1713 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1714 *pn = n->r.n;
1715 n = n->l.n;
1716 } else {
1717 *pn = NULL;
1718 }
1719 return n;
1720 }
1721
1722 static void hashwalk_init(var *v, xhash *array)
1723 {
1724 char **w;
1725 hash_item *hi;
1726 unsigned i;
1727
1728 if (v->type & VF_WALK)
1729 free(v->x.walker);
1730
1731 v->type |= VF_WALK;
1732 w = v->x.walker = xzalloc(2 + 2*sizeof(char *) + array->glen);
1733 w[0] = w[1] = (char *)(w + 2);
1734 for (i = 0; i < array->csize; i++) {
1735 hi = array->items[i];
1736 while (hi) {
1737 strcpy(*w, hi->name);
1738 nextword(w);
1739 hi = hi->next;
1740 }
1741 }
1742 }
1743
1744 static int hashwalk_next(var *v)
1745 {
1746 char **w;
1747
1748 w = v->x.walker;
1749 if (w[1] == w[0])
1750 return FALSE;
1751
1752 setvar_s(v, nextword(w+1));
1753 return TRUE;
1754 }
1755
1756 /* evaluate node, return 1 when result is true, 0 otherwise */
1757 static int ptest(node *pattern)
1758 {
1759 /* ptest__v is "static": to save stack space? */
1760 return istrue(evaluate(pattern, &G.ptest__v));
1761 }
1762
/*
 * Record reader.
 *
 * Data is accumulated in rsm->buffer; rsm->adv is the offset of the
 * first unconsumed byte and rsm->pos the number of buffered bytes from
 * there on. The record separator comes from rsplitter:
 *   - a regexp (OC_REGEXP class);
 *   - a single character c;
 *   - c == '\0': records are separated by "\n\n" sequences, and leading
 *     newlines (counted in rp) are skipped.
 * On success v receives the record text (flagged VF_USER) and RT the
 * separator text that ended it.
 *
 * Returns 1 when a record was read, 0 when no data is left, and -1 when
 * the read failed (ERRNO is set from errno in that case).
 */
1763 /* read next record from stream rsm into a variable v */
1764 static int awk_getline(rstream *rsm, var *v)
1765 {
1766 char *b;
1767 regmatch_t pmatch[2];
1768 int a, p, pp=0, size;
1769 int fd, so, eo, r, rp;
1770 char c, *m, *s;
1771
1772 /* we're using our own buffer since we need access to accumulating
1773 * characters
1774 */
1775 fd = fileno(rsm->F);
1776 m = rsm->buffer;
1777 a = rsm->adv;
1778 p = rsm->pos;
1779 size = rsm->size;
1780 c = (char) rsplitter.n.info;
1781 rp = 0;
1782
1783 if (!m) qrealloc(&m, 256, &size);
1784 do {
1785 b = m + a;
/* so/eo: start and end of the separator within b; they default to
 * "all buffered data, no separator" */
1786 so = eo = p;
1787 r = 1;
1788 if (p > 0) {
1789 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1790 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1791 b, 1, pmatch, 0) == 0) {
1792 so = pmatch[0].rm_so;
1793 eo = pmatch[0].rm_eo;
/* a match reaching the end of the buffered data is not accepted
 * yet; the loop goes on to read more input first */
1794 if (b[eo] != '\0')
1795 break;
1796 }
1797 } else if (c != '\0') {
/* only the part added by the last read (from pp on) is searched */
1798 s = strchr(b+pp, c);
1799 if (!s) s = memchr(b+pp, '\0', p - pp);
1800 if (s) {
1801 so = eo = s-b;
1802 eo++;
1803 break;
1804 }
1805 } else {
1806 while (b[rp] == '\n')
1807 rp++;
1808 s = strstr(b+rp, "\n\n");
1809 if (s) {
1810 so = eo = s-b;
1811 while (b[eo] == '\n') eo++;
1812 if (b[eo] != '\0')
1813 break;
1814 }
1815 }
1816 }
1817
/* no complete record yet: move pending data to the start of the
 * buffer, make room and read more */
1818 if (a > 0) {
1819 memmove(m, (const void *)(m+a), p+1);
1820 b = m;
1821 a = 0;
1822 }
1823
1824 qrealloc(&m, a+p+128, &size);
1825 b = m + a;
1826 pp = p;
1827 p += safe_read(fd, b+p, size-p-1);
/* p < pp means safe_read() returned a negative value */
1828 if (p < pp) {
1829 p = 0;
1830 r = 0;
1831 setvar_i(intvar[ERRNO], errno);
1832 }
1833 b[p] = '\0';
1834
1835 } while (p > pp);
1836
1837 if (p == 0) {
/* nothing buffered: r becomes 0 at end of input, -1 after an error */
1838 r--;
1839 } else {
/* temporarily NUL-terminate the record, then the separator, to copy
 * them out; the overwritten bytes are restored afterwards */
1840 c = b[so]; b[so] = '\0';
1841 setvar_s(v, b+rp);
1842 v->type |= VF_USER;
1843 b[so] = c;
1844 c = b[eo]; b[eo] = '\0';
1845 setvar_s(intvar[RT], b+so);
1846 b[eo] = c;
1847 }
1848
/* save the buffer state; the consumed record and separator are skipped */
1849 rsm->buffer = m;
1850 rsm->adv = a + eo;
1851 rsm->pos = p - eo;
1852 rsm->size = size;
1853
1854 return r;
1855 }
1856
1857 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1858 {
1859 int r = 0;
1860 char c;
1861 const char *s = format;
1862
1863 if (int_as_int && n == (int)n) {
1864 r = snprintf(b, size, "%d", (int)n);
1865 } else {
1866 do { c = *s; } while (c && *++s);
1867 if (strchr("diouxX", c)) {
1868 r = snprintf(b, size, format, (int)n);
1869 } else if (strchr("eEfgG", c)) {
1870 r = snprintf(b, size, format, n);
1871 } else {
1872 syntax_error(EMSG_INV_FMT);
1873 }
1874 }
1875 return r;
1876 }
1877
1878 /* formatted output into an allocated buffer, return ptr to buffer */
1879 static char *awk_printf(node *n)
1880 {
1881 char *b = NULL;
1882 char *fmt, *s, *f;
1883 const char *s1;
1884 int i, j, incr, bsize;
1885 char c, c1;
1886 var *v, *arg;
1887
1888 v = nvalloc(1);
1889 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1890
1891 i = 0;
1892 while (*f) {
1893 s = f;
1894 while (*f && (*f != '%' || *(++f) == '%'))
1895 f++;
1896 while (*f && !isalpha(*f)) {
1897 if (*f == '*')
1898 syntax_error("%*x formats are not supported");
1899 f++;
1900 }
1901
1902 incr = (f - s) + MAXVARFMT;
1903 qrealloc(&b, incr + i, &bsize);
1904 c = *f;
1905 if (c != '\0') f++;
1906 c1 = *f;
1907 *f = '\0';
1908 arg = evaluate(nextarg(&n), v);
1909
1910 j = i;
1911 if (c == 'c' || !c) {
1912 i += sprintf(b+i, s, is_numeric(arg) ?
1913 (char)getvar_i(arg) : *getvar_s(arg));
1914 } else if (c == 's') {
1915 s1 = getvar_s(arg);
1916 qrealloc(&b, incr+i+strlen(s1), &bsize);
1917 i += sprintf(b+i, s, s1);
1918 } else {
1919 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1920 }
1921 *f = c1;
1922
1923 /* if there was an error while sprintf, return value is negative */
1924 if (i < j) i = j;
1925 }
1926
1927 b = xrealloc(b, i + 1);
1928 free(fmt);
1929 nvfree(v);
1930 b[i] = '\0';
1931 return b;
1932 }
1933
1934 /* common substitution routine
1935 * replace (nm) substring of (src) that match (n) with (repl), store
1936 * result into (dest), return number of substitutions. If nm=0, replace
1937 * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1938 * subexpression matching (\1-\9)
1939 */
1940 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex)
1941 {
1942 char *ds = NULL;
1943 const char *s;
1944 const char *sp;
1945 int c, i, j, di, rl, so, eo, nbs, n, dssize;
1946 regmatch_t pmatch[10];
1947 regex_t sreg, *re;
1948
1949 re = as_regex(rn, &sreg);
1950 if (!src) src = intvar[F0];
1951 if (!dest) dest = intvar[F0];
1952
1953 i = di = 0;
1954 sp = getvar_s(src);
1955 rl = strlen(repl);
1956 while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0 : REG_NOTBOL) == 0) {
1957 so = pmatch[0].rm_so;
1958 eo = pmatch[0].rm_eo;
1959
1960 qrealloc(&ds, di + eo + rl, &dssize);
1961 memcpy(ds + di, sp, eo);
1962 di += eo;
1963 if (++i >= nm) {
1964 /* replace */
1965 di -= (eo - so);
1966 nbs = 0;
1967 for (s = repl; *s; s++) {
1968 ds[di++] = c = *s;
1969 if (c == '\\') {
1970 nbs++;
1971 continue;
1972 }
1973 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1974 di -= ((nbs + 3) >> 1);
1975 j = 0;
1976 if (c != '&') {
1977 j = c - '0';
1978 nbs++;
1979 }
1980 if (nbs % 2) {
1981 ds[di++] = c;
1982 } else {
1983 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1984 qrealloc(&ds, di + rl + n, &dssize);
1985 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1986 di += n;
1987 }
1988 }
1989 nbs = 0;
1990 }
1991 }
1992
1993 sp += eo;
1994 if (i == nm)
1995 break;
1996 if (eo == so) {
1997 ds[di] = *sp++;
1998 if (!ds[di++])
1999 break;
2000 }
2001 }
2002
2003 qrealloc(&ds, di + strlen(sp), &dssize);
2004 strcpy(ds + di, sp);
2005 setvar_p(dest, ds);
2006 if (re == &sreg)
2007 regfree(re);
2008 return i;
2009 }
2010
2011 static NOINLINE int do_mktime(const char *ds)
2012 {
2013 struct tm then;
2014 int count;
2015
2016 /*memset(&then, 0, sizeof(then)); - not needed */
2017 then.tm_isdst = -1; /* default is unknown */
2018
2019 /* manpage of mktime says these fields are ints,
2020 * so we can sscanf stuff directly into them */
2021 count = sscanf(ds, "%u %u %u %u %u %u %d",
2022 &then.tm_year, &then.tm_mon, &then.tm_mday,
2023 &then.tm_hour, &then.tm_min, &then.tm_sec,
2024 &then.tm_isdst);
2025
2026 if (count < 6
2027 || (unsigned)then.tm_mon < 1
2028 || (unsigned)then.tm_year < 1900
2029 ) {
2030 return -1;
2031 }
2032
2033 then.tm_mon -= 1;
2034 then.tm_year -= 1900;
2035
2036 return mktime(&then);
2037 }
2038
/*
 * Execute a builtin function node and store its result in res, which is
 * also the return value.
 *
 * Up to four arguments are collected from op->l.n. Bits in op->info
 * (masks 0x09000000 and 0x08000000, shifted right by one for each
 * following argument) select whether an argument is evaluated into
 * av[i] and whether its string value is fetched into as[i]; an[i] always
 * holds the argument node itself. The two topmost bits of op->info
 * (info >> 30) give the minimum number of arguments.
 * The builtin to run is selected by info & OPNMASK.
 */
2039 static NOINLINE var *exec_builtin(node *op, var *res)
2040 {
2041 #define tspl (G.exec_builtin__tspl)
2042
2043 var *tv;
2044 node *an[4];
2045 var *av[4];
2046 const char *as[4];
2047 regmatch_t pmatch[2];
2048 regex_t sreg, *re;
2049 node *spl;
2050 uint32_t isr, info;
2051 int nargs;
2052 time_t tt;
2053 char *s, *s1;
2054 int i, l, ll, n;
2055
/* tv: temporaries that receive the evaluated arguments */
2056 tv = nvalloc(4);
2057 isr = info = op->info;
2058 op = op->l.n;
2059
2060 av[2] = av[3] = NULL;
2061 for (i = 0; i < 4 && op; i++) {
2062 an[i] = nextarg(&op);
2063 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
2064 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
2065 isr >>= 1;
2066 }
2067
2068 nargs = i;
2069 if ((uint32_t)nargs < (info >> 30))
2070 syntax_error(EMSG_TOO_FEW_ARGS);
2071
2072 info &= OPNMASK;
2073 switch (info) {
2074
/* atan2(y, x); needs libm support */
2075 case B_a2:
2076 #if ENABLE_FEATURE_AWK_LIBM
2077 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2078 #else
2079 syntax_error(EMSG_NO_MATH);
2080 #endif
2081 break;
2082
/* split as[0] into array av[1]; the optional third argument is either
 * a regexp node or a string turned into a splitter, otherwise the
 * field splitter is used. Result: number of elements. */
2083 case B_sp:
2084 if (nargs > 2) {
2085 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2086 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2087 } else {
2088 spl = &fsplitter.n;
2089 }
2090
2091 n = awk_split(as[0], spl, &s);
2092 s1 = s;
2093 clear_array(iamarray(av[1]));
2094 for (i = 1; i <= n; i++)
2095 setari_u(av[1], i, nextword(&s1));
2096 free(s);
2097 setvar_i(res, n);
2098 break;
2099
/* substring of as[0]: 1-based start av[1] (clamped to the string),
 * optional length av[2] (default: the rest of the string) */
2100 case B_ss:
2101 l = strlen(as[0]);
2102 i = getvar_i(av[1]) - 1;
2103 if (i > l) i = l;
2104 if (i < 0) i = 0;
2105 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2106 if (n < 0) n = 0;
2107 s = xstrndup(as[0]+i, n);
2108 setvar_p(res, s);
2109 break;
2110
2111 /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2112 * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2113 case B_an:
2114 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2115 break;
2116
2117 case B_co:
2118 setvar_i(res, ~getvar_i_int(av[0]));
2119 break;
2120
2121 case B_ls:
2122 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2123 break;
2124
2125 case B_or:
2126 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2127 break;
2128
2129 case B_rs:
2130 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2131 break;
2132
2133 case B_xo:
2134 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
2135 break;
2136
/* lower/upper-case a copy of as[0]; only ASCII letters are changed */
2137 case B_lo:
2138 case B_up:
2139 s1 = s = xstrdup(as[0]);
2140 while (*s1) {
2141 //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
2142 if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2143 *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
2144 s1++;
2145 }
2146 setvar_p(res, s);
2147 break;
2148
/* 1-based position of as[1] within as[0], 0 when it is not found or
 * as[1] is empty; case-insensitive when icase is set */
2149 case B_ix:
2150 n = 0;
2151 ll = strlen(as[1]);
2152 l = strlen(as[0]) - ll;
2153 if (ll > 0 && l >= 0) {
2154 if (!icase) {
2155 s = strstr(as[0], as[1]);
2156 if (s) n = (s - as[0]) + 1;
2157 } else {
2158 /* this piece of code is terribly slow and
2159 * really should be rewritten
2160 */
2161 for (i=0; i<=l; i++) {
2162 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2163 n = i+1;
2164 break;
2165 }
2166 }
2167 }
2168 }
2169 setvar_i(res, n);
2170 break;
2171
/* format a time (av[1], or the current time) in local time using
 * format as[0] or a default format; output is limited to MAXVARFMT */
2172 case B_ti:
2173 if (nargs > 1)
2174 tt = getvar_i(av[1]);
2175 else
2176 time(&tt);
2177 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2178 i = strftime(g_buf, MAXVARFMT,
2179 ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2180 localtime(&tt));
2181 g_buf[i] = '\0';
2182 setvar_s(res, g_buf);
2183 break;
2184
2185 case B_mt:
2186 setvar_i(res, do_mktime(as[0]));
2187 break;
2188
/* regexp match of an[1] against as[0]: sets RSTART and RLENGTH
 * (0 and -1 when there is no match) and returns RSTART */
2189 case B_ma:
2190 re = as_regex(an[1], &sreg);
2191 n = regexec(re, as[0], 1, pmatch, 0);
2192 if (n == 0) {
2193 pmatch[0].rm_so++;
2194 pmatch[0].rm_eo++;
2195 } else {
2196 pmatch[0].rm_so = 0;
2197 pmatch[0].rm_eo = -1;
2198 }
2199 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2200 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2201 setvar_i(res, pmatch[0].rm_so);
2202 if (re == &sreg) regfree(re);
2203 break;
2204
/* substitution with subexpression support; the result string goes
 * into res, the source av[3] itself is not the destination */
2205 case B_ge:
2206 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2207 break;
2208
/* replace all matches in av[2] in place; result: number of substitutions */
2209 case B_gs:
2210 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2211 break;
2212
/* same as above, but only the first match is replaced */
2213 case B_su:
2214 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2215 break;
2216 }
2217
2218 nvfree(tv);
2219 return res;
2220 #undef tspl
2221 }
2222
2223 /*
2224 * Evaluate node - the heart of the program. Supplied with subtree
2225 * and place where to store result. returns ptr to result.
2226 */
2227 #define XC(n) ((n) >> 8)
2228
2229 static var *evaluate(node *op, var *res)
2230 {
2231 /* This procedure is recursive so we should count every byte */
2232 #define fnargs (G.evaluate__fnargs)
2233 /* seed is initialized to 1 */
2234 #define seed (G.evaluate__seed)
2235 #define sreg (G.evaluate__sreg)
2236
2237 node *op1;
2238 var *v1;
2239 union {
2240 var *v;
2241 const char *s;
2242 double d;
2243 int i;
2244 } L, R;
2245 uint32_t opinfo;
2246 int opn;
2247 union {
2248 char *s;
2249 rstream *rsm;
2250 FILE *F;
2251 var *v;
2252 regex_t *re;
2253 uint32_t info;
2254 } X;
2255
2256 if (!op)
2257 return setvar_s(res, NULL);
2258
2259 v1 = nvalloc(2);
2260
2261 while (op) {
2262 opinfo = op->info;
2263 opn = (opinfo & OPNMASK);
2264 g_lineno = op->lineno;
2265
2266 /* execute inevitable things */
2267 op1 = op->l.n;
2268 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2269 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2270 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2271 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2272 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2273
2274 switch (XC(opinfo & OPCLSMASK)) {
2275
2276 /* -- iterative node type -- */
2277
2278 /* test pattern */
2279 case XC( OC_TEST ):
2280 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2281 /* it's range pattern */
2282 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2283 op->info |= OF_CHECKED;
2284 if (ptest(op1->r.n))
2285 op->info &= ~OF_CHECKED;
2286
2287 op = op->a.n;
2288 } else {
2289 op = op->r.n;
2290 }
2291 } else {
2292 op = (ptest(op1)) ? op->a.n : op->r.n;
2293 }
2294 break;
2295
2296 /* just evaluate an expression, also used as unconditional jump */
2297 case XC( OC_EXEC ):
2298 break;
2299
2300 /* branch, used in if-else and various loops */
2301 case XC( OC_BR ):
2302 op = istrue(L.v) ? op->a.n : op->r.n;
2303 break;
2304
2305 /* initialize for-in loop */
2306 case XC( OC_WALKINIT ):
2307 hashwalk_init(L.v, iamarray(R.v));
2308 break;
2309
2310 /* get next array item */
2311 case XC( OC_WALKNEXT ):
2312 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2313 break;
2314
2315 case XC( OC_PRINT ):
2316 case XC( OC_PRINTF ):
2317 X.F = stdout;
2318 if (op->r.n) {
2319 X.rsm = newfile(R.s);
2320 if (!X.rsm->F) {
2321 if (opn == '|') {
2322 X.rsm->F = popen(R.s, "w");
2323 if (X.rsm->F == NULL)
2324 bb_perror_msg_and_die("popen");
2325 X.rsm->is_pipe = 1;
2326 } else {
2327 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2328 }
2329 }
2330 X.F = X.rsm->F;
2331 }
2332
2333 if ((opinfo & OPCLSMASK) == OC_PRINT) {
2334 if (!op1) {
2335 fputs(getvar_s(intvar[F0]), X.F);
2336 } else {
2337 while (op1) {
2338 L.v = evaluate(nextarg(&op1), v1);
2339 if (L.v->type & VF_NUMBER) {
2340 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2341 getvar_i(L.v), TRUE);
2342 fputs(g_buf, X.F);
2343 } else {
2344 fputs(getvar_s(L.v), X.F);
2345 }
2346
2347 if (op1) fputs(getvar_s(intvar[OFS]), X.F);
2348 }
2349 }
2350 fputs(getvar_s(intvar[ORS]), X.F);
2351
2352 } else { /* OC_PRINTF */
2353 L.s = awk_printf(op1);
2354 fputs(L.s, X.F);
2355 free((char*)L.s);
2356 }
2357 fflush(X.F);
2358 break;
2359
2360 case XC( OC_DELETE ):
2361 X.info = op1->info & OPCLSMASK;
2362 if (X.info == OC_VAR) {
2363 R.v = op1->l.v;
2364 } else if (X.info == OC_FNARG) {
2365 R.v = &fnargs[op1->l.i];
2366 } else {
2367 syntax_error(EMSG_NOT_ARRAY);
2368 }
2369
2370 if (op1->r.n) {
2371 clrvar(L.v);
2372 L.s = getvar_s(evaluate(op1->r.n, v1));
2373 hash_remove(iamarray(R.v), L.s);
2374 } else {
2375 clear_array(iamarray(R.v));
2376 }
2377 break;
2378
2379 case XC( OC_NEWSOURCE ):
2380 g_progname = op->l.s;
2381 break;
2382
2383 case XC( OC_RETURN ):
2384 copyvar(res, L.v);
2385 break;
2386
2387 case XC( OC_NEXTFILE ):
2388 nextfile = TRUE;
2389 case XC( OC_NEXT ):
2390 nextrec = TRUE;
2391 case XC( OC_DONE ):
2392 clrvar(res);
2393 break;
2394
2395 case XC( OC_EXIT ):
2396 awk_exit(L.d);
2397
2398 /* -- recursive node type -- */
2399
2400 case XC( OC_VAR ):
2401 L.v = op->l.v;
2402 if (L.v == intvar[NF])
2403 split_f0();
2404 goto v_cont;
2405
2406 case XC( OC_FNARG ):
2407 L.v = &fnargs[op->l.i];
2408 v_cont:
2409 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2410 break;
2411
2412 case XC( OC_IN ):
2413 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2414 break;
2415
2416 case XC( OC_REGEXP ):
2417 op1 = op;
2418 L.s = getvar_s(intvar[F0]);
2419 goto re_cont;
2420
2421 case XC( OC_MATCH ):
2422 op1 = op->r.n;
2423 re_cont:
2424 X.re = as_regex(op1, &sreg);
2425 R.i = regexec(X.re, L.s, 0, NULL, 0);
2426 if (X.re == &sreg) regfree(X.re);
2427 setvar_i(res, (R.i == 0) ^ (opn == '!'));
2428 break;
2429
2430 case XC( OC_MOVE ):
2431 /* if source is a temporary string, jusk relink it to dest */
2432 //Disabled: if R.v is numeric but happens to have cached R.v->string,
2433 //then L.v ends up being a string, which is wrong
2434 // if (R.v == v1+1 && R.v->string) {
2435 // res = setvar_p(L.v, R.v->string);
2436 // R.v->string = NULL;
2437 // } else {
2438 res = copyvar(L.v, R.v);
2439 // }
2440 break;
2441
2442 case XC( OC_TERNARY ):
2443 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2444 syntax_error(EMSG_POSSIBLE_ERROR);
2445 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2446 break;
2447
2448 case XC( OC_FUNC ):
2449 if (!op->r.f->body.first)
2450 syntax_error(EMSG_UNDEF_FUNC);
2451
2452 X.v = R.v = nvalloc(op->r.f->nargs + 1);
2453 while (op1) {
2454 L.v = evaluate(nextarg(&op1), v1);
2455 copyvar(R.v, L.v);
2456 R.v->type |= VF_CHILD;
2457 R.v->x.parent = L.v;
2458 if (++R.v - X.v >= op->r.f->nargs)
2459 break;
2460 }
2461
2462 R.v = fnargs;
2463 fnargs = X.v;
2464
2465 L.s = g_progname;
2466 res = evaluate(op->r.f->body.first, res);
2467 g_progname = L.s;
2468
2469 nvfree(fnargs);
2470 fnargs = R.v;
2471 break;
2472
2473 case XC( OC_GETLINE ):
2474 case XC( OC_PGETLINE ):
2475 if (op1) {
2476 X.rsm = newfile(L.s);
2477 if (!X.rsm->F) {
2478 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2479 X.rsm->F = popen(L.s, "r");
2480 X.rsm->is_pipe = TRUE;
2481 } else {
2482 X.rsm->F = fopen_for_read(L.s); /* not xfopen! */
2483 }
2484 }
2485 } else {
2486 if (!iF) iF = next_input_file();
2487 X.rsm = iF;
2488 }
2489
2490 if (!X.rsm->F) {
2491 setvar_i(intvar[ERRNO], errno);
2492 setvar_i(res, -1);
2493 break;
2494 }
2495
2496 if (!op->r.n)
2497 R.v = intvar[F0];
2498
2499 L.i = awk_getline(X.rsm, R.v);
2500 if (L.i > 0) {
2501 if (!op1) {
2502 incvar(intvar[FNR]);
2503 incvar(intvar[NR]);
2504 }
2505 }
2506 setvar_i(res, L.i);
2507 break;
2508
2509 /* simple builtins */
2510 case XC( OC_FBLTIN ):
2511 switch (opn) {
2512
2513 case F_in:
2514 R.d = (int)L.d;
2515 break;
2516
2517 case F_rn:
2518 R.d = (double)rand() / (double)RAND_MAX;
2519 break;
2520 #if ENABLE_FEATURE_AWK_LIBM
2521 case F_co:
2522 R.d = cos(L.d);
2523 break;
2524
2525 case F_ex:
2526 R.d = exp(L.d);
2527 break;
2528
2529 case F_lg:
2530 R.d = log(L.d);
2531 break;
2532
2533 case F_si:
2534 R.d = sin(L.d);
2535 break;
2536
2537 case F_sq:
2538 R.d = sqrt(L.d);
2539 break;
2540 #else
2541 case F_co:
2542 case F_ex:
2543 case F_lg:
2544 case F_si:
2545 case F_sq:
2546 syntax_error(EMSG_NO_MATH);
2547 break;
2548 #endif
2549 case F_sr:
2550 R.d = (double)seed;
2551 seed = op1 ? (unsigned)L.d : (unsigned)time(NULL);
2552 srand(seed);
2553 break;
2554
2555 case F_ti:
2556 R.d = time(NULL);
2557 break;
2558
2559 case F_le:
2560 if (!op1)
2561 L.s = getvar_s(intvar[F0]);
2562 R.d = strlen(L.s);
2563 break;
2564
2565 case F_sy:
2566 fflush_all();
2567 R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2568 ? (system(L.s) >> 8) : 0;
2569 break;
2570
2571 case F_ff:
2572 if (!op1)
2573 fflush(stdout);
2574 else {
2575 if (L.s && *L.s) {
2576 X.rsm = newfile(L.s);
2577 fflush(X.rsm->F);
2578 } else {
2579 fflush_all();
2580 }
2581 }
2582 break;
2583
2584 case F_cl:
2585 X.rsm = (rstream *)hash_search(fdhash, L.s);
2586 if (X.rsm) {
2587 R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2588 free(X.rsm->buffer);
2589 hash_remove(fdhash, L.s);
2590 }
2591 if (R.i != 0)
2592 setvar_i(intvar[ERRNO], errno);
2593 R.d = (double)R.i;
2594 break;
2595 }
2596 setvar_i(res, R.d);
2597 break;
2598
2599 case XC( OC_BUILTIN ):
2600 res = exec_builtin(op, res);
2601 break;
2602
2603 case XC( OC_SPRINTF ):
2604 setvar_p(res, awk_printf(op1));
2605 break;
2606
2607 case XC( OC_UNARY ):
2608 X.v = R.v;
2609 L.d = R.d = getvar_i(R.v);
2610 switch (opn) {
2611 case 'P':
2612 L.d = ++R.d;
2613 goto r_op_change;
2614 case 'p':
2615 R.d++;
2616 goto r_op_change;
2617 case 'M':
2618 L.d = --R.d;
2619 goto r_op_change;
2620 case 'm':
2621 R.d--;
2622 goto r_op_change;
2623 case '!':
2624 L.d = !istrue(X.v);
2625 break;
2626 case '-':
2627 L.d = -R.d;
2628 break;
2629 r_op_change:
2630 setvar_i(X.v, R.d);
2631 }
2632 setvar_i(res, L.d);
2633 break;
2634
2635 case XC( OC_FIELD ):
2636 R.i = (int)getvar_i(R.v);
2637 if (R.i == 0) {
2638 res = intvar[F0];
2639 } else {
2640 split_f0();
2641 if (R.i > nfields)
2642 fsrealloc(R.i);
2643 res = &Fields[R.i - 1];
2644 }
2645 break;
2646
2647 /* concatenation (" ") and index joining (",") */
2648 case XC( OC_CONCAT ):
2649 case XC( OC_COMMA ):
2650 opn = strlen(L.s) + strlen(R.s) + 2;
2651 X.s = xmalloc(opn);
2652 strcpy(X.s, L.s);
2653 if ((opinfo & OPCLSMASK) == OC_COMMA) {
2654 L.s = getvar_s(intvar[SUBSEP]);
2655 X.s = xrealloc(X.s, opn + strlen(L.s));
2656 strcat(X.s, L.s);
2657 }
2658 strcat(X.s, R.s);
2659 setvar_p(res, X.s);
2660 break;
2661
2662 case XC( OC_LAND ):
2663 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2664 break;
2665
2666 case XC( OC_LOR ):
2667 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2668 break;
2669
2670 case XC( OC_BINARY ):
2671 case XC( OC_REPLACE ):
2672 R.d = getvar_i(R.v);
2673 switch (opn) {
2674 case '+':
2675 L.d += R.d;
2676 break;
2677 case '-':
2678 L.d -= R.d;
2679 break;
2680 case '*':
2681 L.d *= R.d;
2682 break;
2683 case '/':
2684 if (R.d == 0)
2685 syntax_error(EMSG_DIV_BY_ZERO);
2686 L.d /= R.d;
2687 break;
2688 case '&':
2689 #if ENABLE_FEATURE_AWK_LIBM
2690 L.d = pow(L.d, R.d);
2691 #else
2692 syntax_error(EMSG_NO_MATH);
2693 #endif
2694 break;
2695 case '%':
2696 if (R.d == 0)
2697 syntax_error(EMSG_DIV_BY_ZERO);
2698 L.d -= (int)(L.d / R.d) * R.d;
2699 break;
2700 }
2701 res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2702 break;
2703
2704 case XC( OC_COMPARE ):
2705 if (is_numeric(L.v) && is_numeric(R.v)) {
2706 L.d = getvar_i(L.v) - getvar_i(R.v);
2707 } else {
2708 L.s = getvar_s(L.v);
2709 R.s = getvar_s(R.v);
2710 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2711 }
2712 switch (opn & 0xfe) {
2713 case 0:
2714 R.i = (L.d > 0);
2715 break;
2716 case 2:
2717 R.i = (L.d >= 0);
2718 break;
2719 case 4:
2720 R.i = (L.d == 0);
2721 break;
2722 }
2723 setvar_i(res, (opn & 1 ? R.i : !R.i) ? 1 : 0);
2724 break;
2725
2726 default:
2727 syntax_error(EMSG_POSSIBLE_ERROR);
2728 }
2729 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2730 op = op->a.n;
2731 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2732 break;
2733 if (nextrec)
2734 break;
2735 }
2736 nvfree(v1);
2737 return res;
2738 #undef fnargs
2739 #undef seed
2740 #undef sreg
2741 }
2742
2743
2744 /* -------- main & co. -------- */
2745
2746 static int awk_exit(int r)
2747 {
2748 var tv;
2749 unsigned i;
2750 hash_item *hi;
2751
2752 zero_out_var(&tv);
2753
2754 if (!exiting) {
2755 exiting = TRUE;
2756 nextrec = FALSE;
2757 evaluate(endseq.first, &tv);
2758 }
2759
2760 /* waiting for children */
2761 for (i = 0; i < fdhash->csize; i++) {
2762 hi = fdhash->items[i];
2763 while (hi) {
2764 if (hi->data.rs.F && hi->data.rs.is_pipe)
2765 pclose(hi->data.rs.F);
2766 hi = hi->next;
2767 }
2768 }
2769
2770 exit(r);
2771 }
2772
2773 /* if expr looks like "var=value", perform assignment and return 1,
2774 * otherwise return 0 */
2775 static int is_assignment(const char *expr)
2776 {
2777 char *exprc, *s, *s0, *s1;
2778
2779 exprc = xstrdup(expr);
2780 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2781 free(exprc);
2782 return FALSE;
2783 }
2784
2785 *(s++) = '\0';
2786 s0 = s1 = s;
2787 while (*s)
2788 *(s1++) = nextchar(&s);
2789
2790 *s1 = '\0';
2791 setvar_u(newvar(exprc), s0);
2792 free(exprc);
2793 return TRUE;
2794 }
2795
2796 /* switch to next input file */
2797 static rstream *next_input_file(void)
2798 {
2799 #define rsm (G.next_input_file__rsm)
2800 #define files_happen (G.next_input_file__files_happen)
2801
2802 FILE *F = NULL;
2803 const char *fname, *ind;
2804
2805 if (rsm.F)
2806 fclose(rsm.F);
2807 rsm.F = NULL;
2808 rsm.pos = rsm.adv = 0;
2809
2810 do {
2811 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2812 if (files_happen)
2813 return NULL;
2814 fname = "-";
2815 F = stdin;
2816 } else {
2817 ind = getvar_s(incvar(intvar[ARGIND]));
2818 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2819 if (fname && *fname && !is_assignment(fname))
2820 F = xfopen_stdin(fname);
2821 }
2822 } while (!F);
2823
2824 files_happen = TRUE;
2825 setvar_s(intvar[FILENAME], fname);
2826 rsm.F = F;
2827 return &rsm;
2828 #undef rsm
2829 #undef files_happen
2830 }
2831
2832 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
2833 int awk_main(int argc, char **argv)
2834 {
2835 unsigned opt;
2836 char *opt_F, *opt_W;
2837 llist_t *list_v = NULL;
2838 llist_t *list_f = NULL;
2839 int i, j;
2840 var *v;
2841 var tv;
2842 char **envp;
2843 char *vnames = (char *)vNames; /* cheat */
2844 char *vvalues = (char *)vValues;
2845
2846 INIT_G();
2847
2848 /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2849 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2850 if (ENABLE_LOCALE_SUPPORT)
2851 setlocale(LC_NUMERIC, "C");
2852
2853 zero_out_var(&tv);
2854
2855 /* allocate global buffer */
2856 g_buf = xmalloc(MAXVARFMT + 1);
2857
2858 vhash = hash_init();
2859 ahash = hash_init();
2860 fdhash = hash_init();
2861 fnhash = hash_init();
2862
2863 /* initialize variables */
2864 for (i = 0; *vnames; i++) {
2865 intvar[i] = v = newvar(nextword(&vnames));
2866 if (*vvalues != '\377')
2867 setvar_s(v, nextword(&vvalues));
2868 else
2869 setvar_i(v, 0);
2870
2871 if (*vnames == '*') {
2872 v->type |= VF_SPECIAL;
2873 vnames++;
2874 }
2875 }
2876
2877 handle_special(intvar[FS]);
2878 handle_special(intvar[RS]);
2879
2880 newfile("/dev/stdin")->F = stdin;
2881 newfile("/dev/stdout")->F = stdout;
2882 newfile("/dev/stderr")->F = stderr;
2883
2884 /* Huh, people report that sometimes environ is NULL. Oh well. */
2885 if (environ) for (envp = environ; *envp; envp++) {
2886 /* environ is writable, thus we don't strdup it needlessly */
2887 char *s = *envp;
2888 char *s1 = strchr(s, '=');
2889 if (s1) {
2890 *s1 = '\0';
2891 /* Both findvar and setvar_u take const char*
2892 * as 2nd arg -> environment is not trashed */
2893 setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
2894 *s1 = '=';
2895 }
2896 }
2897 opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
2898 opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
2899 argv += optind;
2900 argc -= optind;
2901 if (opt & 0x1)
2902 setvar_s(intvar[FS], opt_F); // -F
2903 while (list_v) { /* -v */
2904 if (!is_assignment(llist_pop(&list_v)))
2905 bb_show_usage();
2906 }
2907 if (list_f) { /* -f */
2908 do {
2909 char *s = NULL;
2910 FILE *from_file;
2911
2912 g_progname = llist_pop(&list_f);
2913 from_file = xfopen_stdin(g_progname);
2914 /* one byte is reserved for some trick in next_token */
2915 for (i = j = 1; j > 0; i += j) {
2916 s = xrealloc(s, i + 4096);
2917 j = fread(s + i, 1, 4094, from_file);
2918 }
2919 s[i] = '\0';
2920 fclose(from_file);
2921 parse_program(s + 1);
2922 free(s);
2923 } while (list_f);
2924 argc++;
2925 } else { // no -f: take program from 1st parameter
2926 if (!argc)
2927 bb_show_usage();
2928 g_progname = "cmd. line";
2929 parse_program(*argv++);
2930 }
2931 if (opt & 0x8) // -W
2932 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
2933
2934 /* fill in ARGV array */
2935 setvar_i(intvar[ARGC], argc);
2936 setari_u(intvar[ARGV], 0, "awk");
2937 i = 0;
2938 while (*argv)
2939 setari_u(intvar[ARGV], ++i, *argv++);
2940
2941 evaluate(beginseq.first, &tv);
2942 if (!mainseq.first && !endseq.first)
2943 awk_exit(EXIT_SUCCESS);
2944
2945 /* input file could already be opened in BEGIN block */
2946 if (!iF)
2947 iF = next_input_file();
2948
2949 /* passing through input files */
2950 while (iF) {
2951 nextfile = FALSE;
2952 setvar_i(intvar[FNR], 0);
2953
2954 while ((i = awk_getline(iF, intvar[F0])) > 0) {
2955 nextrec = FALSE;
2956 incvar(intvar[NR]);
2957 incvar(intvar[FNR]);
2958 evaluate(mainseq.first, &tv);
2959
2960 if (nextfile)
2961 break;
2962 }
2963
2964 if (i < 0)
2965 syntax_error(strerror(errno));
2966
2967 iF = next_input_file();
2968 }
2969
2970 awk_exit(EXIT_SUCCESS);
2971 /*return 0;*/
2972 }