Magellan Linux

Contents of /trunk/mkinitrd-magellan/busybox/editors/awk.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 532 - (show annotations) (download)
Sat Sep 1 22:45:15 2007 UTC (16 years, 9 months ago) by niro
File MIME type: text/plain
File size: 59731 byte(s)
-import if magellan mkinitrd; it is a fork of redhats mkinitrd-5.0.8 with all magellan patches and features; deprecates magellan-src/mkinitrd

1 /* vi: set sw=4 ts=4: */
2 /*
3 * awk implementation for busybox
4 *
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6 *
7 * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
8 */
9
10 #include "busybox.h"
11 #include "xregex.h"
12 #include <math.h>
13
14
15 #define MAXVARFMT 240
16 #define MINNVBLOCK 64
17
18 /* variable flags */
19 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
20 #define VF_ARRAY 0x0002 /* 1 = it's an array */
21
22 #define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
23 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
24 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
25 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
26 #define VF_FSTR 0x1000 /* 1 = string points to fstring buffer */
27 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
28 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
29
30 /* these flags are static, don't change them when value is changed */
31 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
32
33 /* Variable */
34 typedef struct var_s {
35 unsigned short type; /* flags */
36 double number;
37 char *string;
38 union {
39 int aidx; /* func arg idx (for compilation stage) */
40 struct xhash_s *array; /* array ptr */
41 struct var_s *parent; /* for func args, ptr to actual parameter */
42 char **walker; /* list of array elements (for..in) */
43 } x;
44 } var;
45
46 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
47 typedef struct chain_s {
48 struct node_s *first;
49 struct node_s *last;
50 char *programname;
51 } chain;
52
53 /* Function */
54 typedef struct func_s {
55 unsigned short nargs;
56 struct chain_s body;
57 } func;
58
59 /* I/O stream */
60 typedef struct rstream_s {
61 FILE *F;
62 char *buffer;
63 int adv;
64 int size;
65 int pos;
66 unsigned short is_pipe;
67 } rstream;
68
69 typedef struct hash_item_s {
70 union {
71 struct var_s v; /* variable/array hash */
72 struct rstream_s rs; /* redirect streams hash */
73 struct func_s f; /* functions hash */
74 } data;
75 struct hash_item_s *next; /* next in chain */
76 char name[1]; /* really it's longer */
77 } hash_item;
78
79 typedef struct xhash_s {
80 unsigned nel; /* num of elements */
81 unsigned csize; /* current hash size */
82 unsigned nprime; /* next hash size in PRIMES[] */
83 unsigned glen; /* summary length of item names */
84 struct hash_item_s **items;
85 } xhash;
86
87 /* Tree node */
88 typedef struct node_s {
89 uint32_t info;
90 unsigned short lineno;
91 union {
92 struct node_s *n;
93 var *v;
94 int i;
95 char *s;
96 regex_t *re;
97 } l;
98 union {
99 struct node_s *n;
100 regex_t *ire;
101 func *f;
102 int argno;
103 } r;
104 union {
105 struct node_s *n;
106 } a;
107 } node;
108
109 /* Block of temporary variables */
110 typedef struct nvblock_s {
111 int size;
112 var *pos;
113 struct nvblock_s *prev;
114 struct nvblock_s *next;
115 var nv[0];
116 } nvblock;
117
118 typedef struct tsplitter_s {
119 node n;
120 regex_t re[2];
121 } tsplitter;
122
123 /* simple token classes */
124 /* Order and hex values are very important!!! See next_token() */
125 #define TC_SEQSTART 1 /* ( */
126 #define TC_SEQTERM (1 << 1) /* ) */
127 #define TC_REGEXP (1 << 2) /* /.../ */
128 #define TC_OUTRDR (1 << 3) /* | > >> */
129 #define TC_UOPPOST (1 << 4) /* unary postfix operator */
130 #define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
131 #define TC_BINOPX (1 << 6) /* two-opnd operator */
132 #define TC_IN (1 << 7)
133 #define TC_COMMA (1 << 8)
134 #define TC_PIPE (1 << 9) /* input redirection pipe */
135 #define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
136 #define TC_ARRTERM (1 << 11) /* ] */
137 #define TC_GRPSTART (1 << 12) /* { */
138 #define TC_GRPTERM (1 << 13) /* } */
139 #define TC_SEMICOL (1 << 14)
140 #define TC_NEWLINE (1 << 15)
141 #define TC_STATX (1 << 16) /* ctl statement (for, next...) */
142 #define TC_WHILE (1 << 17)
143 #define TC_ELSE (1 << 18)
144 #define TC_BUILTIN (1 << 19)
145 #define TC_GETLINE (1 << 20)
146 #define TC_FUNCDECL (1 << 21) /* `function' `func' */
147 #define TC_BEGIN (1 << 22)
148 #define TC_END (1 << 23)
149 #define TC_EOF (1 << 24)
150 #define TC_VARIABLE (1 << 25)
151 #define TC_ARRAY (1 << 26)
152 #define TC_FUNCTION (1 << 27)
153 #define TC_STRING (1 << 28)
154 #define TC_NUMBER (1 << 29)
155
156 #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
157
158 /* combined token classes */
159 #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
160 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
161 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION | \
162 TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
163
164 #define TC_STATEMNT (TC_STATX | TC_WHILE)
165 #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
166
167 /* word tokens, cannot mean something else if not expected */
168 #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN | \
169 TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
170
171 /* discard newlines after these */
172 #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM | \
173 TC_BINOP | TC_OPTERM)
174
175 /* what can expression begin with */
176 #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
177 /* what can group begin with */
178 #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
179
180 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
181 /* operator is inserted between them */
182 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM | \
183 TC_STRING | TC_NUMBER | TC_UOPPOST)
184 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
185
186 #define OF_RES1 0x010000
187 #define OF_RES2 0x020000
188 #define OF_STR1 0x040000
189 #define OF_STR2 0x080000
190 #define OF_NUM1 0x100000
191 #define OF_CHECKED 0x200000
192
193 /* combined operator flags */
194 #define xx 0
195 #define xV OF_RES2
196 #define xS (OF_RES2 | OF_STR2)
197 #define Vx OF_RES1
198 #define VV (OF_RES1 | OF_RES2)
199 #define Nx (OF_RES1 | OF_NUM1)
200 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
201 #define Sx (OF_RES1 | OF_STR1)
202 #define SV (OF_RES1 | OF_STR1 | OF_RES2)
203 #define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
204
205 #define OPCLSMASK 0xFF00
206 #define OPNMASK 0x007F
207
208 /* operator priority is a highest byte (even: r->l, odd: l->r grouping)
209 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
210 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
211 */
212 #define P(x) (x << 24)
213 #define PRIMASK 0x7F000000
214 #define PRIMASK2 0x7E000000
215
216 /* Operation classes */
217
218 #define SHIFT_TIL_THIS 0x0600
219 #define RECUR_FROM_THIS 0x1000
220
221 enum {
222 OC_DELETE=0x0100, OC_EXEC=0x0200, OC_NEWSOURCE=0x0300,
223 OC_PRINT=0x0400, OC_PRINTF=0x0500, OC_WALKINIT=0x0600,
224
225 OC_BR=0x0700, OC_BREAK=0x0800, OC_CONTINUE=0x0900,
226 OC_EXIT=0x0a00, OC_NEXT=0x0b00, OC_NEXTFILE=0x0c00,
227 OC_TEST=0x0d00, OC_WALKNEXT=0x0e00,
228
229 OC_BINARY=0x1000, OC_BUILTIN=0x1100, OC_COLON=0x1200,
230 OC_COMMA=0x1300, OC_COMPARE=0x1400, OC_CONCAT=0x1500,
231 OC_FBLTIN=0x1600, OC_FIELD=0x1700, OC_FNARG=0x1800,
232 OC_FUNC=0x1900, OC_GETLINE=0x1a00, OC_IN=0x1b00,
233 OC_LAND=0x1c00, OC_LOR=0x1d00, OC_MATCH=0x1e00,
234 OC_MOVE=0x1f00, OC_PGETLINE=0x2000, OC_REGEXP=0x2100,
235 OC_REPLACE=0x2200, OC_RETURN=0x2300, OC_SPRINTF=0x2400,
236 OC_TERNARY=0x2500, OC_UNARY=0x2600, OC_VAR=0x2700,
237 OC_DONE=0x2800,
238
239 ST_IF=0x3000, ST_DO=0x3100, ST_FOR=0x3200,
240 ST_WHILE=0x3300
241 };
242
243 /* simple builtins */
244 enum {
245 F_in=0, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
246 F_ti, F_le, F_sy, F_ff, F_cl
247 };
248
249 /* builtins */
250 enum {
251 B_a2=0, B_ix, B_ma, B_sp, B_ss, B_ti, B_lo, B_up,
252 B_ge, B_gs, B_su,
253 B_an, B_co, B_ls, B_or, B_rs, B_xo,
254 };
255
256 /* tokens and their corresponding info values */
257
258 #define NTC "\377" /* switch to next token class (tc<<1) */
259 #define NTCC '\377'
260
261 #define OC_B OC_BUILTIN
262
263 static const char tokenlist[] =
264 "\1(" NTC
265 "\1)" NTC
266 "\1/" NTC /* REGEXP */
267 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
268 "\2++" "\2--" NTC /* UOPPOST */
269 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
270 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
271 "\2*=" "\2/=" "\2%=" "\2^="
272 "\1+" "\1-" "\3**=" "\2**"
273 "\1/" "\1%" "\1^" "\1*"
274 "\2!=" "\2>=" "\2<=" "\1>"
275 "\1<" "\2!~" "\1~" "\2&&"
276 "\2||" "\1?" "\1:" NTC
277 "\2in" NTC
278 "\1," NTC
279 "\1|" NTC
280 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
281 "\1]" NTC
282 "\1{" NTC
283 "\1}" NTC
284 "\1;" NTC
285 "\1\n" NTC
286 "\2if" "\2do" "\3for" "\5break" /* STATX */
287 "\10continue" "\6delete" "\5print"
288 "\6printf" "\4next" "\10nextfile"
289 "\6return" "\4exit" NTC
290 "\5while" NTC
291 "\4else" NTC
292
293 "\3and" "\5compl" "\6lshift" "\2or"
294 "\6rshift" "\3xor"
295 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
296 "\3cos" "\3exp" "\3int" "\3log"
297 "\4rand" "\3sin" "\4sqrt" "\5srand"
298 "\6gensub" "\4gsub" "\5index" "\6length"
299 "\5match" "\5split" "\7sprintf" "\3sub"
300 "\6substr" "\7systime" "\10strftime"
301 "\7tolower" "\7toupper" NTC
302 "\7getline" NTC
303 "\4func" "\10function" NTC
304 "\5BEGIN" NTC
305 "\3END" "\0"
306 ;
307
308 static const uint32_t tokeninfo[] = {
309 0,
310 0,
311 OC_REGEXP,
312 xS|'a', xS|'w', xS|'|',
313 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
314 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
315 OC_FIELD|xV|P(5),
316 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
317 OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
318 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
319 OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
320 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
321 OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
322 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
323 OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
324 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
325 OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
326 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
327 OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
328 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
329 OC_COLON|xx|P(67)|':',
330 OC_IN|SV|P(49),
331 OC_COMMA|SS|P(80),
332 OC_PGETLINE|SV|P(37),
333 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
334 OC_UNARY|xV|P(19)|'!',
335 0,
336 0,
337 0,
338 0,
339 0,
340 ST_IF, ST_DO, ST_FOR, OC_BREAK,
341 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
342 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
343 OC_RETURN|Vx, OC_EXIT|Nx,
344 ST_WHILE,
345 0,
346
347 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
348 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
349 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
350 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
351 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
352 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
353 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
354 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b),
355 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
356 OC_GETLINE|SV|P(0),
357 0, 0,
358 0,
359 0
360 };
361
362 /* internal variable names and their initial values */
363 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
364 enum {
365 CONVFMT=0, OFMT, FS, OFS,
366 ORS, RS, RT, FILENAME,
367 SUBSEP, ARGIND, ARGC, ARGV,
368 ERRNO, FNR,
369 NR, NF, IGNORECASE,
370 ENVIRON, F0, _intvarcount_
371 };
372
373 static const char vNames[] =
374 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
375 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
376 "SUBSEP\0" "ARGIND\0" "ARGC\0" "ARGV\0"
377 "ERRNO\0" "FNR\0"
378 "NR\0" "NF\0*" "IGNORECASE\0*"
379 "ENVIRON\0" "$\0*" "\0";
380
381 static const char vValues[] =
382 "%.6g\0" "%.6g\0" " \0" " \0"
383 "\n\0" "\n\0" "\0" "\0"
384 "\034\0"
385 "\377";
386
387 /* hash size may grow to these values */
388 #define FIRST_PRIME 61;
389 static const unsigned PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
390 enum { NPRIMES = sizeof(PRIMES) / sizeof(unsigned) };
391
392 /* globals */
393
394 extern char **environ;
395
396 static var * V[_intvarcount_];
397 static chain beginseq, mainseq, endseq, *seq;
398 static int nextrec, nextfile;
399 static node *break_ptr, *continue_ptr;
400 static rstream *iF;
401 static xhash *vhash, *ahash, *fdhash, *fnhash;
402 static char *programname;
403 static short lineno;
404 static int is_f0_split;
405 static int nfields;
406 static var *Fields;
407 static tsplitter fsplitter, rsplitter;
408 static nvblock *cb;
409 static char *pos;
410 static char *buf;
411 static int icase;
412 static int exiting;
413
414 static struct {
415 uint32_t tclass;
416 uint32_t info;
417 char *string;
418 double number;
419 short lineno;
420 int rollback;
421 } t;
422
423 /* function prototypes */
424 static void handle_special(var *);
425 static node *parse_expr(uint32_t);
426 static void chain_group(void);
427 static var *evaluate(node *, var *);
428 static rstream *next_input_file(void);
429 static int fmt_num(char *, int, const char *, double, int);
430 static int awk_exit(int) ATTRIBUTE_NORETURN;
431
432 /* ---- error handling ---- */
433
434 static const char EMSG_INTERNAL_ERROR[] = "Internal error";
435 static const char EMSG_UNEXP_EOS[] = "Unexpected end of string";
436 static const char EMSG_UNEXP_TOKEN[] = "Unexpected token";
437 static const char EMSG_DIV_BY_ZERO[] = "Division by zero";
438 static const char EMSG_INV_FMT[] = "Invalid format specifier";
439 static const char EMSG_TOO_FEW_ARGS[] = "Too few arguments for builtin";
440 static const char EMSG_NOT_ARRAY[] = "Not an array";
441 static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error";
442 static const char EMSG_UNDEF_FUNC[] = "Call to undefined function";
443 #if !ENABLE_FEATURE_AWK_MATH
444 static const char EMSG_NO_MATH[] = "Math support is not compiled in";
445 #endif
446
447 static void zero_out_var(var * vp)
448 {
449 memset(vp, 0, sizeof(*vp));
450 }
451
452 static void syntax_error(const char * const message) ATTRIBUTE_NORETURN;
453 static void syntax_error(const char * const message)
454 {
455 bb_error_msg_and_die("%s:%i: %s", programname, lineno, message);
456 }
457
458 #define runtime_error(x) syntax_error(x)
459
460
461 /* ---- hash stuff ---- */
462
463 static unsigned hashidx(const char *name)
464 {
465 unsigned idx = 0;
466
467 while (*name) idx = *name++ + (idx << 6) - idx;
468 return idx;
469 }
470
471 /* create new hash */
472 static xhash *hash_init(void)
473 {
474 xhash *newhash;
475
476 newhash = xzalloc(sizeof(xhash));
477 newhash->csize = FIRST_PRIME;
478 newhash->items = xzalloc(newhash->csize * sizeof(hash_item *));
479
480 return newhash;
481 }
482
483 /* find item in hash, return ptr to data, NULL if not found */
484 static void *hash_search(xhash *hash, const char *name)
485 {
486 hash_item *hi;
487
488 hi = hash->items [ hashidx(name) % hash->csize ];
489 while (hi) {
490 if (strcmp(hi->name, name) == 0)
491 return &(hi->data);
492 hi = hi->next;
493 }
494 return NULL;
495 }
496
497 /* grow hash if it becomes too big */
498 static void hash_rebuild(xhash *hash)
499 {
500 unsigned newsize, i, idx;
501 hash_item **newitems, *hi, *thi;
502
503 if (hash->nprime == NPRIMES)
504 return;
505
506 newsize = PRIMES[hash->nprime++];
507 newitems = xzalloc(newsize * sizeof(hash_item *));
508
509 for (i=0; i<hash->csize; i++) {
510 hi = hash->items[i];
511 while (hi) {
512 thi = hi;
513 hi = thi->next;
514 idx = hashidx(thi->name) % newsize;
515 thi->next = newitems[idx];
516 newitems[idx] = thi;
517 }
518 }
519
520 free(hash->items);
521 hash->csize = newsize;
522 hash->items = newitems;
523 }
524
525 /* find item in hash, add it if necessary. Return ptr to data */
526 static void *hash_find(xhash *hash, const char *name)
527 {
528 hash_item *hi;
529 unsigned idx;
530 int l;
531
532 hi = hash_search(hash, name);
533 if (! hi) {
534 if (++hash->nel / hash->csize > 10)
535 hash_rebuild(hash);
536
537 l = strlen(name) + 1;
538 hi = xzalloc(sizeof(hash_item) + l);
539 memcpy(hi->name, name, l);
540
541 idx = hashidx(name) % hash->csize;
542 hi->next = hash->items[idx];
543 hash->items[idx] = hi;
544 hash->glen += l;
545 }
546 return &(hi->data);
547 }
548
549 #define findvar(hash, name) ((var*) hash_find((hash) , (name)))
550 #define newvar(name) ((var*) hash_find(vhash , (name)))
551 #define newfile(name) ((rstream*)hash_find(fdhash ,(name)))
552 #define newfunc(name) ((func*) hash_find(fnhash , (name)))
553
554 static void hash_remove(xhash *hash, const char *name)
555 {
556 hash_item *hi, **phi;
557
558 phi = &(hash->items[ hashidx(name) % hash->csize ]);
559 while (*phi) {
560 hi = *phi;
561 if (strcmp(hi->name, name) == 0) {
562 hash->glen -= (strlen(name) + 1);
563 hash->nel--;
564 *phi = hi->next;
565 free(hi);
566 break;
567 }
568 phi = &(hi->next);
569 }
570 }
571
572 /* ------ some useful functions ------ */
573
574 static void skip_spaces(char **s)
575 {
576 char *p = *s;
577
578 while (*p == ' ' || *p == '\t' ||
579 (*p == '\\' && *(p+1) == '\n' && (++p, ++t.lineno))) {
580 p++;
581 }
582 *s = p;
583 }
584
585 static char *nextword(char **s)
586 {
587 char *p = *s;
588
589 while (*(*s)++) /* */;
590
591 return p;
592 }
593
594 static char nextchar(char **s)
595 {
596 char c, *pps;
597
598 c = *((*s)++);
599 pps = *s;
600 if (c == '\\') c = bb_process_escape_sequence((const char**)s);
601 if (c == '\\' && *s == pps) c = *((*s)++);
602 return c;
603 }
604
605 static int ATTRIBUTE_ALWAYS_INLINE isalnum_(int c)
606 {
607 return (isalnum(c) || c == '_');
608 }
609
610 static FILE *afopen(const char *path, const char *mode)
611 {
612 return (*path == '-' && *(path+1) == '\0') ? stdin : xfopen(path, mode);
613 }
614
615 /* -------- working with variables (set/get/copy/etc) -------- */
616
617 static xhash *iamarray(var *v)
618 {
619 var *a = v;
620
621 while (a->type & VF_CHILD)
622 a = a->x.parent;
623
624 if (! (a->type & VF_ARRAY)) {
625 a->type |= VF_ARRAY;
626 a->x.array = hash_init();
627 }
628 return a->x.array;
629 }
630
631 static void clear_array(xhash *array)
632 {
633 unsigned i;
634 hash_item *hi, *thi;
635
636 for (i=0; i<array->csize; i++) {
637 hi = array->items[i];
638 while (hi) {
639 thi = hi;
640 hi = hi->next;
641 free(thi->data.v.string);
642 free(thi);
643 }
644 array->items[i] = NULL;
645 }
646 array->glen = array->nel = 0;
647 }
648
649 /* clear a variable */
650 static var *clrvar(var *v)
651 {
652 if (!(v->type & VF_FSTR))
653 free(v->string);
654
655 v->type &= VF_DONTTOUCH;
656 v->type |= VF_DIRTY;
657 v->string = NULL;
658 return v;
659 }
660
661 /* assign string value to variable */
662 static var *setvar_p(var *v, char *value)
663 {
664 clrvar(v);
665 v->string = value;
666 handle_special(v);
667
668 return v;
669 }
670
671 /* same as setvar_p but make a copy of string */
672 static var *setvar_s(var *v, const char *value)
673 {
674 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
675 }
676
677 /* same as setvar_s but set USER flag */
678 static var *setvar_u(var *v, const char *value)
679 {
680 setvar_s(v, value);
681 v->type |= VF_USER;
682 return v;
683 }
684
685 /* set array element to user string */
686 static void setari_u(var *a, int idx, const char *s)
687 {
688 var *v;
689 static char sidx[12];
690
691 sprintf(sidx, "%d", idx);
692 v = findvar(iamarray(a), sidx);
693 setvar_u(v, s);
694 }
695
696 /* assign numeric value to variable */
697 static var *setvar_i(var *v, double value)
698 {
699 clrvar(v);
700 v->type |= VF_NUMBER;
701 v->number = value;
702 handle_special(v);
703 return v;
704 }
705
706 static char *getvar_s(var *v)
707 {
708 /* if v is numeric and has no cached string, convert it to string */
709 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
710 fmt_num(buf, MAXVARFMT, getvar_s(V[CONVFMT]), v->number, TRUE);
711 v->string = xstrdup(buf);
712 v->type |= VF_CACHED;
713 }
714 return (v->string == NULL) ? "" : v->string;
715 }
716
717 static double getvar_i(var *v)
718 {
719 char *s;
720
721 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
722 v->number = 0;
723 s = v->string;
724 if (s && *s) {
725 v->number = strtod(s, &s);
726 if (v->type & VF_USER) {
727 skip_spaces(&s);
728 if (*s != '\0')
729 v->type &= ~VF_USER;
730 }
731 } else {
732 v->type &= ~VF_USER;
733 }
734 v->type |= VF_CACHED;
735 }
736 return v->number;
737 }
738
739 static var *copyvar(var *dest, const var *src)
740 {
741 if (dest != src) {
742 clrvar(dest);
743 dest->type |= (src->type & ~VF_DONTTOUCH);
744 dest->number = src->number;
745 if (src->string)
746 dest->string = xstrdup(src->string);
747 }
748 handle_special(dest);
749 return dest;
750 }
751
752 static var *incvar(var *v)
753 {
754 return setvar_i(v, getvar_i(v)+1.);
755 }
756
757 /* return true if v is number or numeric string */
758 static int is_numeric(var *v)
759 {
760 getvar_i(v);
761 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
762 }
763
764 /* return 1 when value of v corresponds to true, 0 otherwise */
765 static int istrue(var *v)
766 {
767 if (is_numeric(v))
768 return (v->number == 0) ? 0 : 1;
769 else
770 return (v->string && *(v->string)) ? 1 : 0;
771 }
772
773 /* temporary variables allocator. Last allocated should be first freed */
774 static var *nvalloc(int n)
775 {
776 nvblock *pb = NULL;
777 var *v, *r;
778 int size;
779
780 while (cb) {
781 pb = cb;
782 if ((cb->pos - cb->nv) + n <= cb->size) break;
783 cb = cb->next;
784 }
785
786 if (! cb) {
787 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
788 cb = xmalloc(sizeof(nvblock) + size * sizeof(var));
789 cb->size = size;
790 cb->pos = cb->nv;
791 cb->prev = pb;
792 cb->next = NULL;
793 if (pb) pb->next = cb;
794 }
795
796 v = r = cb->pos;
797 cb->pos += n;
798
799 while (v < cb->pos) {
800 v->type = 0;
801 v->string = NULL;
802 v++;
803 }
804
805 return r;
806 }
807
808 static void nvfree(var *v)
809 {
810 var *p;
811
812 if (v < cb->nv || v >= cb->pos)
813 runtime_error(EMSG_INTERNAL_ERROR);
814
815 for (p=v; p<cb->pos; p++) {
816 if ((p->type & (VF_ARRAY|VF_CHILD)) == VF_ARRAY) {
817 clear_array(iamarray(p));
818 free(p->x.array->items);
819 free(p->x.array);
820 }
821 if (p->type & VF_WALK)
822 free(p->x.walker);
823
824 clrvar(p);
825 }
826
827 cb->pos = v;
828 while (cb->prev && cb->pos == cb->nv) {
829 cb = cb->prev;
830 }
831 }
832
833 /* ------- awk program text parsing ------- */
834
835 /* Parse next token pointed by global pos, place results into global t.
836 * If token isn't expected, give away. Return token class
837 */
838 static uint32_t next_token(uint32_t expected)
839 {
840 static int concat_inserted;
841 static uint32_t save_tclass, save_info;
842 static uint32_t ltclass = TC_OPTERM;
843
844 char *p, *pp, *s;
845 const char *tl;
846 uint32_t tc;
847 const uint32_t *ti;
848 int l;
849
850 if (t.rollback) {
851 t.rollback = FALSE;
852
853 } else if (concat_inserted) {
854 concat_inserted = FALSE;
855 t.tclass = save_tclass;
856 t.info = save_info;
857
858 } else {
859 p = pos;
860 readnext:
861 skip_spaces(&p);
862 lineno = t.lineno;
863 if (*p == '#')
864 while (*p != '\n' && *p != '\0') p++;
865
866 if (*p == '\n')
867 t.lineno++;
868
869 if (*p == '\0') {
870 tc = TC_EOF;
871
872 } else if (*p == '\"') {
873 /* it's a string */
874 t.string = s = ++p;
875 while (*p != '\"') {
876 if (*p == '\0' || *p == '\n')
877 syntax_error(EMSG_UNEXP_EOS);
878 *(s++) = nextchar(&p);
879 }
880 p++;
881 *s = '\0';
882 tc = TC_STRING;
883
884 } else if ((expected & TC_REGEXP) && *p == '/') {
885 /* it's regexp */
886 t.string = s = ++p;
887 while (*p != '/') {
888 if (*p == '\0' || *p == '\n')
889 syntax_error(EMSG_UNEXP_EOS);
890 if ((*s++ = *p++) == '\\') {
891 pp = p;
892 *(s-1) = bb_process_escape_sequence((const char **)&p);
893 if (*pp == '\\') *s++ = '\\';
894 if (p == pp) *s++ = *p++;
895 }
896 }
897 p++;
898 *s = '\0';
899 tc = TC_REGEXP;
900
901 } else if (*p == '.' || isdigit(*p)) {
902 /* it's a number */
903 t.number = strtod(p, &p);
904 if (*p == '.')
905 syntax_error(EMSG_UNEXP_TOKEN);
906 tc = TC_NUMBER;
907
908 } else {
909 /* search for something known */
910 tl = tokenlist;
911 tc = 0x00000001;
912 ti = tokeninfo;
913 while (*tl) {
914 l = *(tl++);
915 if (l == NTCC) {
916 tc <<= 1;
917 continue;
918 }
919 /* if token class is expected, token
920 * matches and it's not a longer word,
921 * then this is what we are looking for
922 */
923 if ((tc & (expected | TC_WORD | TC_NEWLINE)) &&
924 *tl == *p && strncmp(p, tl, l) == 0 &&
925 !((tc & TC_WORD) && isalnum_(*(p + l)))) {
926 t.info = *ti;
927 p += l;
928 break;
929 }
930 ti++;
931 tl += l;
932 }
933
934 if (!*tl) {
935 /* it's a name (var/array/function),
936 * otherwise it's something wrong
937 */
938 if (!isalnum_(*p))
939 syntax_error(EMSG_UNEXP_TOKEN);
940
941 t.string = --p;
942 while (isalnum_(*(++p))) {
943 *(p-1) = *p;
944 }
945 *(p-1) = '\0';
946 tc = TC_VARIABLE;
947 /* also consume whitespace between functionname and bracket */
948 if (!(expected & TC_VARIABLE)) skip_spaces(&p);
949 if (*p == '(') {
950 tc = TC_FUNCTION;
951 } else {
952 if (*p == '[') {
953 p++;
954 tc = TC_ARRAY;
955 }
956 }
957 }
958 }
959 pos = p;
960
961 /* skipping newlines in some cases */
962 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
963 goto readnext;
964
965 /* insert concatenation operator when needed */
966 if ((ltclass&TC_CONCAT1) && (tc&TC_CONCAT2) && (expected&TC_BINOP)) {
967 concat_inserted = TRUE;
968 save_tclass = tc;
969 save_info = t.info;
970 tc = TC_BINOP;
971 t.info = OC_CONCAT | SS | P(35);
972 }
973
974 t.tclass = tc;
975 }
976 ltclass = t.tclass;
977
978 /* Are we ready for this? */
979 if (! (ltclass & expected))
980 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
981 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
982
983 return ltclass;
984 }
985
986 static void rollback_token(void) { t.rollback = TRUE; }
987
988 static node *new_node(uint32_t info)
989 {
990 node *n;
991
992 n = xzalloc(sizeof(node));
993 n->info = info;
994 n->lineno = lineno;
995 return n;
996 }
997
998 static node *mk_re_node(char *s, node *n, regex_t *re)
999 {
1000 n->info = OC_REGEXP;
1001 n->l.re = re;
1002 n->r.ire = re + 1;
1003 xregcomp(re, s, REG_EXTENDED);
1004 xregcomp(re+1, s, REG_EXTENDED | REG_ICASE);
1005
1006 return n;
1007 }
1008
1009 static node *condition(void)
1010 {
1011 next_token(TC_SEQSTART);
1012 return parse_expr(TC_SEQTERM);
1013 }
1014
1015 /* parse expression terminated by given argument, return ptr
1016 * to built subtree. Terminator is eaten by parse_expr */
1017 static node *parse_expr(uint32_t iexp)
1018 {
1019 node sn;
1020 node *cn = &sn;
1021 node *vn, *glptr;
1022 uint32_t tc, xtc;
1023 var *v;
1024
1025 sn.info = PRIMASK;
1026 sn.r.n = glptr = NULL;
1027 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1028
1029 while (! ((tc = next_token(xtc)) & iexp)) {
1030 if (glptr && (t.info == (OC_COMPARE|VV|P(39)|2))) {
1031 /* input redirection (<) attached to glptr node */
1032 cn = glptr->l.n = new_node(OC_CONCAT|SS|P(37));
1033 cn->a.n = glptr;
1034 xtc = TC_OPERAND | TC_UOPPRE;
1035 glptr = NULL;
1036
1037 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1038 /* for binary and postfix-unary operators, jump back over
1039 * previous operators with higher priority */
1040 vn = cn;
1041 while ( ((t.info & PRIMASK) > (vn->a.n->info & PRIMASK2)) ||
1042 ((t.info == vn->info) && ((t.info & OPCLSMASK) == OC_COLON)) )
1043 vn = vn->a.n;
1044 if ((t.info & OPCLSMASK) == OC_TERNARY)
1045 t.info += P(6);
1046 cn = vn->a.n->r.n = new_node(t.info);
1047 cn->a.n = vn->a.n;
1048 if (tc & TC_BINOP) {
1049 cn->l.n = vn;
1050 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1051 if ((t.info & OPCLSMASK) == OC_PGETLINE) {
1052 /* it's a pipe */
1053 next_token(TC_GETLINE);
1054 /* give maximum priority to this pipe */
1055 cn->info &= ~PRIMASK;
1056 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1057 }
1058 } else {
1059 cn->r.n = vn;
1060 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1061 }
1062 vn->a.n = cn;
1063
1064 } else {
1065 /* for operands and prefix-unary operators, attach them
1066 * to last node */
1067 vn = cn;
1068 cn = vn->r.n = new_node(t.info);
1069 cn->a.n = vn;
1070 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1071 if (tc & (TC_OPERAND | TC_REGEXP)) {
1072 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1073 /* one should be very careful with switch on tclass -
1074 * only simple tclasses should be used! */
1075 switch (tc) {
1076 case TC_VARIABLE:
1077 case TC_ARRAY:
1078 cn->info = OC_VAR;
1079 if ((v = hash_search(ahash, t.string)) != NULL) {
1080 cn->info = OC_FNARG;
1081 cn->l.i = v->x.aidx;
1082 } else {
1083 cn->l.v = newvar(t.string);
1084 }
1085 if (tc & TC_ARRAY) {
1086 cn->info |= xS;
1087 cn->r.n = parse_expr(TC_ARRTERM);
1088 }
1089 break;
1090
1091 case TC_NUMBER:
1092 case TC_STRING:
1093 cn->info = OC_VAR;
1094 v = cn->l.v = xzalloc(sizeof(var));
1095 if (tc & TC_NUMBER)
1096 setvar_i(v, t.number);
1097 else
1098 setvar_s(v, t.string);
1099 break;
1100
1101 case TC_REGEXP:
1102 mk_re_node(t.string, cn, xzalloc(sizeof(regex_t)*2));
1103 break;
1104
1105 case TC_FUNCTION:
1106 cn->info = OC_FUNC;
1107 cn->r.f = newfunc(t.string);
1108 cn->l.n = condition();
1109 break;
1110
1111 case TC_SEQSTART:
1112 cn = vn->r.n = parse_expr(TC_SEQTERM);
1113 cn->a.n = vn;
1114 break;
1115
1116 case TC_GETLINE:
1117 glptr = cn;
1118 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1119 break;
1120
1121 case TC_BUILTIN:
1122 cn->l.n = condition();
1123 break;
1124 }
1125 }
1126 }
1127 }
1128 return sn.r.n;
1129 }
1130
1131 /* add node to chain. Return ptr to alloc'd node */
1132 static node *chain_node(uint32_t info)
1133 {
1134 node *n;
1135
1136 if (! seq->first)
1137 seq->first = seq->last = new_node(0);
1138
1139 if (seq->programname != programname) {
1140 seq->programname = programname;
1141 n = chain_node(OC_NEWSOURCE);
1142 n->l.s = xstrdup(programname);
1143 }
1144
1145 n = seq->last;
1146 n->info = info;
1147 seq->last = n->a.n = new_node(OC_DONE);
1148
1149 return n;
1150 }
1151
1152 static void chain_expr(uint32_t info)
1153 {
1154 node *n;
1155
1156 n = chain_node(info);
1157 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1158 if (t.tclass & TC_GRPTERM)
1159 rollback_token();
1160 }
1161
1162 static node *chain_loop(node *nn)
1163 {
1164 node *n, *n2, *save_brk, *save_cont;
1165
1166 save_brk = break_ptr;
1167 save_cont = continue_ptr;
1168
1169 n = chain_node(OC_BR | Vx);
1170 continue_ptr = new_node(OC_EXEC);
1171 break_ptr = new_node(OC_EXEC);
1172 chain_group();
1173 n2 = chain_node(OC_EXEC | Vx);
1174 n2->l.n = nn;
1175 n2->a.n = n;
1176 continue_ptr->a.n = n2;
1177 break_ptr->a.n = n->r.n = seq->last;
1178
1179 continue_ptr = save_cont;
1180 break_ptr = save_brk;
1181
1182 return n;
1183 }
1184
1185 /* parse group and attach it to chain */
1186 static void chain_group(void)
1187 {
1188 uint32_t c;
1189 node *n, *n2, *n3;
1190
1191 do {
1192 c = next_token(TC_GRPSEQ);
1193 } while (c & TC_NEWLINE);
1194
1195 if (c & TC_GRPSTART) {
1196 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1197 if (t.tclass & TC_NEWLINE) continue;
1198 rollback_token();
1199 chain_group();
1200 }
1201 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1202 rollback_token();
1203 chain_expr(OC_EXEC | Vx);
1204 } else { /* TC_STATEMNT */
1205 switch (t.info & OPCLSMASK) {
1206 case ST_IF:
1207 n = chain_node(OC_BR | Vx);
1208 n->l.n = condition();
1209 chain_group();
1210 n2 = chain_node(OC_EXEC);
1211 n->r.n = seq->last;
1212 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE)==TC_ELSE) {
1213 chain_group();
1214 n2->a.n = seq->last;
1215 } else {
1216 rollback_token();
1217 }
1218 break;
1219
1220 case ST_WHILE:
1221 n2 = condition();
1222 n = chain_loop(NULL);
1223 n->l.n = n2;
1224 break;
1225
1226 case ST_DO:
1227 n2 = chain_node(OC_EXEC);
1228 n = chain_loop(NULL);
1229 n2->a.n = n->a.n;
1230 next_token(TC_WHILE);
1231 n->l.n = condition();
1232 break;
1233
1234 case ST_FOR:
1235 next_token(TC_SEQSTART);
1236 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1237 if (t.tclass & TC_SEQTERM) { /* for-in */
1238 if ((n2->info & OPCLSMASK) != OC_IN)
1239 syntax_error(EMSG_UNEXP_TOKEN);
1240 n = chain_node(OC_WALKINIT | VV);
1241 n->l.n = n2->l.n;
1242 n->r.n = n2->r.n;
1243 n = chain_loop(NULL);
1244 n->info = OC_WALKNEXT | Vx;
1245 n->l.n = n2->l.n;
1246 } else { /* for (;;) */
1247 n = chain_node(OC_EXEC | Vx);
1248 n->l.n = n2;
1249 n2 = parse_expr(TC_SEMICOL);
1250 n3 = parse_expr(TC_SEQTERM);
1251 n = chain_loop(n3);
1252 n->l.n = n2;
1253 if (! n2)
1254 n->info = OC_EXEC;
1255 }
1256 break;
1257
1258 case OC_PRINT:
1259 case OC_PRINTF:
1260 n = chain_node(t.info);
1261 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1262 if (t.tclass & TC_OUTRDR) {
1263 n->info |= t.info;
1264 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1265 }
1266 if (t.tclass & TC_GRPTERM)
1267 rollback_token();
1268 break;
1269
1270 case OC_BREAK:
1271 n = chain_node(OC_EXEC);
1272 n->a.n = break_ptr;
1273 break;
1274
1275 case OC_CONTINUE:
1276 n = chain_node(OC_EXEC);
1277 n->a.n = continue_ptr;
1278 break;
1279
1280 /* delete, next, nextfile, return, exit */
1281 default:
1282 chain_expr(t.info);
1283 }
1284 }
1285 }
1286
1287 static void parse_program(char *p)
1288 {
1289 uint32_t tclass;
1290 node *cn;
1291 func *f;
1292 var *v;
1293
1294 pos = p;
1295 t.lineno = 1;
1296 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1297 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1298
1299 if (tclass & TC_OPTERM)
1300 continue;
1301
1302 seq = &mainseq;
1303 if (tclass & TC_BEGIN) {
1304 seq = &beginseq;
1305 chain_group();
1306
1307 } else if (tclass & TC_END) {
1308 seq = &endseq;
1309 chain_group();
1310
1311 } else if (tclass & TC_FUNCDECL) {
1312 next_token(TC_FUNCTION);
1313 pos++;
1314 f = newfunc(t.string);
1315 f->body.first = NULL;
1316 f->nargs = 0;
1317 while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1318 v = findvar(ahash, t.string);
1319 v->x.aidx = (f->nargs)++;
1320
1321 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1322 break;
1323 }
1324 seq = &(f->body);
1325 chain_group();
1326 clear_array(ahash);
1327
1328 } else if (tclass & TC_OPSEQ) {
1329 rollback_token();
1330 cn = chain_node(OC_TEST);
1331 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1332 if (t.tclass & TC_GRPSTART) {
1333 rollback_token();
1334 chain_group();
1335 } else {
1336 chain_node(OC_PRINT);
1337 }
1338 cn->r.n = mainseq.last;
1339
1340 } else /* if (tclass & TC_GRPSTART) */ {
1341 rollback_token();
1342 chain_group();
1343 }
1344 }
1345 }
1346
1347
1348 /* -------- program execution part -------- */
1349
1350 static node *mk_splitter(char *s, tsplitter *spl)
1351 {
1352 regex_t *re, *ire;
1353 node *n;
1354
1355 re = &spl->re[0];
1356 ire = &spl->re[1];
1357 n = &spl->n;
1358 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1359 regfree(re);
1360 regfree(ire);
1361 }
1362 if (strlen(s) > 1) {
1363 mk_re_node(s, n, re);
1364 } else {
1365 n->info = (uint32_t) *s;
1366 }
1367
1368 return n;
1369 }
1370
1371 /* use node as a regular expression. Supplied with node ptr and regex_t
1372 * storage space. Return ptr to regex (if result points to preg, it should
1373 * be later regfree'd manually
1374 */
1375 static regex_t *as_regex(node *op, regex_t *preg)
1376 {
1377 var *v;
1378 char *s;
1379
1380 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1381 return icase ? op->r.ire : op->l.re;
1382 } else {
1383 v = nvalloc(1);
1384 s = getvar_s(evaluate(op, v));
1385 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1386 nvfree(v);
1387 return preg;
1388 }
1389 }
1390
1391 /* gradually increasing buffer */
1392 static void qrealloc(char **b, int n, int *size)
1393 {
1394 if (!*b || n >= *size)
1395 *b = xrealloc(*b, *size = n + (n>>1) + 80);
1396 }
1397
1398 /* resize field storage space */
1399 static void fsrealloc(int size)
1400 {
1401 static int maxfields; /* = 0;*/
1402 int i;
1403
1404 if (size >= maxfields) {
1405 i = maxfields;
1406 maxfields = size + 16;
1407 Fields = xrealloc(Fields, maxfields * sizeof(var));
1408 for (; i < maxfields; i++) {
1409 Fields[i].type = VF_SPECIAL;
1410 Fields[i].string = NULL;
1411 }
1412 }
1413
1414 if (size < nfields) {
1415 for (i = size; i < nfields; i++) {
1416 clrvar(Fields + i);
1417 }
1418 }
1419 nfields = size;
1420 }
1421
1422 static int awk_split(char *s, node *spl, char **slist)
1423 {
1424 int l, n = 0;
1425 char c[4];
1426 char *s1;
1427 regmatch_t pmatch[2];
1428
1429 /* in worst case, each char would be a separate field */
1430 *slist = s1 = xstrndup(s, strlen(s) * 2 + 3);
1431
1432 c[0] = c[1] = (char)spl->info;
1433 c[2] = c[3] = '\0';
1434 if (*getvar_s(V[RS]) == '\0') c[2] = '\n';
1435
1436 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1437 while (*s) {
1438 l = strcspn(s, c+2);
1439 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0 &&
1440 pmatch[0].rm_so <= l) {
1441 l = pmatch[0].rm_so;
1442 if (pmatch[0].rm_eo == 0) { l++; pmatch[0].rm_eo++; }
1443 } else {
1444 pmatch[0].rm_eo = l;
1445 if (s[l]) pmatch[0].rm_eo++;
1446 }
1447
1448 memcpy(s1, s, l);
1449 s1[l] = '\0';
1450 nextword(&s1);
1451 s += pmatch[0].rm_eo;
1452 n++;
1453 }
1454 } else if (c[0] == '\0') { /* null split */
1455 while (*s) {
1456 *s1++ = *s++;
1457 *s1++ = '\0';
1458 n++;
1459 }
1460 } else if (c[0] != ' ') { /* single-character split */
1461 if (icase) {
1462 c[0] = toupper(c[0]);
1463 c[1] = tolower(c[1]);
1464 }
1465 if (*s1) n++;
1466 while ((s1 = strpbrk(s1, c))) {
1467 *s1++ = '\0';
1468 n++;
1469 }
1470 } else { /* space split */
1471 while (*s) {
1472 s = skip_whitespace(s);
1473 if (!*s) break;
1474 n++;
1475 while (*s && !isspace(*s))
1476 *s1++ = *s++;
1477 *s1++ = '\0';
1478 }
1479 }
1480 return n;
1481 }
1482
1483 static void split_f0(void)
1484 {
1485 static char *fstrings = NULL;
1486 int i, n;
1487 char *s;
1488
1489 if (is_f0_split)
1490 return;
1491
1492 is_f0_split = TRUE;
1493 free(fstrings);
1494 fsrealloc(0);
1495 n = awk_split(getvar_s(V[F0]), &fsplitter.n, &fstrings);
1496 fsrealloc(n);
1497 s = fstrings;
1498 for (i = 0; i < n; i++) {
1499 Fields[i].string = nextword(&s);
1500 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1501 }
1502
1503 /* set NF manually to avoid side effects */
1504 clrvar(V[NF]);
1505 V[NF]->type = VF_NUMBER | VF_SPECIAL;
1506 V[NF]->number = nfields;
1507 }
1508
1509 /* perform additional actions when some internal variables changed */
1510 static void handle_special(var *v)
1511 {
1512 int n;
1513 char *b, *sep, *s;
1514 int sl, l, len, i, bsize;
1515
1516 if (!(v->type & VF_SPECIAL))
1517 return;
1518
1519 if (v == V[NF]) {
1520 n = (int)getvar_i(v);
1521 fsrealloc(n);
1522
1523 /* recalculate $0 */
1524 sep = getvar_s(V[OFS]);
1525 sl = strlen(sep);
1526 b = NULL;
1527 len = 0;
1528 for (i=0; i<n; i++) {
1529 s = getvar_s(&Fields[i]);
1530 l = strlen(s);
1531 if (b) {
1532 memcpy(b+len, sep, sl);
1533 len += sl;
1534 }
1535 qrealloc(&b, len+l+sl, &bsize);
1536 memcpy(b+len, s, l);
1537 len += l;
1538 }
1539 if (b)
1540 b[len] = '\0';
1541 setvar_p(V[F0], b);
1542 is_f0_split = TRUE;
1543
1544 } else if (v == V[F0]) {
1545 is_f0_split = FALSE;
1546
1547 } else if (v == V[FS]) {
1548 mk_splitter(getvar_s(v), &fsplitter);
1549
1550 } else if (v == V[RS]) {
1551 mk_splitter(getvar_s(v), &rsplitter);
1552
1553 } else if (v == V[IGNORECASE]) {
1554 icase = istrue(v);
1555
1556 } else { /* $n */
1557 n = getvar_i(V[NF]);
1558 setvar_i(V[NF], n > v-Fields ? n : v-Fields+1);
1559 /* right here v is invalid. Just to note... */
1560 }
1561 }
1562
1563 /* step through func/builtin/etc arguments */
1564 static node *nextarg(node **pn)
1565 {
1566 node *n;
1567
1568 n = *pn;
1569 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1570 *pn = n->r.n;
1571 n = n->l.n;
1572 } else {
1573 *pn = NULL;
1574 }
1575 return n;
1576 }
1577
1578 static void hashwalk_init(var *v, xhash *array)
1579 {
1580 char **w;
1581 hash_item *hi;
1582 int i;
1583
1584 if (v->type & VF_WALK)
1585 free(v->x.walker);
1586
1587 v->type |= VF_WALK;
1588 w = v->x.walker = xzalloc(2 + 2*sizeof(char *) + array->glen);
1589 *w = *(w+1) = (char *)(w + 2);
1590 for (i=0; i<array->csize; i++) {
1591 hi = array->items[i];
1592 while (hi) {
1593 strcpy(*w, hi->name);
1594 nextword(w);
1595 hi = hi->next;
1596 }
1597 }
1598 }
1599
1600 static int hashwalk_next(var *v)
1601 {
1602 char **w;
1603
1604 w = v->x.walker;
1605 if (*(w+1) == *w)
1606 return FALSE;
1607
1608 setvar_s(v, nextword(w+1));
1609 return TRUE;
1610 }
1611
1612 /* evaluate node, return 1 when result is true, 0 otherwise */
1613 static int ptest(node *pattern)
1614 {
1615 static var v; /* static: to save stack space? */
1616
1617 return istrue(evaluate(pattern, &v));
1618 }
1619
1620 /* read next record from stream rsm into a variable v */
1621 static int awk_getline(rstream *rsm, var *v)
1622 {
1623 char *b;
1624 regmatch_t pmatch[2];
1625 int a, p, pp=0, size;
1626 int fd, so, eo, r, rp;
1627 char c, *m, *s;
1628
1629 /* we're using our own buffer since we need access to accumulating
1630 * characters
1631 */
1632 fd = fileno(rsm->F);
1633 m = rsm->buffer;
1634 a = rsm->adv;
1635 p = rsm->pos;
1636 size = rsm->size;
1637 c = (char) rsplitter.n.info;
1638 rp = 0;
1639
1640 if (! m) qrealloc(&m, 256, &size);
1641 do {
1642 b = m + a;
1643 so = eo = p;
1644 r = 1;
1645 if (p > 0) {
1646 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1647 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1648 b, 1, pmatch, 0) == 0) {
1649 so = pmatch[0].rm_so;
1650 eo = pmatch[0].rm_eo;
1651 if (b[eo] != '\0')
1652 break;
1653 }
1654 } else if (c != '\0') {
1655 s = strchr(b+pp, c);
1656 if (! s) s = memchr(b+pp, '\0', p - pp);
1657 if (s) {
1658 so = eo = s-b;
1659 eo++;
1660 break;
1661 }
1662 } else {
1663 while (b[rp] == '\n')
1664 rp++;
1665 s = strstr(b+rp, "\n\n");
1666 if (s) {
1667 so = eo = s-b;
1668 while (b[eo] == '\n') eo++;
1669 if (b[eo] != '\0')
1670 break;
1671 }
1672 }
1673 }
1674
1675 if (a > 0) {
1676 memmove(m, (const void *)(m+a), p+1);
1677 b = m;
1678 a = 0;
1679 }
1680
1681 qrealloc(&m, a+p+128, &size);
1682 b = m + a;
1683 pp = p;
1684 p += safe_read(fd, b+p, size-p-1);
1685 if (p < pp) {
1686 p = 0;
1687 r = 0;
1688 setvar_i(V[ERRNO], errno);
1689 }
1690 b[p] = '\0';
1691
1692 } while (p > pp);
1693
1694 if (p == 0) {
1695 r--;
1696 } else {
1697 c = b[so]; b[so] = '\0';
1698 setvar_s(v, b+rp);
1699 v->type |= VF_USER;
1700 b[so] = c;
1701 c = b[eo]; b[eo] = '\0';
1702 setvar_s(V[RT], b+so);
1703 b[eo] = c;
1704 }
1705
1706 rsm->buffer = m;
1707 rsm->adv = a + eo;
1708 rsm->pos = p - eo;
1709 rsm->size = size;
1710
1711 return r;
1712 }
1713
1714 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1715 {
1716 int r = 0;
1717 char c;
1718 const char *s = format;
1719
1720 if (int_as_int && n == (int)n) {
1721 r = snprintf(b, size, "%d", (int)n);
1722 } else {
1723 do { c = *s; } while (c && *++s);
1724 if (strchr("diouxX", c)) {
1725 r = snprintf(b, size, format, (int)n);
1726 } else if (strchr("eEfgG", c)) {
1727 r = snprintf(b, size, format, n);
1728 } else {
1729 runtime_error(EMSG_INV_FMT);
1730 }
1731 }
1732 return r;
1733 }
1734
1735
1736 /* formatted output into an allocated buffer, return ptr to buffer */
1737 static char *awk_printf(node *n)
1738 {
1739 char *b = NULL;
1740 char *fmt, *s, *s1, *f;
1741 int i, j, incr, bsize;
1742 char c, c1;
1743 var *v, *arg;
1744
1745 v = nvalloc(1);
1746 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1747
1748 i = 0;
1749 while (*f) {
1750 s = f;
1751 while (*f && (*f != '%' || *(++f) == '%'))
1752 f++;
1753 while (*f && !isalpha(*f))
1754 f++;
1755
1756 incr = (f - s) + MAXVARFMT;
1757 qrealloc(&b, incr + i, &bsize);
1758 c = *f;
1759 if (c != '\0') f++;
1760 c1 = *f;
1761 *f = '\0';
1762 arg = evaluate(nextarg(&n), v);
1763
1764 j = i;
1765 if (c == 'c' || !c) {
1766 i += sprintf(b+i, s, is_numeric(arg) ?
1767 (char)getvar_i(arg) : *getvar_s(arg));
1768
1769 } else if (c == 's') {
1770 s1 = getvar_s(arg);
1771 qrealloc(&b, incr+i+strlen(s1), &bsize);
1772 i += sprintf(b+i, s, s1);
1773
1774 } else {
1775 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1776 }
1777 *f = c1;
1778
1779 /* if there was an error while sprintf, return value is negative */
1780 if (i < j) i = j;
1781 }
1782
1783 b = xrealloc(b, i + 1);
1784 free(fmt);
1785 nvfree(v);
1786 b[i] = '\0';
1787 return b;
1788 }
1789
1790 /* common substitution routine
1791 * replace (nm) substring of (src) that match (n) with (repl), store
1792 * result into (dest), return number of substitutions. If nm=0, replace
1793 * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1794 * subexpression matching (\1-\9)
1795 */
1796 static int awk_sub(node *rn, char *repl, int nm, var *src, var *dest, int ex)
1797 {
1798 char *ds = NULL;
1799 char *sp, *s;
1800 int c, i, j, di, rl, so, eo, nbs, n, dssize;
1801 regmatch_t pmatch[10];
1802 regex_t sreg, *re;
1803
1804 re = as_regex(rn, &sreg);
1805 if (! src) src = V[F0];
1806 if (! dest) dest = V[F0];
1807
1808 i = di = 0;
1809 sp = getvar_s(src);
1810 rl = strlen(repl);
1811 while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0:REG_NOTBOL) == 0) {
1812 so = pmatch[0].rm_so;
1813 eo = pmatch[0].rm_eo;
1814
1815 qrealloc(&ds, di + eo + rl, &dssize);
1816 memcpy(ds + di, sp, eo);
1817 di += eo;
1818 if (++i >= nm) {
1819 /* replace */
1820 di -= (eo - so);
1821 nbs = 0;
1822 for (s = repl; *s; s++) {
1823 ds[di++] = c = *s;
1824 if (c == '\\') {
1825 nbs++;
1826 continue;
1827 }
1828 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1829 di -= ((nbs + 3) >> 1);
1830 j = 0;
1831 if (c != '&') {
1832 j = c - '0';
1833 nbs++;
1834 }
1835 if (nbs % 2) {
1836 ds[di++] = c;
1837 } else {
1838 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1839 qrealloc(&ds, di + rl + n, &dssize);
1840 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1841 di += n;
1842 }
1843 }
1844 nbs = 0;
1845 }
1846 }
1847
1848 sp += eo;
1849 if (i == nm) break;
1850 if (eo == so) {
1851 if (! (ds[di++] = *sp++)) break;
1852 }
1853 }
1854
1855 qrealloc(&ds, di + strlen(sp), &dssize);
1856 strcpy(ds + di, sp);
1857 setvar_p(dest, ds);
1858 if (re == &sreg) regfree(re);
1859 return i;
1860 }
1861
1862 static var *exec_builtin(node *op, var *res)
1863 {
1864 int (*to_xxx)(int);
1865 var *tv;
1866 node *an[4];
1867 var *av[4];
1868 char *as[4];
1869 regmatch_t pmatch[2];
1870 regex_t sreg, *re;
1871 static tsplitter tspl;
1872 node *spl;
1873 uint32_t isr, info;
1874 int nargs;
1875 time_t tt;
1876 char *s, *s1;
1877 int i, l, ll, n;
1878
1879 tv = nvalloc(4);
1880 isr = info = op->info;
1881 op = op->l.n;
1882
1883 av[2] = av[3] = NULL;
1884 for (i=0 ; i<4 && op ; i++) {
1885 an[i] = nextarg(&op);
1886 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1887 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
1888 isr >>= 1;
1889 }
1890
1891 nargs = i;
1892 if (nargs < (info >> 30))
1893 runtime_error(EMSG_TOO_FEW_ARGS);
1894
1895 switch (info & OPNMASK) {
1896
1897 case B_a2:
1898 #if ENABLE_FEATURE_AWK_MATH
1899 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
1900 #else
1901 runtime_error(EMSG_NO_MATH);
1902 #endif
1903 break;
1904
1905 case B_sp:
1906 if (nargs > 2) {
1907 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
1908 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
1909 } else {
1910 spl = &fsplitter.n;
1911 }
1912
1913 n = awk_split(as[0], spl, &s);
1914 s1 = s;
1915 clear_array(iamarray(av[1]));
1916 for (i=1; i<=n; i++)
1917 setari_u(av[1], i, nextword(&s1));
1918 free(s);
1919 setvar_i(res, n);
1920 break;
1921
1922 case B_ss:
1923 l = strlen(as[0]);
1924 i = getvar_i(av[1]) - 1;
1925 if (i>l) i=l; if (i<0) i=0;
1926 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
1927 if (n<0) n=0;
1928 s = xmalloc(n+1);
1929 strncpy(s, as[0]+i, n);
1930 s[n] = '\0';
1931 setvar_p(res, s);
1932 break;
1933
1934 case B_an:
1935 setvar_i(res, (long)getvar_i(av[0]) & (long)getvar_i(av[1]));
1936 break;
1937
1938 case B_co:
1939 setvar_i(res, ~(long)getvar_i(av[0]));
1940 break;
1941
1942 case B_ls:
1943 setvar_i(res, (long)getvar_i(av[0]) << (long)getvar_i(av[1]));
1944 break;
1945
1946 case B_or:
1947 setvar_i(res, (long)getvar_i(av[0]) | (long)getvar_i(av[1]));
1948 break;
1949
1950 case B_rs:
1951 setvar_i(res, (long)((unsigned long)getvar_i(av[0]) >> (unsigned long)getvar_i(av[1])));
1952 break;
1953
1954 case B_xo:
1955 setvar_i(res, (long)getvar_i(av[0]) ^ (long)getvar_i(av[1]));
1956 break;
1957
1958 case B_lo:
1959 to_xxx = tolower;
1960 goto lo_cont;
1961
1962 case B_up:
1963 to_xxx = toupper;
1964 lo_cont:
1965 s1 = s = xstrdup(as[0]);
1966 while (*s1) {
1967 *s1 = (*to_xxx)(*s1);
1968 s1++;
1969 }
1970 setvar_p(res, s);
1971 break;
1972
1973 case B_ix:
1974 n = 0;
1975 ll = strlen(as[1]);
1976 l = strlen(as[0]) - ll;
1977 if (ll > 0 && l >= 0) {
1978 if (! icase) {
1979 s = strstr(as[0], as[1]);
1980 if (s) n = (s - as[0]) + 1;
1981 } else {
1982 /* this piece of code is terribly slow and
1983 * really should be rewritten
1984 */
1985 for (i=0; i<=l; i++) {
1986 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
1987 n = i+1;
1988 break;
1989 }
1990 }
1991 }
1992 }
1993 setvar_i(res, n);
1994 break;
1995
1996 case B_ti:
1997 if (nargs > 1)
1998 tt = getvar_i(av[1]);
1999 else
2000 time(&tt);
2001 s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2002 i = strftime(buf, MAXVARFMT, s, localtime(&tt));
2003 buf[i] = '\0';
2004 setvar_s(res, buf);
2005 break;
2006
2007 case B_ma:
2008 re = as_regex(an[1], &sreg);
2009 n = regexec(re, as[0], 1, pmatch, 0);
2010 if (n == 0) {
2011 pmatch[0].rm_so++;
2012 pmatch[0].rm_eo++;
2013 } else {
2014 pmatch[0].rm_so = 0;
2015 pmatch[0].rm_eo = -1;
2016 }
2017 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2018 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2019 setvar_i(res, pmatch[0].rm_so);
2020 if (re == &sreg) regfree(re);
2021 break;
2022
2023 case B_ge:
2024 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2025 break;
2026
2027 case B_gs:
2028 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2029 break;
2030
2031 case B_su:
2032 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2033 break;
2034 }
2035
2036 nvfree(tv);
2037 return res;
2038 }
2039
2040 /*
2041 * Evaluate node - the heart of the program. Supplied with subtree
2042 * and place where to store result. returns ptr to result.
2043 */
2044 #define XC(n) ((n) >> 8)
2045
2046 static var *evaluate(node *op, var *res)
2047 {
2048 /* This procedure is recursive so we should count every byte */
2049 static var *fnargs = NULL;
2050 static unsigned seed = 1;
2051 static regex_t sreg;
2052
2053 node *op1;
2054 var *v1;
2055 union {
2056 var *v;
2057 char *s;
2058 double d;
2059 int i;
2060 } L, R;
2061 uint32_t opinfo;
2062 short opn;
2063 union {
2064 char *s;
2065 rstream *rsm;
2066 FILE *F;
2067 var *v;
2068 regex_t *re;
2069 uint32_t info;
2070 } X;
2071
2072 if (!op)
2073 return setvar_s(res, NULL);
2074
2075 v1 = nvalloc(2);
2076
2077 while (op) {
2078
2079 opinfo = op->info;
2080 opn = (short)(opinfo & OPNMASK);
2081 lineno = op->lineno;
2082
2083 /* execute inevitable things */
2084 op1 = op->l.n;
2085 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2086 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2087 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2088 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2089 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2090
2091 switch (XC(opinfo & OPCLSMASK)) {
2092
2093 /* -- iterative node type -- */
2094
2095 /* test pattern */
2096 case XC( OC_TEST ):
2097 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2098 /* it's range pattern */
2099 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2100 op->info |= OF_CHECKED;
2101 if (ptest(op1->r.n))
2102 op->info &= ~OF_CHECKED;
2103
2104 op = op->a.n;
2105 } else {
2106 op = op->r.n;
2107 }
2108 } else {
2109 op = (ptest(op1)) ? op->a.n : op->r.n;
2110 }
2111 break;
2112
2113 /* just evaluate an expression, also used as unconditional jump */
2114 case XC( OC_EXEC ):
2115 break;
2116
2117 /* branch, used in if-else and various loops */
2118 case XC( OC_BR ):
2119 op = istrue(L.v) ? op->a.n : op->r.n;
2120 break;
2121
2122 /* initialize for-in loop */
2123 case XC( OC_WALKINIT ):
2124 hashwalk_init(L.v, iamarray(R.v));
2125 break;
2126
2127 /* get next array item */
2128 case XC( OC_WALKNEXT ):
2129 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2130 break;
2131
2132 case XC( OC_PRINT ):
2133 case XC( OC_PRINTF ):
2134 X.F = stdout;
2135 if (op->r.n) {
2136 X.rsm = newfile(R.s);
2137 if (! X.rsm->F) {
2138 if (opn == '|') {
2139 if((X.rsm->F = popen(R.s, "w")) == NULL)
2140 bb_perror_msg_and_die("popen");
2141 X.rsm->is_pipe = 1;
2142 } else {
2143 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2144 }
2145 }
2146 X.F = X.rsm->F;
2147 }
2148
2149 if ((opinfo & OPCLSMASK) == OC_PRINT) {
2150 if (! op1) {
2151 fputs(getvar_s(V[F0]), X.F);
2152 } else {
2153 while (op1) {
2154 L.v = evaluate(nextarg(&op1), v1);
2155 if (L.v->type & VF_NUMBER) {
2156 fmt_num(buf, MAXVARFMT, getvar_s(V[OFMT]),
2157 getvar_i(L.v), TRUE);
2158 fputs(buf, X.F);
2159 } else {
2160 fputs(getvar_s(L.v), X.F);
2161 }
2162
2163 if (op1) fputs(getvar_s(V[OFS]), X.F);
2164 }
2165 }
2166 fputs(getvar_s(V[ORS]), X.F);
2167
2168 } else { /* OC_PRINTF */
2169 L.s = awk_printf(op1);
2170 fputs(L.s, X.F);
2171 free(L.s);
2172 }
2173 fflush(X.F);
2174 break;
2175
2176 case XC( OC_DELETE ):
2177 X.info = op1->info & OPCLSMASK;
2178 if (X.info == OC_VAR) {
2179 R.v = op1->l.v;
2180 } else if (X.info == OC_FNARG) {
2181 R.v = &fnargs[op1->l.i];
2182 } else {
2183 runtime_error(EMSG_NOT_ARRAY);
2184 }
2185
2186 if (op1->r.n) {
2187 clrvar(L.v);
2188 L.s = getvar_s(evaluate(op1->r.n, v1));
2189 hash_remove(iamarray(R.v), L.s);
2190 } else {
2191 clear_array(iamarray(R.v));
2192 }
2193 break;
2194
2195 case XC( OC_NEWSOURCE ):
2196 programname = op->l.s;
2197 break;
2198
2199 case XC( OC_RETURN ):
2200 copyvar(res, L.v);
2201 break;
2202
2203 case XC( OC_NEXTFILE ):
2204 nextfile = TRUE;
2205 case XC( OC_NEXT ):
2206 nextrec = TRUE;
2207 case XC( OC_DONE ):
2208 clrvar(res);
2209 break;
2210
2211 case XC( OC_EXIT ):
2212 awk_exit(L.d);
2213
2214 /* -- recursive node type -- */
2215
2216 case XC( OC_VAR ):
2217 L.v = op->l.v;
2218 if (L.v == V[NF])
2219 split_f0();
2220 goto v_cont;
2221
2222 case XC( OC_FNARG ):
2223 L.v = &fnargs[op->l.i];
2224 v_cont:
2225 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2226 break;
2227
2228 case XC( OC_IN ):
2229 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2230 break;
2231
2232 case XC( OC_REGEXP ):
2233 op1 = op;
2234 L.s = getvar_s(V[F0]);
2235 goto re_cont;
2236
2237 case XC( OC_MATCH ):
2238 op1 = op->r.n;
2239 re_cont:
2240 X.re = as_regex(op1, &sreg);
2241 R.i = regexec(X.re, L.s, 0, NULL, 0);
2242 if (X.re == &sreg) regfree(X.re);
2243 setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2244 break;
2245
2246 case XC( OC_MOVE ):
2247 /* if source is a temporary string, jusk relink it to dest */
2248 if (R.v == v1+1 && R.v->string) {
2249 res = setvar_p(L.v, R.v->string);
2250 R.v->string = NULL;
2251 } else {
2252 res = copyvar(L.v, R.v);
2253 }
2254 break;
2255
2256 case XC( OC_TERNARY ):
2257 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2258 runtime_error(EMSG_POSSIBLE_ERROR);
2259 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2260 break;
2261
2262 case XC( OC_FUNC ):
2263 if (! op->r.f->body.first)
2264 runtime_error(EMSG_UNDEF_FUNC);
2265
2266 X.v = R.v = nvalloc(op->r.f->nargs+1);
2267 while (op1) {
2268 L.v = evaluate(nextarg(&op1), v1);
2269 copyvar(R.v, L.v);
2270 R.v->type |= VF_CHILD;
2271 R.v->x.parent = L.v;
2272 if (++R.v - X.v >= op->r.f->nargs)
2273 break;
2274 }
2275
2276 R.v = fnargs;
2277 fnargs = X.v;
2278
2279 L.s = programname;
2280 res = evaluate(op->r.f->body.first, res);
2281 programname = L.s;
2282
2283 nvfree(fnargs);
2284 fnargs = R.v;
2285 break;
2286
2287 case XC( OC_GETLINE ):
2288 case XC( OC_PGETLINE ):
2289 if (op1) {
2290 X.rsm = newfile(L.s);
2291 if (! X.rsm->F) {
2292 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2293 X.rsm->F = popen(L.s, "r");
2294 X.rsm->is_pipe = TRUE;
2295 } else {
2296 X.rsm->F = fopen(L.s, "r"); /* not xfopen! */
2297 }
2298 }
2299 } else {
2300 if (! iF) iF = next_input_file();
2301 X.rsm = iF;
2302 }
2303
2304 if (! X.rsm->F) {
2305 setvar_i(V[ERRNO], errno);
2306 setvar_i(res, -1);
2307 break;
2308 }
2309
2310 if (! op->r.n)
2311 R.v = V[F0];
2312
2313 L.i = awk_getline(X.rsm, R.v);
2314 if (L.i > 0) {
2315 if (! op1) {
2316 incvar(V[FNR]);
2317 incvar(V[NR]);
2318 }
2319 }
2320 setvar_i(res, L.i);
2321 break;
2322
2323 /* simple builtins */
2324 case XC( OC_FBLTIN ):
2325 switch (opn) {
2326
2327 case F_in:
2328 R.d = (int)L.d;
2329 break;
2330
2331 case F_rn:
2332 R.d = (double)rand() / (double)RAND_MAX;
2333 break;
2334
2335 #if ENABLE_FEATURE_AWK_MATH
2336 case F_co:
2337 R.d = cos(L.d);
2338 break;
2339
2340 case F_ex:
2341 R.d = exp(L.d);
2342 break;
2343
2344 case F_lg:
2345 R.d = log(L.d);
2346 break;
2347
2348 case F_si:
2349 R.d = sin(L.d);
2350 break;
2351
2352 case F_sq:
2353 R.d = sqrt(L.d);
2354 break;
2355 #else
2356 case F_co:
2357 case F_ex:
2358 case F_lg:
2359 case F_si:
2360 case F_sq:
2361 runtime_error(EMSG_NO_MATH);
2362 break;
2363 #endif
2364
2365 case F_sr:
2366 R.d = (double)seed;
2367 seed = op1 ? (unsigned)L.d : (unsigned)time(NULL);
2368 srand(seed);
2369 break;
2370
2371 case F_ti:
2372 R.d = time(NULL);
2373 break;
2374
2375 case F_le:
2376 if (! op1)
2377 L.s = getvar_s(V[F0]);
2378 R.d = strlen(L.s);
2379 break;
2380
2381 case F_sy:
2382 fflush(NULL);
2383 R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2384 ? (system(L.s) >> 8) : 0;
2385 break;
2386
2387 case F_ff:
2388 if (! op1)
2389 fflush(stdout);
2390 else {
2391 if (L.s && *L.s) {
2392 X.rsm = newfile(L.s);
2393 fflush(X.rsm->F);
2394 } else {
2395 fflush(NULL);
2396 }
2397 }
2398 break;
2399
2400 case F_cl:
2401 X.rsm = (rstream *)hash_search(fdhash, L.s);
2402 if (X.rsm) {
2403 R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2404 free(X.rsm->buffer);
2405 hash_remove(fdhash, L.s);
2406 }
2407 if (R.i != 0)
2408 setvar_i(V[ERRNO], errno);
2409 R.d = (double)R.i;
2410 break;
2411 }
2412 setvar_i(res, R.d);
2413 break;
2414
2415 case XC( OC_BUILTIN ):
2416 res = exec_builtin(op, res);
2417 break;
2418
2419 case XC( OC_SPRINTF ):
2420 setvar_p(res, awk_printf(op1));
2421 break;
2422
2423 case XC( OC_UNARY ):
2424 X.v = R.v;
2425 L.d = R.d = getvar_i(R.v);
2426 switch (opn) {
2427 case 'P':
2428 L.d = ++R.d;
2429 goto r_op_change;
2430 case 'p':
2431 R.d++;
2432 goto r_op_change;
2433 case 'M':
2434 L.d = --R.d;
2435 goto r_op_change;
2436 case 'm':
2437 R.d--;
2438 goto r_op_change;
2439 case '!':
2440 L.d = istrue(X.v) ? 0 : 1;
2441 break;
2442 case '-':
2443 L.d = -R.d;
2444 break;
2445 r_op_change:
2446 setvar_i(X.v, R.d);
2447 }
2448 setvar_i(res, L.d);
2449 break;
2450
2451 case XC( OC_FIELD ):
2452 R.i = (int)getvar_i(R.v);
2453 if (R.i == 0) {
2454 res = V[F0];
2455 } else {
2456 split_f0();
2457 if (R.i > nfields)
2458 fsrealloc(R.i);
2459
2460 res = &Fields[R.i-1];
2461 }
2462 break;
2463
2464 /* concatenation (" ") and index joining (",") */
2465 case XC( OC_CONCAT ):
2466 case XC( OC_COMMA ):
2467 opn = strlen(L.s) + strlen(R.s) + 2;
2468 X.s = xmalloc(opn);
2469 strcpy(X.s, L.s);
2470 if ((opinfo & OPCLSMASK) == OC_COMMA) {
2471 L.s = getvar_s(V[SUBSEP]);
2472 X.s = xrealloc(X.s, opn + strlen(L.s));
2473 strcat(X.s, L.s);
2474 }
2475 strcat(X.s, R.s);
2476 setvar_p(res, X.s);
2477 break;
2478
2479 case XC( OC_LAND ):
2480 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2481 break;
2482
2483 case XC( OC_LOR ):
2484 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2485 break;
2486
2487 case XC( OC_BINARY ):
2488 case XC( OC_REPLACE ):
2489 R.d = getvar_i(R.v);
2490 switch (opn) {
2491 case '+':
2492 L.d += R.d;
2493 break;
2494 case '-':
2495 L.d -= R.d;
2496 break;
2497 case '*':
2498 L.d *= R.d;
2499 break;
2500 case '/':
2501 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2502 L.d /= R.d;
2503 break;
2504 case '&':
2505 #if ENABLE_FEATURE_AWK_MATH
2506 L.d = pow(L.d, R.d);
2507 #else
2508 runtime_error(EMSG_NO_MATH);
2509 #endif
2510 break;
2511 case '%':
2512 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2513 L.d -= (int)(L.d / R.d) * R.d;
2514 break;
2515 }
2516 res = setvar_i(((opinfo&OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2517 break;
2518
2519 case XC( OC_COMPARE ):
2520 if (is_numeric(L.v) && is_numeric(R.v)) {
2521 L.d = getvar_i(L.v) - getvar_i(R.v);
2522 } else {
2523 L.s = getvar_s(L.v);
2524 R.s = getvar_s(R.v);
2525 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2526 }
2527 switch (opn & 0xfe) {
2528 case 0:
2529 R.i = (L.d > 0);
2530 break;
2531 case 2:
2532 R.i = (L.d >= 0);
2533 break;
2534 case 4:
2535 R.i = (L.d == 0);
2536 break;
2537 }
2538 setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2539 break;
2540
2541 default:
2542 runtime_error(EMSG_POSSIBLE_ERROR);
2543 }
2544 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2545 op = op->a.n;
2546 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2547 break;
2548 if (nextrec)
2549 break;
2550 }
2551 nvfree(v1);
2552 return res;
2553 }
2554
2555
2556 /* -------- main & co. -------- */
2557
2558 static int awk_exit(int r)
2559 {
2560 var tv;
2561 unsigned i;
2562 hash_item *hi;
2563
2564 zero_out_var(&tv);
2565
2566 if (!exiting) {
2567 exiting = TRUE;
2568 nextrec = FALSE;
2569 evaluate(endseq.first, &tv);
2570 }
2571
2572 /* waiting for children */
2573 for (i = 0; i < fdhash->csize; i++) {
2574 hi = fdhash->items[i];
2575 while (hi) {
2576 if (hi->data.rs.F && hi->data.rs.is_pipe)
2577 pclose(hi->data.rs.F);
2578 hi = hi->next;
2579 }
2580 }
2581
2582 exit(r);
2583 }
2584
2585 /* if expr looks like "var=value", perform assignment and return 1,
2586 * otherwise return 0 */
2587 static int is_assignment(const char *expr)
2588 {
2589 char *exprc, *s, *s0, *s1;
2590
2591 exprc = xstrdup(expr);
2592 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2593 free(exprc);
2594 return FALSE;
2595 }
2596
2597 *(s++) = '\0';
2598 s0 = s1 = s;
2599 while (*s)
2600 *(s1++) = nextchar(&s);
2601
2602 *s1 = '\0';
2603 setvar_u(newvar(exprc), s0);
2604 free(exprc);
2605 return TRUE;
2606 }
2607
2608 /* switch to next input file */
2609 static rstream *next_input_file(void)
2610 {
2611 static rstream rsm;
2612 FILE *F = NULL;
2613 char *fname, *ind;
2614 static int files_happen = FALSE;
2615
2616 if (rsm.F) fclose(rsm.F);
2617 rsm.F = NULL;
2618 rsm.pos = rsm.adv = 0;
2619
2620 do {
2621 if (getvar_i(V[ARGIND])+1 >= getvar_i(V[ARGC])) {
2622 if (files_happen)
2623 return NULL;
2624 fname = "-";
2625 F = stdin;
2626 } else {
2627 ind = getvar_s(incvar(V[ARGIND]));
2628 fname = getvar_s(findvar(iamarray(V[ARGV]), ind));
2629 if (fname && *fname && !is_assignment(fname))
2630 F = afopen(fname, "r");
2631 }
2632 } while (!F);
2633
2634 files_happen = TRUE;
2635 setvar_s(V[FILENAME], fname);
2636 rsm.F = F;
2637 return &rsm;
2638 }
2639
2640 int awk_main(int argc, char **argv)
2641 {
2642 unsigned opt;
2643 char *opt_F, *opt_v, *opt_W;
2644 int i, j, flen;
2645 var *v;
2646 var tv;
2647 char **envp;
2648 char *vnames = (char *)vNames; /* cheat */
2649 char *vvalues = (char *)vValues;
2650
2651 /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2652 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2653 if (ENABLE_LOCALE_SUPPORT)
2654 setlocale(LC_NUMERIC, "C");
2655
2656 zero_out_var(&tv);
2657
2658 /* allocate global buffer */
2659 buf = xmalloc(MAXVARFMT + 1);
2660
2661 vhash = hash_init();
2662 ahash = hash_init();
2663 fdhash = hash_init();
2664 fnhash = hash_init();
2665
2666 /* initialize variables */
2667 for (i = 0; *vnames; i++) {
2668 V[i] = v = newvar(nextword(&vnames));
2669 if (*vvalues != '\377')
2670 setvar_s(v, nextword(&vvalues));
2671 else
2672 setvar_i(v, 0);
2673
2674 if (*vnames == '*') {
2675 v->type |= VF_SPECIAL;
2676 vnames++;
2677 }
2678 }
2679
2680 handle_special(V[FS]);
2681 handle_special(V[RS]);
2682
2683 newfile("/dev/stdin")->F = stdin;
2684 newfile("/dev/stdout")->F = stdout;
2685 newfile("/dev/stderr")->F = stderr;
2686
2687 for (envp = environ; *envp; envp++) {
2688 char *s = xstrdup(*envp);
2689 char *s1 = strchr(s, '=');
2690 if (s1) {
2691 *s1++ = '\0';
2692 setvar_u(findvar(iamarray(V[ENVIRON]), s), s1);
2693 }
2694 free(s);
2695 }
2696
2697 opt = getopt32(argc, argv, "F:v:f:W:", &opt_F, &opt_v, &programname, &opt_W);
2698 argv += optind;
2699 argc -= optind;
2700 if (opt & 0x1) setvar_s(V[FS], opt_F); // -F
2701 if (opt & 0x2) if (!is_assignment(opt_v)) bb_show_usage(); // -v
2702 if (opt & 0x4) { // -f
2703 char *s = s; /* die, gcc, die */
2704 FILE *from_file = afopen(programname, "r");
2705 /* one byte is reserved for some trick in next_token */
2706 if (fseek(from_file, 0, SEEK_END) == 0) {
2707 flen = ftell(from_file);
2708 s = xmalloc(flen + 4);
2709 fseek(from_file, 0, SEEK_SET);
2710 i = 1 + fread(s + 1, 1, flen, from_file);
2711 } else {
2712 for (i = j = 1; j > 0; i += j) {
2713 s = xrealloc(s, i + 4096);
2714 j = fread(s + i, 1, 4094, from_file);
2715 }
2716 }
2717 s[i] = '\0';
2718 fclose(from_file);
2719 parse_program(s + 1);
2720 free(s);
2721 } else { // no -f: take program from 1st parameter
2722 if (!argc)
2723 bb_show_usage();
2724 programname = "cmd. line";
2725 parse_program(*argv++);
2726 argc--;
2727 }
2728 if (opt & 0x8) // -W
2729 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
2730
2731 /* fill in ARGV array */
2732 setvar_i(V[ARGC], argc + 1);
2733 setari_u(V[ARGV], 0, "awk");
2734 i = 0;
2735 while (*argv)
2736 setari_u(V[ARGV], ++i, *argv++);
2737
2738 evaluate(beginseq.first, &tv);
2739 if (!mainseq.first && !endseq.first)
2740 awk_exit(EXIT_SUCCESS);
2741
2742 /* input file could already be opened in BEGIN block */
2743 if (!iF) iF = next_input_file();
2744
2745 /* passing through input files */
2746 while (iF) {
2747 nextfile = FALSE;
2748 setvar_i(V[FNR], 0);
2749
2750 while ((i = awk_getline(iF, V[F0])) > 0) {
2751 nextrec = FALSE;
2752 incvar(V[NR]);
2753 incvar(V[FNR]);
2754 evaluate(mainseq.first, &tv);
2755
2756 if (nextfile)
2757 break;
2758 }
2759
2760 if (i < 0)
2761 runtime_error(strerror(errno));
2762
2763 iF = next_input_file();
2764 }
2765
2766 awk_exit(EXIT_SUCCESS);
2767 /*return 0;*/
2768 }