awk: make code a bit less obfuscated

This commit is contained in:
Denis Vlasenko 2007-05-17 23:03:35 +00:00
parent 5b34083004
commit ffba941d29

View File

@ -161,19 +161,19 @@ typedef struct tsplitter_s {
/* combined token classes */
#define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
#define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
#define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION | \
TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
#define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
| TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
#define TC_STATEMNT (TC_STATX | TC_WHILE)
#define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
/* word tokens, cannot mean something else if not expected */
#define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN | \
TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
#define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
| TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
/* discard newlines after these */
#define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM | \
TC_BINOP | TC_OPTERM)
#define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
| TC_BINOP | TC_OPTERM)
/* what can expression begin with */
#define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
@ -182,8 +182,8 @@ typedef struct tsplitter_s {
/* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
/* operator is inserted between them */
#define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM | \
TC_STRING | TC_NUMBER | TC_UOPPOST)
#define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
| TC_STRING | TC_NUMBER | TC_UOPPOST)
#define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
#define OF_RES1 0x010000
@ -245,13 +245,13 @@ enum {
/* simple builtins */
enum {
F_in=0, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
F_in, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
F_ti, F_le, F_sy, F_ff, F_cl
};
/* builtins */
enum {
B_a2=0, B_ix, B_ma, B_sp, B_ss, B_ti, B_lo, B_up,
B_a2, B_ix, B_ma, B_sp, B_ss, B_ti, B_lo, B_up,
B_ge, B_gs, B_su,
B_an, B_co, B_ls, B_or, B_rs, B_xo,
};
@ -365,12 +365,12 @@ static const uint32_t tokeninfo[] = {
/* internal variable names and their initial values */
/* asterisk marks SPECIAL vars; $ is just no-named Field0 */
enum {
CONVFMT=0, OFMT, FS, OFS,
CONVFMT, OFMT, FS, OFS,
ORS, RS, RT, FILENAME,
SUBSEP, ARGIND, ARGC, ARGV,
ERRNO, FNR,
NR, NF, IGNORECASE,
ENVIRON, F0, _intvarcount_
ENVIRON, F0, NUM_INTERNAL_VARS
};
static const char vNames[] =
@ -390,11 +390,11 @@ static const char vValues[] =
/* hash size may grow to these values */
/* first size in the sequence (presumably the initial hash size -- confirm at
 * the use site); deliberately no trailing ';' so the macro can be used inside
 * expressions and argument lists, not only as a whole statement */
#define FIRST_PRIME 61
static const unsigned PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
enum { NPRIMES = sizeof(PRIMES) / sizeof(unsigned) };
enum { NPRIMES = sizeof(PRIMES) / sizeof(PRIMES[0]) };
/* globals */
static var * V[_intvarcount_];
static var *intvar[NUM_INTERNAL_VARS];
static chain beginseq, mainseq, endseq, *seq;
static int nextrec, nextfile;
static node *break_ptr, *continue_ptr;
@ -421,6 +421,7 @@ static struct {
int rollback;
} ttt;
/* It had an even better name: 't'. Whoever knows what it is, please rename! */
/* (actually it looks like unrelated stuff lumped together...) */
/* function prototypes */
static void handle_special(var *);
@ -671,7 +672,6 @@ static var *setvar_p(var *v, char *value)
clrvar(v);
v->string = value;
handle_special(v);
return v;
}
@ -692,8 +692,8 @@ static var *setvar_u(var *v, const char *value)
/* set array element to user string */
static void setari_u(var *a, int idx, const char *s)
{
char sidx[sizeof(int)*3 + 1];
var *v;
static char sidx[12];
sprintf(sidx, "%d", idx);
v = findvar(iamarray(a), sidx);
@ -714,7 +714,7 @@ static const char *getvar_s(var *v)
{
/* if v is numeric and has no cached string, convert it to string */
if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
fmt_num(buf, MAXVARFMT, getvar_s(V[CONVFMT]), v->number, TRUE);
fmt_num(buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
v->string = xstrdup(buf);
v->type |= VF_CACHED;
}
@ -868,7 +868,8 @@ static uint32_t next_token(uint32_t expected)
skip_spaces(&p);
lineno = ttt.lineno;
if (*p == '#')
while (*p != '\n' && *p != '\0') p++;
while (*p != '\n' && *p != '\0')
p++;
if (*p == '\n')
ttt.lineno++;
@ -894,11 +895,14 @@ static uint32_t next_token(uint32_t expected)
while (*p != '/') {
if (*p == '\0' || *p == '\n')
syntax_error(EMSG_UNEXP_EOS);
if ((*s++ = *p++) == '\\') {
*s++ = *p++;
if (*s++ == '\\') {
pp = p;
*(s-1) = bb_process_escape_sequence((const char **)&p);
if (*pp == '\\') *s++ = '\\';
if (p == pp) *s++ = *p++;
if (*pp == '\\')
*s++ = '\\';
if (p == pp)
*s++ = *p++;
}
}
p++;
@ -927,9 +931,10 @@ static uint32_t next_token(uint32_t expected)
* matches and it's not a longer word,
* then this is what we are looking for
*/
if ((tc & (expected | TC_WORD | TC_NEWLINE)) &&
*tl == *p && strncmp(p, tl, l) == 0 &&
!((tc & TC_WORD) && isalnum_(*(p + l)))) {
if ((tc & (expected | TC_WORD | TC_NEWLINE))
&& *tl == *p && strncmp(p, tl, l) == 0
&& !((tc & TC_WORD) && isalnum_(p[l]))
) {
ttt.info = *ti;
p += l;
break;
@ -952,7 +957,8 @@ static uint32_t next_token(uint32_t expected)
*(p-1) = '\0';
tc = TC_VARIABLE;
/* also consume whitespace between functionname and bracket */
if (!(expected & TC_VARIABLE)) skip_spaces(&p);
if (!(expected & TC_VARIABLE))
skip_spaces(&p);
if (*p == '(') {
tc = TC_FUNCTION;
} else {
@ -1048,8 +1054,8 @@ static node *parse_expr(uint32_t iexp)
/* for binary and postfix-unary operators, jump back over
* previous operators with higher priority */
vn = cn;
while ( ((ttt.info & PRIMASK) > (vn->a.n->info & PRIMASK2)) ||
((ttt.info == vn->info) && ((ttt.info & OPCLSMASK) == OC_COLON)) )
while ( ((ttt.info & PRIMASK) > (vn->a.n->info & PRIMASK2))
|| ((ttt.info == vn->info) && ((ttt.info & OPCLSMASK) == OC_COLON)) )
vn = vn->a.n;
if ((ttt.info & OPCLSMASK) == OC_TERNARY)
ttt.info += P(6);
@ -1086,7 +1092,8 @@ static node *parse_expr(uint32_t iexp)
case TC_VARIABLE:
case TC_ARRAY:
cn->info = OC_VAR;
if ((v = hash_search(ahash, ttt.string)) != NULL) {
v = hash_search(ahash, ttt.string);
if (v != NULL) {
cn->info = OC_FNARG;
cn->l.i = v->x.aidx;
} else {
@ -1389,14 +1396,13 @@ static regex_t *as_regex(node *op, regex_t *preg)
if ((op->info & OPCLSMASK) == OC_REGEXP) {
return icase ? op->r.ire : op->l.re;
} else {
}
v = nvalloc(1);
s = getvar_s(evaluate(op, v));
xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
nvfree(v);
return preg;
}
}
/* gradually increasing buffer */
static void qrealloc(char **b, int n, int *size)
@ -1442,7 +1448,8 @@ static int awk_split(const char *s, node *spl, char **slist)
c[0] = c[1] = (char)spl->info;
c[2] = c[3] = '\0';
if (*getvar_s(V[RS]) == '\0') c[2] = '\n';
if (*getvar_s(intvar[RS]) == '\0')
c[2] = '\n';
if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
while (*s) {
@ -1451,7 +1458,10 @@ static int awk_split(const char *s, node *spl, char **slist)
&& pmatch[0].rm_so <= l
) {
l = pmatch[0].rm_so;
if (pmatch[0].rm_eo == 0) { l++; pmatch[0].rm_eo++; }
if (pmatch[0].rm_eo == 0) {
l++;
pmatch[0].rm_eo++;
}
} else {
pmatch[0].rm_eo = l;
if (s[l]) pmatch[0].rm_eo++;
@ -1495,6 +1505,7 @@ static int awk_split(const char *s, node *spl, char **slist)
static void split_f0(void)
{
static char *fstrings = NULL;
int i, n;
char *s;
@ -1504,7 +1515,7 @@ static void split_f0(void)
is_f0_split = TRUE;
free(fstrings);
fsrealloc(0);
n = awk_split(getvar_s(V[F0]), &fsplitter.n, &fstrings);
n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
fsrealloc(n);
s = fstrings;
for (i = 0; i < n; i++) {
@ -1513,9 +1524,9 @@ static void split_f0(void)
}
/* set NF manually to avoid side effects */
clrvar(V[NF]);
V[NF]->type = VF_NUMBER | VF_SPECIAL;
V[NF]->number = nfields;
clrvar(intvar[NF]);
intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
intvar[NF]->number = nfields;
}
/* perform additional actions when some internal variables changed */
@ -1529,12 +1540,12 @@ static void handle_special(var *v)
if (!(v->type & VF_SPECIAL))
return;
if (v == V[NF]) {
if (v == intvar[NF]) {
n = (int)getvar_i(v);
fsrealloc(n);
/* recalculate $0 */
sep = getvar_s(V[OFS]);
sep = getvar_s(intvar[OFS]);
sl = strlen(sep);
b = NULL;
len = 0;
@ -1551,24 +1562,24 @@ static void handle_special(var *v)
}
if (b)
b[len] = '\0';
setvar_p(V[F0], b);
setvar_p(intvar[F0], b);
is_f0_split = TRUE;
} else if (v == V[F0]) {
} else if (v == intvar[F0]) {
is_f0_split = FALSE;
} else if (v == V[FS]) {
} else if (v == intvar[FS]) {
mk_splitter(getvar_s(v), &fsplitter);
} else if (v == V[RS]) {
} else if (v == intvar[RS]) {
mk_splitter(getvar_s(v), &rsplitter);
} else if (v == V[IGNORECASE]) {
} else if (v == intvar[IGNORECASE]) {
icase = istrue(v);
} else { /* $n */
n = getvar_i(V[NF]);
setvar_i(V[NF], n > v-Fields ? n : v-Fields+1);
n = getvar_i(intvar[NF]);
setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
/* right here v is invalid. Just to note... */
}
}
@ -1599,7 +1610,7 @@ static void hashwalk_init(var *v, xhash *array)
v->type |= VF_WALK;
w = v->x.walker = xzalloc(2 + 2*sizeof(char *) + array->glen);
*w = *(w+1) = (char *)(w + 2);
w[0] = w[1] = (char *)(w + 2);
for (i = 0; i < array->csize; i++) {
hi = array->items[i];
while (hi) {
@ -1615,7 +1626,7 @@ static int hashwalk_next(var *v)
char **w;
w = v->x.walker;
if (*(w+1) == *w)
if (w[1] == w[0])
return FALSE;
setvar_s(v, nextword(w+1));
@ -1698,7 +1709,7 @@ static int awk_getline(rstream *rsm, var *v)
if (p < pp) {
p = 0;
r = 0;
setvar_i(V[ERRNO], errno);
setvar_i(intvar[ERRNO], errno);
}
b[p] = '\0';
@ -1712,7 +1723,7 @@ static int awk_getline(rstream *rsm, var *v)
v->type |= VF_USER;
b[so] = c;
c = b[eo]; b[eo] = '\0';
setvar_s(V[RT], b+so);
setvar_s(intvar[RT], b+so);
b[eo] = c;
}
@ -1782,12 +1793,10 @@ static char *awk_printf(node *n)
if (c == 'c' || !c) {
i += sprintf(b+i, s, is_numeric(arg) ?
(char)getvar_i(arg) : *getvar_s(arg));
} else if (c == 's') {
s1 = getvar_s(arg);
qrealloc(&b, incr+i+strlen(s1), &bsize);
i += sprintf(b+i, s, s1);
} else {
i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
}
@ -1820,8 +1829,8 @@ static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int
regex_t sreg, *re;
re = as_regex(rn, &sreg);
if (! src) src = V[F0];
if (! dest) dest = V[F0];
if (! src) src = intvar[F0];
if (! dest) dest = intvar[F0];
i = di = 0;
sp = getvar_s(src);
@ -1940,7 +1949,8 @@ static var *exec_builtin(node *op, var *res)
case B_ss:
l = strlen(as[0]);
i = getvar_i(av[1]) - 1;
if (i>l) i=l; if (i<0) i=0;
if (i > l) i = l;
if (i < 0) i = 0;
n = (nargs > 2) ? getvar_i(av[2]) : l-i;
if (n < 0) n = 0;
s = xmalloc(n+1);
@ -2168,22 +2178,22 @@ static var *evaluate(node *op, var *res)
if ((opinfo & OPCLSMASK) == OC_PRINT) {
if (! op1) {
fputs(getvar_s(V[F0]), X.F);
fputs(getvar_s(intvar[F0]), X.F);
} else {
while (op1) {
L.v = evaluate(nextarg(&op1), v1);
if (L.v->type & VF_NUMBER) {
fmt_num(buf, MAXVARFMT, getvar_s(V[OFMT]),
fmt_num(buf, MAXVARFMT, getvar_s(intvar[OFMT]),
getvar_i(L.v), TRUE);
fputs(buf, X.F);
} else {
fputs(getvar_s(L.v), X.F);
}
if (op1) fputs(getvar_s(V[OFS]), X.F);
if (op1) fputs(getvar_s(intvar[OFS]), X.F);
}
}
fputs(getvar_s(V[ORS]), X.F);
fputs(getvar_s(intvar[ORS]), X.F);
} else { /* OC_PRINTF */
L.s = awk_printf(op1);
@ -2235,7 +2245,7 @@ static var *evaluate(node *op, var *res)
case XC( OC_VAR ):
L.v = op->l.v;
if (L.v == V[NF])
if (L.v == intvar[NF])
split_f0();
goto v_cont;
@ -2251,7 +2261,7 @@ static var *evaluate(node *op, var *res)
case XC( OC_REGEXP ):
op1 = op;
L.s = getvar_s(V[F0]);
L.s = getvar_s(intvar[F0]);
goto re_cont;
case XC( OC_MATCH ):
@ -2322,19 +2332,19 @@ static var *evaluate(node *op, var *res)
}
if (!X.rsm->F) {
setvar_i(V[ERRNO], errno);
setvar_i(intvar[ERRNO], errno);
setvar_i(res, -1);
break;
}
if (!op->r.n)
R.v = V[F0];
R.v = intvar[F0];
L.i = awk_getline(X.rsm, R.v);
if (L.i > 0) {
if (!op1) {
incvar(V[FNR]);
incvar(V[NR]);
incvar(intvar[FNR]);
incvar(intvar[NR]);
}
}
setvar_i(res, L.i);
@ -2392,7 +2402,7 @@ static var *evaluate(node *op, var *res)
case F_le:
if (!op1)
L.s = getvar_s(V[F0]);
L.s = getvar_s(intvar[F0]);
R.d = strlen(L.s);
break;
@ -2423,7 +2433,7 @@ static var *evaluate(node *op, var *res)
hash_remove(fdhash, L.s);
}
if (R.i != 0)
setvar_i(V[ERRNO], errno);
setvar_i(intvar[ERRNO], errno);
R.d = (double)R.i;
break;
}
@ -2469,12 +2479,11 @@ static var *evaluate(node *op, var *res)
case XC( OC_FIELD ):
R.i = (int)getvar_i(R.v);
if (R.i == 0) {
res = V[F0];
res = intvar[F0];
} else {
split_f0();
if (R.i > nfields)
fsrealloc(R.i);
res = &Fields[R.i - 1];
}
break;
@ -2486,7 +2495,7 @@ static var *evaluate(node *op, var *res)
X.s = xmalloc(opn);
strcpy(X.s, L.s);
if ((opinfo & OPCLSMASK) == OC_COMMA) {
L.s = getvar_s(V[SUBSEP]);
L.s = getvar_s(intvar[SUBSEP]);
X.s = xrealloc(X.s, opn + strlen(L.s));
strcat(X.s, L.s);
}
@ -2627,30 +2636,31 @@ static int is_assignment(const char *expr)
static rstream *next_input_file(void)
{
static rstream rsm;
static int files_happen = FALSE;
FILE *F = NULL;
const char *fname, *ind;
static int files_happen = FALSE;
if (rsm.F) fclose(rsm.F);
rsm.F = NULL;
rsm.pos = rsm.adv = 0;
do {
if (getvar_i(V[ARGIND])+1 >= getvar_i(V[ARGC])) {
if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
if (files_happen)
return NULL;
fname = "-";
F = stdin;
} else {
ind = getvar_s(incvar(V[ARGIND]));
fname = getvar_s(findvar(iamarray(V[ARGV]), ind));
ind = getvar_s(incvar(intvar[ARGIND]));
fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
if (fname && *fname && !is_assignment(fname))
F = afopen(fname, "r");
}
} while (!F);
files_happen = TRUE;
setvar_s(V[FILENAME], fname);
setvar_s(intvar[FILENAME], fname);
rsm.F = F;
return &rsm;
}
@ -2685,7 +2695,7 @@ int awk_main(int argc, char **argv)
/* initialize variables */
for (i = 0; *vnames; i++) {
V[i] = v = newvar(nextword(&vnames));
intvar[i] = v = newvar(nextword(&vnames));
if (*vvalues != '\377')
setvar_s(v, nextword(&vvalues));
else
@ -2697,8 +2707,8 @@ int awk_main(int argc, char **argv)
}
}
handle_special(V[FS]);
handle_special(V[RS]);
handle_special(intvar[FS]);
handle_special(intvar[RS]);
newfile("/dev/stdin")->F = stdin;
newfile("/dev/stdout")->F = stdout;
@ -2710,7 +2720,7 @@ int awk_main(int argc, char **argv)
char *s1 = strchr(s, '=');
if (s1) {
*s1++ = '\0';
setvar_u(findvar(iamarray(V[ENVIRON]), s), s1);
setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1);
}
free(s);
}
@ -2718,7 +2728,8 @@ int awk_main(int argc, char **argv)
opt = getopt32(argc, argv, "F:v:f:W:", &opt_F, &opt_v, &programname, &opt_W);
argv += optind;
argc -= optind;
if (opt & 0x1) setvar_s(V[FS], opt_F); // -F
if (opt & 0x1)
setvar_s(intvar[FS], opt_F); // -F
while (opt_v) { /* -v */
if (!is_assignment(llist_pop(&opt_v)))
bb_show_usage();
@ -2753,11 +2764,11 @@ int awk_main(int argc, char **argv)
bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
/* fill in ARGV array */
setvar_i(V[ARGC], argc + 1);
setari_u(V[ARGV], 0, "awk");
setvar_i(intvar[ARGC], argc + 1);
setari_u(intvar[ARGV], 0, "awk");
i = 0;
while (*argv)
setari_u(V[ARGV], ++i, *argv++);
setari_u(intvar[ARGV], ++i, *argv++);
evaluate(beginseq.first, &tv);
if (!mainseq.first && !endseq.first)
@ -2769,12 +2780,12 @@ int awk_main(int argc, char **argv)
/* passing through input files */
while (iF) {
nextfile = FALSE;
setvar_i(V[FNR], 0);
setvar_i(intvar[FNR], 0);
while ((i = awk_getline(iF, V[F0])) > 0) {
while ((i = awk_getline(iF, intvar[F0])) > 0) {
nextrec = FALSE;
incvar(V[NR]);
incvar(V[FNR]);
incvar(intvar[NR]);
incvar(intvar[FNR]);
evaluate(mainseq.first, &tv);
if (nextfile)