awk: fix a SEGV

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
Denys Vlasenko 2011-09-11 00:30:56 +02:00
parent d48fdde370
commit 7b46d11582
2 changed files with 65 additions and 9 deletions

View File

@ -25,6 +25,7 @@
* to perform debug printfs to stderr: */ * to perform debug printfs to stderr: */
#define debug_printf_walker(...) do {} while (0) #define debug_printf_walker(...) do {} while (0)
#define debug_printf_eval(...) do {} while (0) #define debug_printf_eval(...) do {} while (0)
#define debug_printf_parse(...) do {} while (0)
#ifndef debug_printf_walker #ifndef debug_printf_walker
# define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__)) # define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
@ -32,6 +33,9 @@
#ifndef debug_printf_eval #ifndef debug_printf_eval
# define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__)) # define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
#endif #endif
#ifndef debug_printf_parse
# define debug_printf_parse(...) (fprintf(stderr, __VA_ARGS__))
#endif
@ -435,13 +439,13 @@ struct globals {
smallint nextrec; smallint nextrec;
smallint nextfile; smallint nextfile;
smallint is_f0_split; smallint is_f0_split;
smallint t_rollback;
}; };
struct globals2 { struct globals2 {
uint32_t t_info; /* often used */ uint32_t t_info; /* often used */
uint32_t t_tclass; uint32_t t_tclass;
char *t_string; char *t_string;
int t_lineno; int t_lineno;
int t_rollback;
var *intvar[NUM_INTERNAL_VARS]; /* often used */ var *intvar[NUM_INTERNAL_VARS]; /* often used */
@ -499,11 +503,11 @@ struct globals2 {
#define nextrec (G1.nextrec ) #define nextrec (G1.nextrec )
#define nextfile (G1.nextfile ) #define nextfile (G1.nextfile )
#define is_f0_split (G1.is_f0_split ) #define is_f0_split (G1.is_f0_split )
#define t_rollback (G1.t_rollback )
#define t_info (G.t_info ) #define t_info (G.t_info )
#define t_tclass (G.t_tclass ) #define t_tclass (G.t_tclass )
#define t_string (G.t_string ) #define t_string (G.t_string )
#define t_lineno (G.t_lineno ) #define t_lineno (G.t_lineno )
#define t_rollback (G.t_rollback )
#define intvar (G.intvar ) #define intvar (G.intvar )
#define fsplitter (G.fsplitter ) #define fsplitter (G.fsplitter )
#define rsplitter (G.rsplitter ) #define rsplitter (G.rsplitter )
@ -1011,6 +1015,7 @@ static uint32_t next_token(uint32_t expected)
if (*p == '\0') { if (*p == '\0') {
tc = TC_EOF; tc = TC_EOF;
debug_printf_parse("%s: token found: TC_EOF\n", __func__);
} else if (*p == '\"') { } else if (*p == '\"') {
/* it's a string */ /* it's a string */
@ -1026,6 +1031,7 @@ static uint32_t next_token(uint32_t expected)
p++; p++;
*s = '\0'; *s = '\0';
tc = TC_STRING; tc = TC_STRING;
debug_printf_parse("%s: token found:'%s' TC_STRING\n", __func__, t_string);
} else if ((expected & TC_REGEXP) && *p == '/') { } else if ((expected & TC_REGEXP) && *p == '/') {
/* it's regexp */ /* it's regexp */
@ -1048,6 +1054,7 @@ static uint32_t next_token(uint32_t expected)
p++; p++;
*s = '\0'; *s = '\0';
tc = TC_REGEXP; tc = TC_REGEXP;
debug_printf_parse("%s: token found:'%s' TC_REGEXP\n", __func__, t_string);
} else if (*p == '.' || isdigit(*p)) { } else if (*p == '.' || isdigit(*p)) {
/* it's a number */ /* it's a number */
@ -1057,6 +1064,7 @@ static uint32_t next_token(uint32_t expected)
if (*p == '.') if (*p == '.')
syntax_error(EMSG_UNEXP_TOKEN); syntax_error(EMSG_UNEXP_TOKEN);
tc = TC_NUMBER; tc = TC_NUMBER;
debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double);
} else { } else {
/* search for something known */ /* search for something known */
@ -1079,6 +1087,7 @@ static uint32_t next_token(uint32_t expected)
) { ) {
/* then this is what we are looking for */ /* then this is what we are looking for */
t_info = *ti; t_info = *ti;
debug_printf_parse("%s: token found:'%.*s' t_info:%x\n", __func__, l, p, t_info);
p += l; p += l;
goto token_found; goto token_found;
} }
@ -1102,14 +1111,17 @@ static uint32_t next_token(uint32_t expected)
p = skip_spaces(p); p = skip_spaces(p);
if (*p == '(') { if (*p == '(') {
tc = TC_FUNCTION; tc = TC_FUNCTION;
debug_printf_parse("%s: token found:'%s' TC_FUNCTION\n", __func__, t_string);
} else { } else {
if (*p == '[') { if (*p == '[') {
p++; p++;
tc = TC_ARRAY; tc = TC_ARRAY;
} debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string);
} else
debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string);
} }
token_found: ;
} }
token_found:
g_pos = p; g_pos = p;
/* skipping newlines in some cases */ /* skipping newlines in some cases */
@ -1181,6 +1193,8 @@ static node *parse_expr(uint32_t iexp)
uint32_t tc, xtc; uint32_t tc, xtc;
var *v; var *v;
debug_printf_parse("%s(%x)\n", __func__, iexp);
sn.info = PRIMASK; sn.info = PRIMASK;
sn.r.n = glptr = NULL; sn.r.n = glptr = NULL;
xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp; xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
@ -1189,12 +1203,14 @@ static node *parse_expr(uint32_t iexp)
if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) { if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
/* input redirection (<) attached to glptr node */ /* input redirection (<) attached to glptr node */
debug_printf_parse("%s: input redir\n", __func__);
cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37)); cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
cn->a.n = glptr; cn->a.n = glptr;
xtc = TC_OPERAND | TC_UOPPRE; xtc = TC_OPERAND | TC_UOPPRE;
glptr = NULL; glptr = NULL;
} else if (tc & (TC_BINOP | TC_UOPPOST)) { } else if (tc & (TC_BINOP | TC_UOPPOST)) {
debug_printf_parse("%s: TC_BINOP | TC_UOPPOST\n", __func__);
/* for binary and postfix-unary operators, jump back over /* for binary and postfix-unary operators, jump back over
* previous operators with higher priority */ * previous operators with higher priority */
vn = cn; vn = cn;
@ -1224,6 +1240,7 @@ static node *parse_expr(uint32_t iexp)
vn->a.n = cn; vn->a.n = cn;
} else { } else {
debug_printf_parse("%s: other\n", __func__);
/* for operands and prefix-unary operators, attach them /* for operands and prefix-unary operators, attach them
* to last node */ * to last node */
vn = cn; vn = cn;
@ -1231,12 +1248,14 @@ static node *parse_expr(uint32_t iexp)
cn->a.n = vn; cn->a.n = vn;
xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP; xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
if (tc & (TC_OPERAND | TC_REGEXP)) { if (tc & (TC_OPERAND | TC_REGEXP)) {
debug_printf_parse("%s: TC_OPERAND | TC_REGEXP\n", __func__);
xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp; xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
/* one should be very careful with switch on tclass - /* one should be very careful with switch on tclass -
* only simple tclasses should be used! */ * only simple tclasses should be used! */
switch (tc) { switch (tc) {
case TC_VARIABLE: case TC_VARIABLE:
case TC_ARRAY: case TC_ARRAY:
debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__);
cn->info = OC_VAR; cn->info = OC_VAR;
v = hash_search(ahash, t_string); v = hash_search(ahash, t_string);
if (v != NULL) { if (v != NULL) {
@ -1253,6 +1272,7 @@ static node *parse_expr(uint32_t iexp)
case TC_NUMBER: case TC_NUMBER:
case TC_STRING: case TC_STRING:
debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__);
cn->info = OC_VAR; cn->info = OC_VAR;
v = cn->l.v = xzalloc(sizeof(var)); v = cn->l.v = xzalloc(sizeof(var));
if (tc & TC_NUMBER) if (tc & TC_NUMBER)
@ -1262,32 +1282,41 @@ static node *parse_expr(uint32_t iexp)
break; break;
case TC_REGEXP: case TC_REGEXP:
debug_printf_parse("%s: TC_REGEXP\n", __func__);
mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2)); mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
break; break;
case TC_FUNCTION: case TC_FUNCTION:
debug_printf_parse("%s: TC_FUNCTION\n", __func__);
cn->info = OC_FUNC; cn->info = OC_FUNC;
cn->r.f = newfunc(t_string); cn->r.f = newfunc(t_string);
cn->l.n = condition(); cn->l.n = condition();
break; break;
case TC_SEQSTART: case TC_SEQSTART:
debug_printf_parse("%s: TC_SEQSTART\n", __func__);
cn = vn->r.n = parse_expr(TC_SEQTERM); cn = vn->r.n = parse_expr(TC_SEQTERM);
if (!cn)
syntax_error("Empty sequence");
cn->a.n = vn; cn->a.n = vn;
break; break;
case TC_GETLINE: case TC_GETLINE:
debug_printf_parse("%s: TC_GETLINE\n", __func__);
glptr = cn; glptr = cn;
xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp; xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
break; break;
case TC_BUILTIN: case TC_BUILTIN:
debug_printf_parse("%s: TC_BUILTIN\n", __func__);
cn->l.n = condition(); cn->l.n = condition();
break; break;
} }
} }
} }
} }
debug_printf_parse("%s() returns %p\n", __func__, sn.r.n);
return sn.r.n; return sn.r.n;
} }
@ -1356,18 +1385,25 @@ static void chain_group(void)
} while (c & TC_NEWLINE); } while (c & TC_NEWLINE);
if (c & TC_GRPSTART) { if (c & TC_GRPSTART) {
debug_printf_parse("%s: TC_GRPSTART\n", __func__);
while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) { while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
debug_printf_parse("%s: !TC_GRPTERM\n", __func__);
if (t_tclass & TC_NEWLINE) if (t_tclass & TC_NEWLINE)
continue; continue;
rollback_token(); rollback_token();
chain_group(); chain_group();
} }
debug_printf_parse("%s: TC_GRPTERM\n", __func__);
} else if (c & (TC_OPSEQ | TC_OPTERM)) { } else if (c & (TC_OPSEQ | TC_OPTERM)) {
debug_printf_parse("%s: TC_OPSEQ | TC_OPTERM\n", __func__);
rollback_token(); rollback_token();
chain_expr(OC_EXEC | Vx); chain_expr(OC_EXEC | Vx);
} else { /* TC_STATEMNT */ } else {
/* TC_STATEMNT */
debug_printf_parse("%s: TC_STATEMNT(?)\n", __func__);
switch (t_info & OPCLSMASK) { switch (t_info & OPCLSMASK) {
case ST_IF: case ST_IF:
debug_printf_parse("%s: ST_IF\n", __func__);
n = chain_node(OC_BR | Vx); n = chain_node(OC_BR | Vx);
n->l.n = condition(); n->l.n = condition();
chain_group(); chain_group();
@ -1382,12 +1418,14 @@ static void chain_group(void)
break; break;
case ST_WHILE: case ST_WHILE:
debug_printf_parse("%s: ST_WHILE\n", __func__);
n2 = condition(); n2 = condition();
n = chain_loop(NULL); n = chain_loop(NULL);
n->l.n = n2; n->l.n = n2;
break; break;
case ST_DO: case ST_DO:
debug_printf_parse("%s: ST_DO\n", __func__);
n2 = chain_node(OC_EXEC); n2 = chain_node(OC_EXEC);
n = chain_loop(NULL); n = chain_loop(NULL);
n2->a.n = n->a.n; n2->a.n = n->a.n;
@ -1396,6 +1434,7 @@ static void chain_group(void)
break; break;
case ST_FOR: case ST_FOR:
debug_printf_parse("%s: ST_FOR\n", __func__);
next_token(TC_SEQSTART); next_token(TC_SEQSTART);
n2 = parse_expr(TC_SEMICOL | TC_SEQTERM); n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
if (t_tclass & TC_SEQTERM) { /* for-in */ if (t_tclass & TC_SEQTERM) { /* for-in */
@ -1421,6 +1460,7 @@ static void chain_group(void)
case OC_PRINT: case OC_PRINT:
case OC_PRINTF: case OC_PRINTF:
debug_printf_parse("%s: OC_PRINT[F]\n", __func__);
n = chain_node(t_info); n = chain_node(t_info);
n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM); n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
if (t_tclass & TC_OUTRDR) { if (t_tclass & TC_OUTRDR) {
@ -1432,17 +1472,20 @@ static void chain_group(void)
break; break;
case OC_BREAK: case OC_BREAK:
debug_printf_parse("%s: OC_BREAK\n", __func__);
n = chain_node(OC_EXEC); n = chain_node(OC_EXEC);
n->a.n = break_ptr; n->a.n = break_ptr;
break; break;
case OC_CONTINUE: case OC_CONTINUE:
debug_printf_parse("%s: OC_CONTINUE\n", __func__);
n = chain_node(OC_EXEC); n = chain_node(OC_EXEC);
n->a.n = continue_ptr; n->a.n = continue_ptr;
break; break;
/* delete, next, nextfile, return, exit */ /* delete, next, nextfile, return, exit */
default: default:
debug_printf_parse("%s: default\n", __func__);
chain_expr(t_info); chain_expr(t_info);
} }
} }
@ -1460,19 +1503,24 @@ static void parse_program(char *p)
while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART | while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) { TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
if (tclass & TC_OPTERM) if (tclass & TC_OPTERM) {
debug_printf_parse("%s: TC_OPTERM\n", __func__);
continue; continue;
}
seq = &mainseq; seq = &mainseq;
if (tclass & TC_BEGIN) { if (tclass & TC_BEGIN) {
debug_printf_parse("%s: TC_BEGIN\n", __func__);
seq = &beginseq; seq = &beginseq;
chain_group(); chain_group();
} else if (tclass & TC_END) { } else if (tclass & TC_END) {
debug_printf_parse("%s: TC_END\n", __func__);
seq = &endseq; seq = &endseq;
chain_group(); chain_group();
} else if (tclass & TC_FUNCDECL) { } else if (tclass & TC_FUNCDECL) {
debug_printf_parse("%s: TC_FUNCDECL\n", __func__);
next_token(TC_FUNCTION); next_token(TC_FUNCTION);
g_pos++; g_pos++;
f = newfunc(t_string); f = newfunc(t_string);
@ -1490,22 +1538,27 @@ static void parse_program(char *p)
clear_array(ahash); clear_array(ahash);
} else if (tclass & TC_OPSEQ) { } else if (tclass & TC_OPSEQ) {
debug_printf_parse("%s: TC_OPSEQ\n", __func__);
rollback_token(); rollback_token();
cn = chain_node(OC_TEST); cn = chain_node(OC_TEST);
cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART); cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
if (t_tclass & TC_GRPSTART) { if (t_tclass & TC_GRPSTART) {
debug_printf_parse("%s: TC_GRPSTART\n", __func__);
rollback_token(); rollback_token();
chain_group(); chain_group();
} else { } else {
debug_printf_parse("%s: !TC_GRPSTART\n", __func__);
chain_node(OC_PRINT); chain_node(OC_PRINT);
} }
cn->r.n = mainseq.last; cn->r.n = mainseq.last;
} else /* if (tclass & TC_GRPSTART) */ { } else /* if (tclass & TC_GRPSTART) */ {
debug_printf_parse("%s: TC_GRPSTART(?)\n", __func__);
rollback_token(); rollback_token();
chain_group(); chain_group();
} }
} }
debug_printf_parse("%s: TC_EOF\n", __func__);
} }
@ -3000,7 +3053,7 @@ int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
int awk_main(int argc, char **argv) int awk_main(int argc, char **argv)
{ {
unsigned opt; unsigned opt;
char *opt_F, *opt_W; char *opt_F;
llist_t *list_v = NULL; llist_t *list_v = NULL;
llist_t *list_f = NULL; llist_t *list_f = NULL;
int i, j; int i, j;
@ -3062,7 +3115,7 @@ int awk_main(int argc, char **argv)
} }
} }
opt_complementary = "v::f::"; /* -v and -f can occur multiple times */ opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W); opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, NULL);
argv += optind; argv += optind;
argc -= optind; argc -= optind;
if (opt & 0x1) if (opt & 0x1)
@ -3096,7 +3149,7 @@ int awk_main(int argc, char **argv)
parse_program(*argv++); parse_program(*argv++);
} }
if (opt & 0x8) // -W if (opt & 0x8) // -W
bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W); bb_error_msg("warning: option -W is ignored");
/* fill in ARGV array */ /* fill in ARGV array */
setvar_i(intvar[ARGC], argc); setvar_i(intvar[ARGC], argc);

View File

@ -202,4 +202,7 @@ end d
" \ " \
"" "" "" ""
testing "awk handles empty ()" \
"awk 'BEGIN {print()}' 2>&1" "awk: cmd. line:1: Empty sequence\n" "" ""
exit $FAILCOUNT exit $FAILCOUNT