awk: make -F STR interpret escape sequences. Closes 5126

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
Denys Vlasenko 2012-06-22 18:41:01 +02:00
parent 440a509849
commit ea664dde87
2 changed files with 24 additions and 7 deletions

View File

@ -696,6 +696,10 @@ static char nextchar(char **s)
pps = *s; pps = *s;
if (c == '\\') if (c == '\\')
c = bb_process_escape_sequence((const char**)s); c = bb_process_escape_sequence((const char**)s);
/* Example awk statement:
* s = "abc\"def"
* we must treat \" as "
*/
if (c == '\\' && *s == pps) { /* unrecognized \z? */ if (c == '\\' && *s == pps) { /* unrecognized \z? */
c = *(*s); /* yes, fetch z */ c = *(*s); /* yes, fetch z */
if (c) if (c)
@ -704,6 +708,15 @@ static char nextchar(char **s)
return c; return c;
} }
/* Rewrite the NUL-terminated string s1 in place: every character that
 * nextchar() yields while scanning the string (backslash escapes are
 * handled inside nextchar()) is stored back into the same buffer, and
 * the terminating NUL is stored last.
 *
 * TODO: merge with strcpy_and_process_escape_sequences()?
 */
static void unescape_string_in_place(char *s1)
{
	char *src = s1; /* read cursor, advanced by nextchar() */
	char ch;

	do {
		ch = nextchar(&src);
		*s1++ = ch; /* write cursor; the NUL is stored too */
	} while (ch != '\0');
}
static ALWAYS_INLINE int isalnum_(int c) static ALWAYS_INLINE int isalnum_(int c)
{ {
return (isalnum(c) || c == '_'); return (isalnum(c) || c == '_');
@ -2992,7 +3005,7 @@ static int awk_exit(int r)
* otherwise return 0 */ * otherwise return 0 */
static int is_assignment(const char *expr) static int is_assignment(const char *expr)
{ {
char *exprc, *val, *s, *s1; char *exprc, *val;
if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) { if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) {
return FALSE; return FALSE;
@ -3002,10 +3015,7 @@ static int is_assignment(const char *expr)
val = exprc + (val - expr); val = exprc + (val - expr);
*val++ = '\0'; *val++ = '\0';
s = s1 = val; unescape_string_in_place(val);
while ((*s1 = nextchar(&s)) != '\0')
s1++;
setvar_u(newvar(exprc), val); setvar_u(newvar(exprc), val);
free(exprc); free(exprc);
return TRUE; return TRUE;
@ -3118,8 +3128,10 @@ int awk_main(int argc, char **argv)
opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, NULL); opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, NULL);
argv += optind; argv += optind;
argc -= optind; argc -= optind;
if (opt & 0x1) if (opt & 0x1) { /* -F */
setvar_s(intvar[FS], opt_F); // -F unescape_string_in_place(opt_F);
setvar_s(intvar[FS], opt_F);
}
while (list_v) { /* -v */ while (list_v) { /* -v */
if (!is_assignment(llist_pop(&list_v))) if (!is_assignment(llist_pop(&list_v)))
bb_show_usage(); bb_show_usage();

View File

@ -43,6 +43,11 @@ testing "awk long field sep" "awk -F-- '{ print NF, length(\$NF), \$NF }'" \
"" \ "" \
"a--\na--b--\na--b--c--\na--b--c--d--" "a--\na--b--\na--b--c--\na--b--c--d--"
# Regression case for "-F STR": after shell unquoting awk receives -F'\x21'.
# \x21 is the escape for '!' (ASCII 0x21), so the input line "a!b" should be
# split on '!' and $1 printed as "a".
# NOTE(review): presumably the arguments after the command are expected
# output, input file contents and stdin, in that order -- confirm against
# the testsuite's `testing` helper.
testing "awk -F handles escapes" "awk -F'\\x21' '{print \$1}'" \
"a\n" \
"" \
"a!b\n"
# '@(samp|code|file)\{' is an invalid extended regex (unmatched '{'), # '@(samp|code|file)\{' is an invalid extended regex (unmatched '{'),
# but gawk 3.1.5 does not bail out on it. # but gawk 3.1.5 does not bail out on it.
testing "awk gsub falls back to non-extended-regex" \ testing "awk gsub falls back to non-extended-regex" \