From e998c7c032458a05a7afcc13ce0dc980b99ecc6c Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 23 Jan 2022 18:48:49 +0100 Subject: [PATCH] sed: fix handling of escaped delimiters in s/// search pattern, closes 14541 function old new delta copy_parsing_escapes 67 96 +29 parse_regex_delim 109 111 +2 get_address 213 215 +2 add_cmd 1176 1178 +2 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 4/0 up/down: 35/0) Total: 35 bytes Signed-off-by: Denys Vlasenko --- editors/sed.c | 19 +++++++++++-------- testsuite/sed.tests | 10 ++++++++++ 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/editors/sed.c b/editors/sed.c index 48b0dbf67..02a527b4a 100644 --- a/editors/sed.c +++ b/editors/sed.c @@ -246,7 +246,6 @@ static void cleanup_outname(void) } /* strcpy, replacing "\from" with 'to'. If to is NUL, replacing "\any" with 'any' */ - static unsigned parse_escapes(char *dest, const char *string, int len, char from, char to) { char *d = dest; @@ -276,7 +275,7 @@ static unsigned parse_escapes(char *dest, const char *string, int len, char from return d - dest; } -static char *copy_parsing_escapes(const char *string, int len) +static char *copy_parsing_escapes(const char *string, int len, char delim) { const char *s; char *dest = xmalloc(len + 1); @@ -287,10 +286,15 @@ static char *copy_parsing_escapes(const char *string, int len) len = parse_escapes(dest, string, len, s[1], s[0]); string = dest; } + if (delim) { + /* we additionally unescape any instances of escaped delimiter. + * For example, in 's+9\++X+' the pattern is "9+", not "9\+". + */ + len = parse_escapes(dest, string, len, delim, delim); + } return dest; } - /* * index_of_next_unescaped_regexp_delim - walks left to right through a string * beginning at a specified index and returns the index of the next regular @@ -347,12 +351,11 @@ static int parse_regex_delim(const char *cmdstr, char **match, char **replace) /* save the match string */ idx = index_of_next_unescaped_regexp_delim(delimiter, cmdstr_ptr); - *match = copy_parsing_escapes(cmdstr_ptr, idx); - + *match = copy_parsing_escapes(cmdstr_ptr, idx, delimiter); /* save the replacement string */ cmdstr_ptr += idx + 1; idx = index_of_next_unescaped_regexp_delim(- (int)delimiter, cmdstr_ptr); - *replace = copy_parsing_escapes(cmdstr_ptr, idx); + *replace = copy_parsing_escapes(cmdstr_ptr, idx, 0); return ((cmdstr_ptr - cmdstr) + idx); } @@ -380,7 +383,7 @@ static int get_address(const char *my_str, int *linenum, regex_t ** regex) delimiter = *++pos; next = index_of_next_unescaped_regexp_delim(delimiter, ++pos); if (next != 0) { - temp = copy_parsing_escapes(pos, next); + temp = copy_parsing_escapes(pos, next, 0); G.previous_regex_ptr = *regex = xzalloc(sizeof(regex_t)); xregcomp(*regex, temp, G.regex_type); free(temp); @@ -575,7 +578,7 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr) cmdstr++; } len = strlen(cmdstr); - sed_cmd->string = copy_parsing_escapes(cmdstr, len); + sed_cmd->string = copy_parsing_escapes(cmdstr, len, 0); cmdstr += len; /* "\anychar" -> "anychar" */ parse_escapes(sed_cmd->string, sed_cmd->string, -1, '\0', '\0'); diff --git a/testsuite/sed.tests b/testsuite/sed.tests index e62b839f7..440996a21 100755 --- a/testsuite/sed.tests +++ b/testsuite/sed.tests @@ -324,6 +324,16 @@ testing "sed zero chars match/replace logic must not falsely trigger here 2" \ "sed 's/ *$/_/g'" \ "qwerty_\n" "" "qwerty\n" +# the pattern here is interpreted as "9+", not as "9\+" +testing "sed special char as s/// delimiter, in pattern" \ + "sed 's+9\++X+'" \ + "X8=17\n" "" "9+8=17\n" + +# but in replacement string, "\&" remains "\&", not interpreted as "&" +testing "sed special char as s/// delimiter, in replacement" \ + "sed 's&9&X\&&'" \ + "X&+8=17\n" "" "9+8=17\n" + testing "sed /\$_in_regex/ should not match newlines, only end-of-line" \ "sed ': testcont; /\\\\$/{ =; N; b testcont }'" \ "\