sed: understand \n, \r and \t escapes in the i and a commands. Closes 8871
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
parent
4c8fa34417
commit
cbdff15bb7
@ -218,23 +218,33 @@ static void cleanup_outname(void)
|
|||||||
|
|
||||||
/* strcpy, replacing "\from" with 'to'. If to is NUL, replacing "\any" with 'any' */
|
/* strcpy, replacing "\from" with 'to'. If to is NUL, replacing "\any" with 'any' */
|
||||||
|
|
||||||
static void parse_escapes(char *dest, const char *string, int len, char from, char to)
|
static unsigned parse_escapes(char *dest, const char *string, int len, char from, char to)
|
||||||
{
|
{
|
||||||
|
char *d = dest;
|
||||||
int i = 0;
|
int i = 0;
|
||||||
|
|
||||||
|
if (len == -1)
|
||||||
|
len = strlen(string);
|
||||||
|
|
||||||
while (i < len) {
|
while (i < len) {
|
||||||
if (string[i] == '\\') {
|
if (string[i] == '\\') {
|
||||||
if (!to || string[i+1] == from) {
|
if (!to || string[i+1] == from) {
|
||||||
*dest++ = to ? to : string[i+1];
|
if ((*d = to ? to : string[i+1]) == '\0')
|
||||||
|
return d - dest;
|
||||||
i += 2;
|
i += 2;
|
||||||
|
d++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
*dest++ = string[i++];
|
i++; /* skip backslash in string[] */
|
||||||
|
*d++ = '\\';
|
||||||
|
/* fall through: copy next char verbatim */
|
||||||
}
|
}
|
||||||
/* TODO: is it safe wrt a string with trailing '\\' ? */
|
if ((*d = string[i++]) == '\0')
|
||||||
*dest++ = string[i++];
|
return d - dest;
|
||||||
|
d++;
|
||||||
}
|
}
|
||||||
*dest = '\0';
|
*d = '\0';
|
||||||
|
return d - dest;
|
||||||
}
|
}
|
||||||
|
|
||||||
static char *copy_parsing_escapes(const char *string, int len)
|
static char *copy_parsing_escapes(const char *string, int len)
|
||||||
@ -245,9 +255,8 @@ static char *copy_parsing_escapes(const char *string, int len)
|
|||||||
/* sed recognizes \n */
|
/* sed recognizes \n */
|
||||||
/* GNU sed also recognizes \t and \r */
|
/* GNU sed also recognizes \t and \r */
|
||||||
for (s = "\nn\tt\rr"; *s; s += 2) {
|
for (s = "\nn\tt\rr"; *s; s += 2) {
|
||||||
parse_escapes(dest, string, len, s[1], s[0]);
|
len = parse_escapes(dest, string, len, s[1], s[0]);
|
||||||
string = dest;
|
string = dest;
|
||||||
len = strlen(dest);
|
|
||||||
}
|
}
|
||||||
return dest;
|
return dest;
|
||||||
}
|
}
|
||||||
@ -516,6 +525,8 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr)
|
|||||||
}
|
}
|
||||||
/* handle edit cmds: (a)ppend, (i)nsert, and (c)hange */
|
/* handle edit cmds: (a)ppend, (i)nsert, and (c)hange */
|
||||||
else if (idx <= IDX_c) { /* a,i,c */
|
else if (idx <= IDX_c) { /* a,i,c */
|
||||||
|
unsigned len;
|
||||||
|
|
||||||
if (idx < IDX_c) { /* a,i */
|
if (idx < IDX_c) { /* a,i */
|
||||||
if (sed_cmd->end_line || sed_cmd->end_match)
|
if (sed_cmd->end_line || sed_cmd->end_match)
|
||||||
bb_error_msg_and_die("command '%c' uses only one address", sed_cmd->cmd);
|
bb_error_msg_and_die("command '%c' uses only one address", sed_cmd->cmd);
|
||||||
@ -529,10 +540,11 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr)
|
|||||||
break;
|
break;
|
||||||
cmdstr++;
|
cmdstr++;
|
||||||
}
|
}
|
||||||
sed_cmd->string = xstrdup(cmdstr);
|
len = strlen(cmdstr);
|
||||||
|
sed_cmd->string = copy_parsing_escapes(cmdstr, len);
|
||||||
|
cmdstr += len;
|
||||||
/* "\anychar" -> "anychar" */
|
/* "\anychar" -> "anychar" */
|
||||||
parse_escapes(sed_cmd->string, sed_cmd->string, strlen(cmdstr), '\0', '\0');
|
parse_escapes(sed_cmd->string, sed_cmd->string, -1, '\0', '\0');
|
||||||
cmdstr += strlen(cmdstr);
|
|
||||||
}
|
}
|
||||||
/* handle file cmds: (r)ead */
|
/* handle file cmds: (r)ead */
|
||||||
else if (idx <= IDX_w) { /* r,w */
|
else if (idx <= IDX_w) { /* r,w */
|
||||||
@ -564,8 +576,8 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr)
|
|||||||
|
|
||||||
cmdstr += parse_regex_delim(cmdstr, &match, &replace)+1;
|
cmdstr += parse_regex_delim(cmdstr, &match, &replace)+1;
|
||||||
/* \n already parsed, but \delimiter needs unescaping. */
|
/* \n already parsed, but \delimiter needs unescaping. */
|
||||||
parse_escapes(match, match, strlen(match), i, i);
|
parse_escapes(match, match, -1, i, i);
|
||||||
parse_escapes(replace, replace, strlen(replace), i, i);
|
parse_escapes(replace, replace, -1, i, i);
|
||||||
|
|
||||||
sed_cmd->string = xzalloc((strlen(match) + 1) * 2);
|
sed_cmd->string = xzalloc((strlen(match) + 1) * 2);
|
||||||
for (i = 0; match[i] && replace[i]; i++) {
|
for (i = 0; match[i] && replace[i]; i++) {
|
||||||
|
@ -275,6 +275,24 @@ testing "sed a cmd ended by double backslash" \
|
|||||||
| two \\
|
| two \\
|
||||||
'
|
'
|
||||||
|
|
||||||
|
testing "sed a cmd understands \\n,\\t,\\r" \
|
||||||
|
"sed '/1/a\\\\t\\rzero\\none\\\\ntwo\\\\\\nthree'" \
|
||||||
|
"\
|
||||||
|
line1
|
||||||
|
\t\rzero
|
||||||
|
one\\\\ntwo\\
|
||||||
|
three
|
||||||
|
" "" "line1\n"
|
||||||
|
|
||||||
|
testing "sed i cmd understands \\n,\\t,\\r" \
|
||||||
|
"sed '/1/i\\\\t\\rzero\\none\\\\ntwo\\\\\\nthree'" \
|
||||||
|
"\
|
||||||
|
\t\rzero
|
||||||
|
one\\\\ntwo\\
|
||||||
|
three
|
||||||
|
line1
|
||||||
|
" "" "line1\n"
|
||||||
|
|
||||||
# first three lines are deleted; 4th line is matched and printed by "2,3" and by "4" ranges
|
# first three lines are deleted; 4th line is matched and printed by "2,3" and by "4" ranges
|
||||||
testing "sed with N skipping lines past ranges on next cmds" \
|
testing "sed with N skipping lines past ranges on next cmds" \
|
||||||
"sed -n '1{N;N;d};1p;2,3p;3p;4p'" \
|
"sed -n '1{N;N;d};1p;2,3p;3p;4p'" \
|
||||||
|
Loading…
Reference in New Issue
Block a user