vi: allow regular expressions in ':s' commands

BusyBox vi has never supported the use of regular expressions in
search/replace (':s') commands.  Implement this using GNU regex
when VI_REGEX_SEARCH is enabled.

The implementation:

- uses basic regular expressions, to match those used in the search
  command;

- only supports substitution of back references ('\0' - '\9') in the
  replacement string.  Any other character following a backslash is
  treated as that literal character.

VI_REGEX_SEARCH isn't enabled in the default build.  In that case:

function                                             old     new   delta
colon                                               4036    4033      -3
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-3)               Total: -3 bytes

When VI_REGEX_SEARCH is enabled:

function                                             old     new   delta
colon                                               4036    4378    +342
.rodata                                           108207  108229     +22
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 2/0 up/down: 364/0)             Total: 364 bytes

v2: Rebase.  Code shrink.  Ensure empty replacement string is null terminated.

Signed-off-by: Andrey Dobrovolsky <andrey.dobrovolsky.odessa@gmail.com>
Signed-off-by: Ron Yorston <rmy@pobox.com>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
Denys Vlasenko 2021-07-13 14:38:20 +02:00
parent c76c78740a
commit 95ac4a48f1

View File

@ -2677,6 +2677,59 @@ static char *expand_args(char *args)
# endif # endif
#endif /* FEATURE_VI_COLON */ #endif /* FEATURE_VI_COLON */
#if ENABLE_FEATURE_VI_REGEX_SEARCH
# define MAX_SUBPATTERN 10	// subpatterns \0 .. \9

// Search the text at q for the compiled pattern and expand the replacement
// template for the match that was found.
//
//   q      start of the text to search; the searched range is limited to
//          [q, end_line(q)) by means of REG_STARTEND
//   preg   compiled regular expression
//   Rorig  NUL-terminated replacement template.  "\0" .. "\9" are replaced
//          by the text of the corresponding subpattern; if that subpattern
//          has a negative start offset the digit itself is copied.  Any
//          other character following a backslash is copied literally.
//          A backslash that is the last character of the template is
//          copied as a literal backslash.
//   len_F  out: length of the matched text (may be zero)
//   len_R  out: length of the expanded replacement, excluding the NUL
//   R      out: newly allocated, NUL-terminated expanded replacement
//
// Returns a pointer to the start of the match, or NULL if there is no
// match.  When NULL is returned none of the output parameters is written.
// If the return value is not NULL the caller should free R
static char *regex_search(char *q, regex_t *preg, const char *Rorig,
				size_t *len_F, size_t *len_R, char **R)
{
	regmatch_t regmatch[MAX_SUBPATTERN], *cur_match;
	char *found = NULL;
	const char *t;
	char *r;

	// with REG_STARTEND regmatch[0] carries the range to be searched
	regmatch[0].rm_so = 0;
	regmatch[0].rm_eo = end_line(q) - q;
	if (regexec(preg, q, MAX_SUBPATTERN, regmatch, REG_STARTEND) != 0)
		return found;

	found = q + regmatch[0].rm_so;
	*len_F = regmatch[0].rm_eo - regmatch[0].rm_so;
	*R = NULL;

 fill_result:
	// first pass calculates len_R, second fills R
	*len_R = 0;
	for (t = Rorig, r = *R; *t; t++) {
		size_t len = 1;	// default is to copy one char from replace pattern
		const char *from = t;

		// Only treat the backslash as an escape when something follows
		// it.  Otherwise the ++t below would step onto the terminating
		// NUL and the loop increment would then run past the end of
		// the template.
		if (*t == '\\' && t[1] != '\0') {
			from = ++t;	// skip backslash
			if (*t >= '0' && *t < '0' + MAX_SUBPATTERN) {
				cur_match = regmatch + (*t - '0');
				if (cur_match->rm_so >= 0) {
					len = cur_match->rm_eo - cur_match->rm_so;
					from = q + cur_match->rm_so;
				}
			}
		}
		*len_R += len;
		if (*R) {
			memcpy(r, from, len);
			r += len;
			/* *r = '\0'; - xzalloc did it */
		}
	}
	if (*R == NULL) {
		// end of first pass: the size is known, allocate and go again
		*R = xzalloc(*len_R + 1);
		goto fill_result;
	}

	return found;
}
#endif /* ENABLE_FEATURE_VI_REGEX_SEARCH */
// buf must be no longer than MAX_INPUT_LEN! // buf must be no longer than MAX_INPUT_LEN!
static void colon(char *buf) static void colon(char *buf)
{ {
@ -3083,6 +3136,14 @@ static void colon(char *buf)
int subs = 0; // number of substitutions int subs = 0; // number of substitutions
# if ENABLE_FEATURE_VI_VERBOSE_STATUS # if ENABLE_FEATURE_VI_VERBOSE_STATUS
int last_line = 0, lines = 0; int last_line = 0, lines = 0;
# endif
# if ENABLE_FEATURE_VI_REGEX_SEARCH
regex_t preg;
int cflags;
char *Rorig;
# if ENABLE_FEATURE_VI_UNDO
int undo = 0;
# endif
# endif # endif
// F points to the "find" pattern // F points to the "find" pattern
@ -3100,7 +3161,6 @@ static void colon(char *buf)
*flags++ = '\0'; // terminate "replace" *flags++ = '\0'; // terminate "replace"
gflag = *flags; gflag = *flags;
} }
len_R = strlen(R);
if (len_F) { // save "find" as last search pattern if (len_F) { // save "find" as last search pattern
free(last_search_pattern); free(last_search_pattern);
@ -3122,31 +3182,68 @@ static void colon(char *buf)
b = e; b = e;
} }
# if ENABLE_FEATURE_VI_REGEX_SEARCH
Rorig = R;
cflags = 0;
if (ignorecase)
cflags = REG_ICASE;
memset(&preg, 0, sizeof(preg));
if (regcomp(&preg, F, cflags) != 0) {
status_line(":s bad search pattern");
goto regex_search_end;
}
# else
len_R = strlen(R);
# endif
for (i = b; i <= e; i++) { // so, :20,23 s \0 find \0 replace \0 for (i = b; i <= e; i++) { // so, :20,23 s \0 find \0 replace \0
char *ls = q; // orig line start char *ls = q; // orig line start
char *found; char *found;
vc4: vc4:
# if ENABLE_FEATURE_VI_REGEX_SEARCH
found = regex_search(q, &preg, Rorig, &len_F, &len_R, &R);
# else
found = char_search(q, F, (FORWARD << 1) | LIMITED); // search cur line only for "find" found = char_search(q, F, (FORWARD << 1) | LIMITED); // search cur line only for "find"
# endif
if (found) { if (found) {
uintptr_t bias; uintptr_t bias;
// we found the "find" pattern - delete it // we found the "find" pattern - delete it
// For undo support, the first item should not be chained // For undo support, the first item should not be chained
text_hole_delete(found, found + len_F - 1, // This needs to be handled differently depending on
subs ? ALLOW_UNDO_CHAIN: ALLOW_UNDO); // whether or not regex support is enabled.
// can't do this above, no undo => no third argument # if ENABLE_FEATURE_VI_REGEX_SEARCH
subs++; # define TEST_LEN_F len_F // len_F may be zero
# if ENABLE_FEATURE_VI_VERBOSE_STATUS # define TEST_UNDO1 undo++
if (last_line != i) { # define TEST_UNDO2 undo++
last_line = i; # else
++lines; # define TEST_LEN_F 1 // len_F is never zero
} # define TEST_UNDO1 subs
# define TEST_UNDO2 1
# endif # endif
// insert the "replace" patern if (TEST_LEN_F) // match can be empty, no delete needed
bias = string_insert(found, R, ALLOW_UNDO_CHAIN); text_hole_delete(found, found + len_F - 1,
found += bias; TEST_UNDO1 ? ALLOW_UNDO_CHAIN: ALLOW_UNDO);
ls += bias; if (len_R) { // insert the "replace" pattern, if required
dot = ls; bias = string_insert(found, R,
//q += bias; - recalculated anyway TEST_UNDO2 ? ALLOW_UNDO_CHAIN: ALLOW_UNDO);
found += bias;
ls += bias;
dot = ls;
//q += bias; - recalculated anyway
}
# if ENABLE_FEATURE_VI_REGEX_SEARCH
free(R);
# endif
if (TEST_LEN_F || len_R) {
dot = ls;
subs++;
# if ENABLE_FEATURE_VI_VERBOSE_STATUS
if (last_line != i) {
last_line = i;
++lines;
}
# endif
}
// check for "global" :s/foo/bar/g // check for "global" :s/foo/bar/g
if (gflag == 'g') { if (gflag == 'g') {
if ((found + len_R) < end_line(ls)) { if ((found + len_R) < end_line(ls)) {
@ -3166,6 +3263,10 @@ static void colon(char *buf)
status_line("%d substitutions on %d lines", subs, lines); status_line("%d substitutions on %d lines", subs, lines);
# endif # endif
} }
# if ENABLE_FEATURE_VI_REGEX_SEARCH
regex_search_end:
regfree(&preg);
# endif
# endif /* FEATURE_VI_SEARCH */ # endif /* FEATURE_VI_SEARCH */
} else if (strncmp(cmd, "version", i) == 0) { // show software version } else if (strncmp(cmd, "version", i) == 0) { // show software version
status_line(BB_VER); status_line(BB_VER);