ash: expand: Do not reprocess data when expanding words

Upstream patch:

    Date: Wed, 30 May 2018 02:06:03 +0800
    expand: Do not reprocess data when expanding words

    Currently various paths will reprocess data when performing word
    expansion.  For example, expari will skip backwards looking for
    the start of the arithmetic expansion, while evalvar will skip
    unexpanded words manually.

    This is cumbersome and error-prone.  This patch fixes this by
    making word expansions proceed in a linear fashion.  This means
    changing argstr and the various expansion functions such as expari
    and subevalvar to return the next character to be expanded.

    This is inspired by similar code from FreeBSD.  However, we take
    things one step further and completely remove the manual word
    skipping in evalvar.  This is accomplished by introducing a new
    EXP_DISCARD flag that tells argstr to only parse and not produce
    any actual expansions.

    Incidentally, argstr will now always NUL-terminate the expansion
    unless the EXP_WORD flag is set.  This is because all but one
    caller of argstr wants the result to be NUL-terminated.

    Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

Also includes two one-line follow-up fixes:

    expand: Eat closing brace for length parameter
            if (subtype == VSLENGTH) {
    +               p++;
                    if (flag & EXP_DISCARD)
    expand: Fix double-decrement in argstr
    -               newloc = expdest - (char *)stackblock() - end;
    +               newloc = q - (char *)stackblock() - end;

and changes in code for bash substring extensions.

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
Denys Vlasenko 2020-02-24 10:02:50 +01:00
parent 6cda0b04a3
commit 823318822c

View File

@ -6035,6 +6035,7 @@ static int substr_atoi(const char *s)
#define EXP_WORD 0x40 /* expand word in parameter expansion */
#define EXP_QUOTED 0x100 /* expand word in double quotes */
#define EXP_KEEPNUL 0x200 /* do not skip NUL characters */
#define EXP_DISCARD 0x400 /* discard result of expansion */
/*
* rmescape() flags
@ -6452,13 +6453,15 @@ removerecordregions(int endoff)
}
static char *
exptilde(char *startp, char *p, int flag)
exptilde(char *startp, int flag)
{
unsigned char c;
char *name;
struct passwd *pw;
const char *home;
char *p;
p = startp;
name = p + 1;
while ((c = *++p) != '\0') {
@ -6477,6 +6480,8 @@ exptilde(char *startp, char *p, int flag)
}
}
done:
if (flag & EXP_DISCARD)
goto out;
*p = '\0';
if (*name == '\0') {
home = lookupvar("HOME");
@ -6486,13 +6491,13 @@ exptilde(char *startp, char *p, int flag)
goto lose;
home = pw->pw_dir;
}
*p = c;
if (!home)
goto lose;
*p = c;
strtodest(home, flag | EXP_QUOTED);
out:
return p;
lose:
*p = c;
return startp;
}
@ -6591,6 +6596,9 @@ expbackq(union node *cmd, int flag)
int startloc;
struct stackmark smark;
if (flag & EXP_DISCARD)
goto out;
INT_OFF;
startloc = expdest - (char *)stackblock();
pushstackmark(&smark, startloc);
@ -6632,64 +6640,57 @@ expbackq(union node *cmd, int flag)
(int)((dest - (char *)stackblock()) - startloc),
(int)((dest - (char *)stackblock()) - startloc),
stackblock() + startloc));
out:
argbackq = argbackq->next;
}
/* expari needs it */
static char *argstr(char *p, int flag);
#if ENABLE_FEATURE_SH_MATH
/*
* Expand arithmetic expression. Backup to start of expression,
* evaluate, place result in (backed up) result, adjust string position.
*/
static void
expari(int flag)
static char *
expari(char *start, int flag)
{
char *p, *start;
struct stackmark sm;
int begoff;
int endoff;
int len;
arith_t result;
char *p;
/* ifsfree(); */
p = stackblock();
begoff = expdest - p;
p = argstr(start, flag & EXP_DISCARD);
if (flag & EXP_DISCARD)
goto out;
/*
* This routine is slightly over-complicated for
* efficiency. Next we scan backwards looking for the
* start of arithmetic.
*/
start = stackblock();
p = expdest - 1;
*p = '\0';
p--;
while (1) {
int esc;
while ((unsigned char)*p != CTLARI) {
p--;
#if DEBUG
if (p < start) {
ash_msg_and_raise_error("missing CTLARI (shouldn't happen)");
}
#endif
}
esc = esclen(start, p);
if (!(esc % 2)) {
break;
}
p -= esc + 1;
}
begoff = p - start;
endoff = expdest - start;
start += begoff;
STADJUST(start - expdest, expdest);
removerecordregions(begoff);
expdest = p;
if (flag & QUOTES_ESC)
rmescapes(p + 1, 0, NULL);
rmescapes(start, 0, NULL);
len = cvtnum(ash_arith(p + 1), flag);
pushstackmark(&sm, endoff);
result = ash_arith(start);
popstackmark(&sm);
len = cvtnum(result, flag);
if (!(flag & EXP_QUOTED))
recordregion(begoff, begoff + len, 0);
out:
return p;
}
#endif
@ -6701,7 +6702,7 @@ static char *evalvar(char *p, int flags);
* characters to allow for further processing. Otherwise treat
* $@ like $* since no splitting will be performed.
*/
static void
static char *
argstr(char *p, int flag)
{
static const char spclchars[] ALIGN1 = {
@ -6713,6 +6714,7 @@ argstr(char *p, int flag)
CTLVAR,
CTLBACKQ,
#if ENABLE_FEATURE_SH_MATH
CTLARI,
CTLENDARI,
#endif
'\0'
@ -6723,41 +6725,45 @@ argstr(char *p, int flag)
size_t length;
int startloc;
if (!(flag & EXP_VARTILDE)) {
reject += 2;
} else if (flag & EXP_VARTILDE2) {
reject++;
}
reject += !!(flag & EXP_VARTILDE2);
reject += flag & EXP_VARTILDE ? 0 : 2;
inquotes = 0;
length = 0;
if (flag & EXP_TILDE) {
char *q;
flag &= ~EXP_TILDE;
tilde:
q = p;
if (*q == '~')
p = exptilde(p, q, flag);
if (*p == '~')
p = exptilde(p, flag);
}
start:
startloc = expdest - (char *)stackblock();
for (;;) {
int end;
unsigned char c;
length += strcspn(p + length, reject);
end = 0;
c = p[length];
if (c) {
if (!(c & 0x80)
IF_FEATURE_SH_MATH(|| c == CTLENDARI)
) {
/* c == '=' || c == ':' || c == CTLENDARI */
length++;
}
if (!(c & 0x80)
IF_FEATURE_SH_MATH(|| c == CTLENDARI)
|| c == CTLENDVAR
) {
/*
* c == '=' || c == ':' || c == '\0' ||
* c == CTLENDARI || c == CTLENDVAR
*/
length++;
/* c == '\0' || c == CTLENDARI || c == CTLENDVAR */
end = !!((c - 1) & 0x80);
}
if (length > 0) {
if (length > 0 && !(flag & EXP_DISCARD)) {
int newloc;
expdest = stnputs(p, length, expdest);
newloc = expdest - (char *)stackblock();
char *q;
q = stnputs(p, length, expdest);
q[-1] &= end - 1;
expdest = q - (flag & EXP_WORD ? end : 0);
newloc = q - (char *)stackblock() - end;
if (breakall && !inquotes && newloc > startloc) {
recordregion(startloc, newloc, 0);
}
@ -6766,14 +6772,11 @@ argstr(char *p, int flag)
p += length + 1;
length = 0;
if (end)
break;
switch (c) {
case '\0':
goto breakloop;
case '=':
if (flag & EXP_VARTILDE2) {
p--;
continue;
}
flag |= EXP_VARTILDE2;
reject++;
/* fall through */
@ -6786,11 +6789,6 @@ argstr(char *p, int flag)
goto tilde;
}
continue;
}
switch (c) {
case CTLENDVAR: /* ??? */
goto breakloop;
case CTLQUOTEMARK:
/* "$@" syntax adherence hack */
if (!inquotes && !memcmp(p, dolatstr + 1, DOLATSTRLEN - 1)) {
@ -6816,17 +6814,15 @@ argstr(char *p, int flag)
goto start;
case CTLBACKQ:
expbackq(argbackq->n, flag | inquotes);
argbackq = argbackq->next;
goto start;
#if ENABLE_FEATURE_SH_MATH
case CTLENDARI:
p--;
expari(flag | inquotes);
case CTLARI:
p = expari(p, flag | inquotes);
goto start;
#endif
}
}
breakloop: ;
return p - 1;
}
static char *
@ -6951,25 +6947,27 @@ varunset(const char *end, const char *var, const char *umsg, int varflags)
ash_msg_and_raise_error("%.*s: %s%s", (int)(end - var - 1), var, msg, tail);
}
static const char *
subevalvar(char *p, char *str, int strloc, int subtype,
static char *
subevalvar(char *start, char *str, int strloc,
int startloc, int varflags, int flag)
{
struct nodelist *saveargbackq = argbackq;
int subtype = varflags & VSTYPE;
int quotes = flag & QUOTES_ESC;
char *startp;
char *loc;
char *rmesc, *rmescend;
int amount, resetloc;
long amount;
int resetloc;
int argstr_flags;
IF_BASH_PATTERN_SUBST(int workloc;)
IF_BASH_PATTERN_SUBST(int slash_pos;)
IF_BASH_PATTERN_SUBST(char *repl;)
int zero;
char *(*scan)(char*, char*, char*, char*, int, int);
char *p;
//bb_error_msg("subevalvar(p:'%s',str:'%s',strloc:%d,subtype:%d,startloc:%d,varflags:%x,quotes:%d)",
// p, str, strloc, subtype, startloc, varflags, quotes);
//bb_error_msg("subevalvar(start:'%s',str:'%s',strloc:%d,startloc:%d,varflags:%x,quotes:%d)",
// start, str, strloc, startloc, varflags, quotes);
#if BASH_PATTERN_SUBST
/* For "${v/pattern/repl}", we must find the delimiter _before_
@ -6979,7 +6977,7 @@ subevalvar(char *p, char *str, int strloc, int subtype,
repl = NULL;
if (subtype == VSREPLACE || subtype == VSREPLACEALL) {
/* Find '/' and replace with NUL */
repl = p;
repl = start;
/* The pattern can't be empty.
* IOW: if the first char after "${v//" is a slash,
* it does not terminate the pattern - it's the first char of the pattern:
@ -7004,17 +7002,17 @@ subevalvar(char *p, char *str, int strloc, int subtype,
}
}
#endif
argstr_flags = EXP_TILDE;
if (subtype != VSASSIGN
&& subtype != VSQUESTION
argstr_flags = (flag & EXP_DISCARD) | EXP_TILDE;
if (!str
#if BASH_SUBSTR
&& subtype != VSSUBSTR
#endif
) {
/* EXP_CASE keeps CTLESC's */
argstr_flags = EXP_TILDE | EXP_CASE;
argstr_flags |= EXP_CASE;
}
argstr(p, argstr_flags);
p = argstr(start, argstr_flags);
//bb_error_msg("str0:'%s'", (char *)stackblock() + strloc);
#if BASH_PATTERN_SUBST
slash_pos = -1;
@ -7022,24 +7020,25 @@ subevalvar(char *p, char *str, int strloc, int subtype,
slash_pos = expdest - ((char *)stackblock() + strloc);
STPUTC('/', expdest);
//bb_error_msg("repl+1:'%s'", repl + 1);
argstr(repl + 1, EXP_TILDE); /* EXP_TILDE: echo "${v/x/~}" expands ~ ! */
p = argstr(repl + 1, (flag & EXP_DISCARD) | EXP_TILDE); /* EXP_TILDE: echo "${v/x/~}" expands ~ ! */
*repl = '/';
}
#endif
STPUTC('\0', expdest);
argbackq = saveargbackq;
if (flag & EXP_DISCARD)
return p;
startp = (char *)stackblock() + startloc;
//bb_error_msg("str1:'%s'", (char *)stackblock() + strloc);
switch (subtype) {
case VSASSIGN:
setvar0(str, startp);
amount = startp - expdest;
STADJUST(amount, expdest);
return startp;
loc = startp;
goto out;
case VSQUESTION:
varunset(p, str, startp, varflags);
varunset(start, str, startp, varflags);
/* NOTREACHED */
#if BASH_SUBSTR
@ -7110,9 +7109,7 @@ subevalvar(char *p, char *str, int strloc, int subtype,
*loc++ = *vstr++;
}
*loc = '\0';
amount = loc - expdest;
STADJUST(amount, expdest);
return loc;
goto out;
}
#endif /* BASH_SUBSTR */
}
@ -7178,7 +7175,7 @@ subevalvar(char *p, char *str, int strloc, int subtype,
/* If there's no pattern to match, return the expansion unmolested */
if (str[0] == '\0')
return NULL;
goto out1;
len = 0;
idx = startp;
@ -7259,9 +7256,8 @@ subevalvar(char *p, char *str, int strloc, int subtype,
startp = (char *)stackblock() + startloc;
memmove(startp, (char *)stackblock() + workloc, len + 1);
//bb_error_msg("startp:'%s'", startp);
amount = expdest - (startp + len);
STADJUST(-amount, expdest);
return startp;
loc = startp + len;
goto out;
}
#endif /* BASH_PATTERN_SUBST */
@ -7282,10 +7278,17 @@ subevalvar(char *p, char *str, int strloc, int subtype,
loc = startp + (str - loc) - 1;
}
*loc = '\0';
amount = loc - expdest;
STADJUST(amount, expdest);
}
return loc;
} else
loc = str - 1;
out:
amount = loc - expdest;
STADJUST(amount, expdest);
out1:
/* Remove any recorded regions beyond start of variable */
removerecordregions(startloc);
return p;
}
/*
@ -7310,7 +7313,14 @@ varvalue(char *name, int varflags, int flags, int quoted)
ssize_t len = 0;
int sep;
int subtype = varflags & VSTYPE;
int discard = subtype == VSPLUS || subtype == VSLENGTH;
int discard = (subtype == VSPLUS || subtype == VSLENGTH) | (flags & EXP_DISCARD);
if (!subtype) {
if (discard)
return -1;
raise_error_syntax("bad substitution");
}
flags |= EXP_KEEPNUL;
flags &= discard ? ~QUOTES_ESC : ~0;
@ -7427,6 +7437,7 @@ varvalue(char *name, int varflags, int flags, int quoted)
if (discard)
STADJUST(-len, expdest);
return len;
}
@ -7439,18 +7450,15 @@ evalvar(char *p, int flag)
{
char varflags;
char subtype;
int quoted;
char *var;
int patloc;
int startloc;
ssize_t varlen;
int quoted;
varflags = (unsigned char) *p++;
subtype = varflags & VSTYPE;
if (!subtype)
raise_error_syntax("bad substitution");
quoted = flag & EXP_QUOTED;
var = p;
startloc = expdest - (char *)stackblock();
@ -7461,35 +7469,29 @@ evalvar(char *p, int flag)
if (varflags & VSNUL)
varlen--;
if (subtype == VSPLUS) {
switch (subtype) {
case VSPLUS:
varlen = -1 - varlen;
goto vsplus;
}
if (subtype == VSMINUS) {
vsplus:
if (varlen < 0) {
argstr(
p,
flag | EXP_TILDE | EXP_WORD
);
goto end;
}
/* fall through */
case 0:
case VSMINUS:
p = argstr(p, flag | EXP_TILDE | EXP_WORD);
if (varlen < 0)
return p;
goto record;
}
if (subtype == VSASSIGN || subtype == VSQUESTION) {
case VSASSIGN:
case VSQUESTION:
if (varlen >= 0)
goto record;
subevalvar(p, var, 0, subtype, startloc, varflags,
p = subevalvar(p, var, 0, startloc, varflags,
flag & ~QUOTES_ESC);
if (flag & EXP_DISCARD)
return p;
varflags &= ~VSNUL;
/*
* Remove any recorded regions beyond
* start of variable
*/
removerecordregions(startloc);
goto again;
}
@ -7497,20 +7499,15 @@ evalvar(char *p, int flag)
varunset(p, var, 0, 0);
if (subtype == VSLENGTH) {
p++;
if (flag & EXP_DISCARD)
return p;
cvtnum(varlen > 0 ? varlen : 0, flag);
goto record;
}
if (subtype == VSNORMAL) {
record:
if (quoted) {
quoted = *var == '@' && shellparam.nparam;
if (!quoted)
goto end;
}
recordregion(startloc, expdest - (char *)stackblock(), quoted);
goto end;
}
if (subtype == VSNORMAL)
goto record;
#if DEBUG
switch (subtype) {
@ -7531,46 +7528,28 @@ evalvar(char *p, int flag)
}
#endif
if (varlen >= 0) {
flag |= varlen < 0 ? EXP_DISCARD : 0;
if (!(flag & EXP_DISCARD)) {
/*
* Terminate the string and start recording the pattern
* right after it
*/
STPUTC('\0', expdest);
patloc = expdest - (char *)stackblock();
if (NULL == subevalvar(p, /* varname: */ NULL, patloc, subtype,
startloc, varflags, flag)) {
int amount = expdest - (
(char *)stackblock() + patloc - 1
);
STADJUST(-amount, expdest);
}
/* Remove any recorded regions beyond start of variable */
removerecordregions(startloc);
goto record;
}
varlen = 0;
patloc = expdest - (char *)stackblock();
p = subevalvar(p, NULL, patloc, startloc, varflags, flag);
end:
if (subtype != VSNORMAL) { /* skip to end of alternative */
int nesting = 1;
for (;;) {
unsigned char c = *p++;
if (c == CTLESC)
p++;
else if (c == CTLBACKQ) {
if (varlen >= 0)
argbackq = argbackq->next;
} else if (c == CTLVAR) {
if ((*p++ & VSTYPE) != VSNORMAL)
nesting++;
} else if (c == CTLENDVAR) {
if (--nesting == 0)
break;
}
}
record:
if (flag & EXP_DISCARD)
return p;
if (quoted) {
quoted = *var == '@' && shellparam.nparam;
if (!quoted)
return p;
}
recordregion(startloc, expdest - (char *)stackblock(), quoted);
return p;
}
@ -7983,13 +7962,11 @@ expandarg(union node *arg, struct arglist *arglist, int flag)
STARTSTACKSTR(expdest);
TRACE(("expandarg: argstr('%s',flags:%x)\n", arg->narg.text, flag));
argstr(arg->narg.text, flag);
p = _STPUTC('\0', expdest);
expdest = p - 1;
if (arglist == NULL) {
/* here document expanded */
goto out;
}
p = grabstackstr(p);
p = grabstackstr(expdest);
TRACE(("expandarg: p:'%s'\n", p));
exparg.lastp = &exparg.list;
/*
@ -8050,7 +8027,6 @@ casematch(union node *pattern, char *val)
argbackq = pattern->narg.backquote;
STARTSTACKSTR(expdest);
argstr(pattern->narg.text, EXP_TILDE | EXP_CASE);
STACKSTRNUL(expdest);
ifsfree();
result = patmatch(stackblock(), val);
popstackmark(&smark);