xargs: simplify logic

Removed double-buffering in a linked list.

function                                             old     new   delta
store_param                                            -      56     +56
xargs_main                                           871     840     -31
process_stdin                                        458     396     -62
process0_stdin                                       267     143    -124
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 0/3 up/down: 56/-217)          Total: -161 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
Denys Vlasenko 2010-06-14 00:57:05 +02:00
parent aaa24e09f9
commit 7a4021deba

View File

@ -59,10 +59,6 @@
//config: are not special. //config: are not special.
#include "libbb.h" #include "libbb.h"
/* This is a NOEXEC applet. Be very careful! */
/* COMPAT: SYSV version defaults size (and has a max value of) to 470. /* COMPAT: SYSV version defaults size (and has a max value of) to 470.
We try to make it as large as possible. */ We try to make it as large as possible. */
#if !defined(ARG_MAX) && defined(_SC_ARG_MAX) #if !defined(ARG_MAX) && defined(_SC_ARG_MAX)
@ -72,6 +68,12 @@
# define ARG_MAX 470 # define ARG_MAX 470
#endif #endif
/* This is a NOEXEC applet. Be very careful! */
//#define dbg_msg(...) bb_error_msg(__VA_ARGS__)
#define dbg_msg(...) ((void)0)
#ifdef TEST #ifdef TEST
# ifndef ENABLE_FEATURE_XARGS_SUPPORT_CONFIRMATION # ifndef ENABLE_FEATURE_XARGS_SUPPORT_CONFIRMATION
@ -88,26 +90,36 @@
# endif # endif
#endif #endif
struct globals {
char **args;
const char *eof_str;
int idx;
} FIX_ALIASING;
#define G (*(struct globals*)&bb_common_bufsiz1)
#define INIT_G() do { } while (0)
/* /*
* This function has special algorithm. * This function has special algorithm.
* Don't use fork and include to main! * Don't use fork and include to main!
*/ */
static int xargs_exec(char **args) static int xargs_exec(void)
{ {
int status; int status;
status = spawn_and_wait(args); status = spawn_and_wait(G.args);
if (status < 0) { if (status < 0) {
bb_simple_perror_msg(args[0]); bb_simple_perror_msg(G.args[0]);
return errno == ENOENT ? 127 : 126; return errno == ENOENT ? 127 : 126;
} }
if (status == 255) { if (status == 255) {
bb_error_msg("%s: exited with status 255; aborting", args[0]); bb_error_msg("%s: exited with status 255; aborting", G.args[0]);
return 124; return 124;
} }
if (status >= 0x180) { if (status >= 0x180) {
bb_error_msg("%s: terminated by signal %d", bb_error_msg("%s: terminated by signal %d",
args[0], status - 0x180); G.args[0], status - 0x180);
return 125; return 125;
} }
if (status) if (status)
@ -115,51 +127,59 @@ static int xargs_exec(char **args)
return 0; return 0;
} }
typedef struct xlist_t {
struct xlist_t *link;
size_t length; /* length of xstr[] including NUL */
char xstr[1];
} xlist_t;
/* In POSIX/C locale isspace is only these chars: "\t\n\v\f\r" and space. /* In POSIX/C locale isspace is only these chars: "\t\n\v\f\r" and space.
* "\t\n\v\f\r" happen to have ASCII codes 9,10,11,12,13. * "\t\n\v\f\r" happen to have ASCII codes 9,10,11,12,13.
*/ */
#define ISSPACE(a) ({ unsigned char xargs__isspace = (a) - 9; xargs__isspace == (' ' - 9) || xargs__isspace <= (13 - 9); }) #define ISSPACE(a) ({ unsigned char xargs__isspace = (a) - 9; xargs__isspace == (' ' - 9) || xargs__isspace <= (13 - 9); })
static void store_param(char *s)
{
/* Grow by 256 elements at once */
if (!(G.idx & 0xff)) { /* G.idx == N*256 */
/* Enlarge, make G.args[(N+1)*256 - 1] last valid idx */
G.args = xrealloc(G.args, sizeof(G.args[0]) * (G.idx + 0x100));
}
G.args[G.idx++] = s;
}
/* process[0]_stdin:
* Read characters into buf[n_max_chars+1], and when parameter delimiter
* is seen, store the address of a new parameter to args[].
* If reading discovers that last chars do not form the complete
* parameter, the pointer to the first such "tail character" is returned.
* (buf has extra byte at the end to accomodate terminating NUL
* of "tail characters" string).
* Otherwise, the returned pointer points to NUL byte.
* The args[] vector is NULL-terminated.
* On entry, buf[] may contain some "seed chars" which are to become
* the beginning of the first parameter.
*/
#if ENABLE_FEATURE_XARGS_SUPPORT_QUOTES #if ENABLE_FEATURE_XARGS_SUPPORT_QUOTES
static xlist_t* process_stdin(xlist_t *list_arg, static char* FAST_FUNC process_stdin(int n_max_chars, int n_max_arg, char *buf)
const char *eof_str, size_t n_max_chars, char *buf)
{ {
#define NORM 0 #define NORM 0
#define QUOTE 1 #define QUOTE 1
#define BACKSLASH 2 #define BACKSLASH 2
#define SPACE 4 #define SPACE 4
char *s = NULL; /* start of the word */ char *s; /* start of the word */
char *p = NULL; /* pointer to end of the word */ char *p; /* pointer to end of the word */
char q = '\0'; /* quote char */ char q = '\0'; /* quote char */
char state = NORM; char state = NORM;
char eof_str_detected = 0;
size_t line_l = 0; /* size of loaded args */
xlist_t *cur;
xlist_t *prev;
prev = cur = list_arg; s = buf;
while (cur) { p = s + strlen(buf);
prev = cur;
line_l += cur->length; /* "goto ret" is used instead of "break" to make control flow
cur = cur->link; * more obvious: */
}
while (1) { while (1) {
int c = getchar(); int c = getchar();
if (c == EOF) { if (c == EOF) {
if (s) if (p != s)
goto unexpected_eof; goto close_word;
break; goto ret;
} }
if (eof_str_detected) /* skip till EOF */
continue;
if (state == BACKSLASH) { if (state == BACKSLASH) {
state = NORM; state = NORM;
goto set; goto set;
@ -171,15 +191,13 @@ static xlist_t* process_stdin(xlist_t *list_arg,
state = NORM; state = NORM;
} else { /* if (state == NORM) */ } else { /* if (state == NORM) */
if (ISSPACE(c)) { if (ISSPACE(c)) {
if (s) { if (p != s) {
unexpected_eof: close_word:
state = SPACE; state = SPACE;
c = '\0'; c = '\0';
goto set; goto set;
} }
} else { } else {
if (s == NULL)
s = p = buf;
if (c == '\\') { if (c == '\\') {
state = BACKSLASH; state = BACKSLASH;
} else if (c == '\'' || c == '"') { } else if (c == '\'' || c == '"') {
@ -187,8 +205,6 @@ static xlist_t* process_stdin(xlist_t *list_arg,
state = QUOTE; state = QUOTE;
} else { } else {
set: set:
if ((size_t)(p - buf) >= n_max_chars)
bb_error_msg_and_die("argument line too long");
*p++ = c; *p++ = c;
} }
} }
@ -199,149 +215,128 @@ static xlist_t* process_stdin(xlist_t *list_arg,
q == '\'' ? "single" : "double"); q == '\'' ? "single" : "double");
} }
/* A full word is loaded */ /* A full word is loaded */
if (eof_str) { if (G.eof_str) {
eof_str_detected = (strcmp(s, eof_str) == 0); if (strcmp(s, G.eof_str) == 0) {
while (getchar() != EOF)
continue;
p = s;
goto ret;
} }
if (!eof_str_detected) {
size_t length = (p - buf);
/* Dont xzalloc - it can be quite big */
cur = xmalloc(offsetof(xlist_t, xstr) + length);
cur->link = NULL;
cur->length = length;
memcpy(cur->xstr, s, length);
if (prev == NULL) {
list_arg = cur;
} else {
prev->link = cur;
} }
prev = cur; n_max_chars -= (p - s);
line_l += length; /* if (n_max_chars < 0) impossible */
if (line_l >= n_max_chars) /* limit memory usage */ store_param(s);
break; dbg_msg("args[]:'%s'", s);
s = p;
n_max_arg--;
if (n_max_arg == 0 || n_max_chars == 0) {
goto ret;
} }
s = NULL;
state = NORM; state = NORM;
} else /* state != SPACE */
if (p - s >= n_max_chars) {
dbg_msg("s:'%s' p-s:%d n_max_chars:%d", s, (int)(p-s), n_max_chars);
goto ret;
} }
} }
return list_arg; ret:
*p = '\0';
store_param(NULL);
dbg_msg("return:'%s'", s);
return s;
} }
#else #else
/* The variant does not support single quotes, double quotes or backslash */ /* The variant does not support single quotes, double quotes or backslash */
static xlist_t* process_stdin(xlist_t *list_arg, static char* FAST_FUNC process_stdin(int n_max_chars, int n_max_arg, char *buf)
const char *eof_str, size_t n_max_chars, char *buf)
{ {
char eof_str_detected = 0; char *s; /* start of the word */
char *s = NULL; /* start of the word */ char *p; /* pointer to end of the word */
char *p = NULL; /* pointer to end of the word */
size_t line_l = 0; /* size of loaded args */
xlist_t *cur;
xlist_t *prev;
prev = cur = list_arg; s = buf;
while (cur) { p = s + strlen(buf);
prev = cur;
line_l += cur->length;
cur = cur->link;
}
while (1) { while (1) {
int c = getchar(); int c = getchar();
if (c == EOF) { if (c == EOF) {
if (s == NULL) if (p == s)
break; goto ret;
}
if (eof_str_detected) { /* skip till EOF */
continue;
} }
if (c == EOF || ISSPACE(c)) { if (c == EOF || ISSPACE(c)) {
if (s == NULL) if (p == s)
continue; continue;
c = EOF; c = EOF;
} }
if (s == NULL)
s = p = buf;
if ((size_t)(p - buf) >= n_max_chars)
bb_error_msg_and_die("argument line too long");
*p++ = (c == EOF ? '\0' : c); *p++ = (c == EOF ? '\0' : c);
if (c == EOF) { /* word's delimiter or EOF detected */ if (c == EOF) { /* word's delimiter or EOF detected */
/* A full word is loaded */ /* A full word is loaded */
if (eof_str) { if (G.eof_str) {
eof_str_detected = (strcmp(s, eof_str) == 0); if (strcmp(s, G.eof_str) == 0) {
} while (getchar() != EOF)
if (!eof_str_detected) { continue;
size_t length = (p - buf); p = s;
/* Dont xzalloc - it can be quite big */ goto ret;
cur = xmalloc(offsetof(xlist_t, xstr) + length);
cur->link = NULL;
cur->length = length;
memcpy(cur->xstr, s, length);
if (prev == NULL) {
list_arg = cur;
} else {
prev->link = cur;
}
prev = cur;
line_l += length;
if (line_l >= n_max_chars) /* limit memory usage */
break;
}
s = NULL;
} }
} }
return list_arg; n_max_chars -= (p - s);
/* if (n_max_chars < 0) impossible */
store_param(s);
dbg_msg("args[]:'%s'", s);
s = p;
n_max_arg--;
if (n_max_arg == 0 || n_max_chars == 0) {
goto ret;
}
} else /* c != EOF */
if (p - s >= n_max_chars) {
goto ret;
}
}
ret:
*p = '\0';
store_param(NULL);
dbg_msg("return:'%s'", s);
return s;
} }
#endif /* FEATURE_XARGS_SUPPORT_QUOTES */ #endif /* FEATURE_XARGS_SUPPORT_QUOTES */
#if ENABLE_FEATURE_XARGS_SUPPORT_ZERO_TERM #if ENABLE_FEATURE_XARGS_SUPPORT_ZERO_TERM
static xlist_t* process0_stdin(xlist_t *list_arg, static char* FAST_FUNC process0_stdin(int n_max_chars, int n_max_arg, char *buf)
const char *eof_str UNUSED_PARAM, size_t n_max_chars, char *buf)
{ {
char *s = NULL; /* start of the word */ char *s; /* start of the word */
char *p = NULL; /* pointer to end of the word */ char *p; /* pointer to end of the word */
size_t line_l = 0; /* size of loaded args */
xlist_t *cur;
xlist_t *prev;
prev = cur = list_arg; s = buf;
while (cur) { p = s + strlen(buf);
prev = cur;
line_l += cur->length;
cur = cur->link;
}
while (1) { while (1) {
int c = getchar(); int c = getchar();
if (c == EOF) { if (c == EOF) {
if (s == NULL) if (p == s)
break; goto ret;
c = '\0'; c = '\0';
} }
if (s == NULL)
s = p = buf;
if ((size_t)(p - buf) >= n_max_chars)
bb_error_msg_and_die("argument line too long");
*p++ = c; *p++ = c;
if (c == '\0') { /* word's delimiter or EOF detected */ if (c == '\0') { /* word's delimiter or EOF detected */
/* A full word is loaded */ /* A full word is loaded */
size_t length = (p - buf); n_max_chars -= (p - s);
/* Dont xzalloc - it can be quite big */ /* if (n_max_chars < 0) impossible */
cur = xmalloc(offsetof(xlist_t, xstr) + length); store_param(s);
cur->link = NULL; dbg_msg("args[]:'%s'", s);
cur->length = length; n_max_arg--;
memcpy(cur->xstr, s, length); s = p;
if (prev == NULL) { if (n_max_arg == 0 || n_max_chars == 0) {
list_arg = cur; goto ret;
} else {
prev->link = cur;
} }
prev = cur; } else /* c != '\0' */
line_l += length; if (p - s >= n_max_chars) {
if (line_l >= n_max_chars) /* limit memory usage */ goto ret;
break;
s = NULL;
} }
} }
return list_arg; ret:
*p = '\0';
store_param(NULL);
dbg_msg("return:'%s'", s);
return s;
} }
#endif /* FEATURE_XARGS_SUPPORT_ZERO_TERM */ #endif /* FEATURE_XARGS_SUPPORT_ZERO_TERM */
@ -397,28 +392,30 @@ enum {
int xargs_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; int xargs_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
int xargs_main(int argc, char **argv) int xargs_main(int argc, char **argv)
{ {
xlist_t *list = NULL; int i;
int child_error = 0; int child_error = 0;
char *max_args; char *max_args;
char *max_chars; char *max_chars;
char *buf; char *buf;
int n_max_arg;
const char *eof_str = NULL;
unsigned opt; unsigned opt;
size_t n_max_chars; int n_max_chars;
int n_max_arg;
#if ENABLE_FEATURE_XARGS_SUPPORT_ZERO_TERM #if ENABLE_FEATURE_XARGS_SUPPORT_ZERO_TERM
xlist_t* (*read_args)(xlist_t*, const char*, size_t, char*) = process_stdin; char* FAST_FUNC (*read_args)(int, int, char*) = process_stdin;
#else #else
#define read_args process_stdin #define read_args process_stdin
#endif #endif
opt = getopt32(argv, OPTION_STR, &max_args, &max_chars, &eof_str, &eof_str); INIT_G();
G.eof_str = NULL;
opt = getopt32(argv, OPTION_STR, &max_args, &max_chars, &G.eof_str, &G.eof_str);
/* -E ""? You may wonder why not just omit -E? /* -E ""? You may wonder why not just omit -E?
* This is used for portability: * This is used for portability:
* old xargs was using "_" as default for -E / -e */ * old xargs was using "_" as default for -E / -e */
if ((opt & OPT_EOF_STRING1) && eof_str[0] == '\0') if ((opt & OPT_EOF_STRING1) && G.eof_str[0] == '\0')
eof_str = NULL; G.eof_str = NULL;
if (opt & OPT_ZEROTERM) if (opt & OPT_ZEROTERM)
IF_FEATURE_XARGS_SUPPORT_ZERO_TERM(read_args = process0_stdin); IF_FEATURE_XARGS_SUPPORT_ZERO_TERM(read_args = process0_stdin);
@ -451,93 +448,81 @@ int xargs_main(int argc, char **argv)
n_max_chars = 20 * 1024; n_max_chars = 20 * 1024;
if (opt & OPT_UPTO_SIZE) { if (opt & OPT_UPTO_SIZE) {
int i;
size_t n_chars = 0; size_t n_chars = 0;
n_max_chars = xatoul_range(max_chars, 1, INT_MAX); n_max_chars = xatou_range(max_chars, 1, INT_MAX);
for (i = 0; argv[i]; i++) { for (i = 0; argv[i]; i++) {
n_chars += strlen(argv[i]) + 1; n_chars += strlen(argv[i]) + 1;
} }
n_max_chars -= n_chars; n_max_chars -= n_chars;
if ((ssize_t)n_max_chars <= 0) { if (n_max_chars <= 0) {
bb_error_msg_and_die("can't fit single argument within argument list size limit"); bb_error_msg_and_die("can't fit single argument within argument list size limit");
} }
} }
buf = xmalloc(n_max_chars); buf = xzalloc(n_max_chars + 1);
if (opt & OPT_UPTO_NUMBER) { if (opt & OPT_UPTO_NUMBER) {
n_max_arg = xatoul_range(max_args, 1, INT_MAX); n_max_arg = xatou_range(max_args, 1, INT_MAX);
if (n_max_arg < n_max_chars) if (n_max_arg < n_max_chars)
goto skip; goto skip;
} }
n_max_arg = n_max_chars; n_max_arg = n_max_chars;
skip: skip:
while ((list = read_args(list, eof_str, n_max_chars, buf)) != NULL /* Allocate pointers for execvp */
|| !(opt & OPT_NO_EMPTY) /* We can statically allocate (argc + n_max_arg + 1) elements
) { * and do not bother with resizing args[], but on 64-bit machines
char **args; * this results in args[] vector which is ~8 times bigger
xlist_t *cur; * than n_max_chars! That is, with n_max_chars == 20k,
int i, n; * args[] will take 160k (!), which will most likely be
size_t n_chars; * almost entirely unused.
*/
/* See store_param() for matching 256-step growth logic */
G.args = xmalloc(sizeof(G.args[0]) * ((argc + 0xff) & ~0xff));
opt |= OPT_NO_EMPTY; /* Store the command to be executed, part 1 */
for (i = 0; argv[i]; i++)
G.args[i] = argv[i];
/* take args from list, not exceeding arg and char limits */ while (1) {
n_chars = 0; char *rem;
n = 0;
for (cur = list; cur; cur = cur->link) { G.idx = argc;
n_chars += cur->length; rem = read_args(n_max_chars, n_max_arg, buf);
if (n_chars > n_max_chars || n >= n_max_arg) {
if (opt & OPT_TERMINATE) if (!G.args[argc]) {
bb_error_msg_and_die("argument list too long"); if (*rem != '\0')
bb_error_msg_and_die("argument line too long");
if (opt & OPT_NO_EMPTY)
break; break;
} }
n++; opt |= OPT_NO_EMPTY;
}
/* allocate pointers for execvp */
args = xzalloc(sizeof(args[0]) * (argc + n + 1));
/* store the command to be executed
* (taken from the command line) */
for (i = 0; argv[i]; i++)
args[i] = argv[i];
/* (taken from stdin) */
for (cur = list; n; cur = cur->link) {
args[i++] = cur->xstr;
n--;
}
if (opt & (OPT_INTERACTIVE | OPT_VERBOSE)) { if (opt & (OPT_INTERACTIVE | OPT_VERBOSE)) {
for (i = 0; args[i]; i++) { for (i = 0; G.args[i]; i++) {
if (i) if (i)
bb_putchar_stderr(' '); bb_putchar_stderr(' ');
fputs(args[i], stderr); fputs(G.args[i], stderr);
} }
if (!(opt & OPT_INTERACTIVE)) if (!(opt & OPT_INTERACTIVE))
bb_putchar_stderr('\n'); bb_putchar_stderr('\n');
} }
if (!(opt & OPT_INTERACTIVE) || xargs_ask_confirmation()) { if (!(opt & OPT_INTERACTIVE) || xargs_ask_confirmation()) {
child_error = xargs_exec(args); child_error = xargs_exec();
} }
/* remove list elements which we consumed */
for (i = argc; args[i]; i++) {
cur = list;
list = list->link;
free(cur);
}
free(args);
if (child_error > 0 && child_error != 123) { if (child_error > 0 && child_error != 123) {
break; break;
} }
overlapping_strcpy(buf, rem);
} /* while */ } /* while */
if (ENABLE_FEATURE_CLEAN_UP) if (ENABLE_FEATURE_CLEAN_UP) {
free(G.args);
free(buf); free(buf);
}
return child_error; return child_error;
} }