awk: remove custom pool allocator for temporary awk variables

It seems to be designed to reduce overhead of malloc's auxiliary data,
by allocating at least 64 variables as a block.
With "struct var" being about 20-32 bytes long (32/64 bits),
malloc overhead for one temporary indeed is high, ~33% more memory used
than needed.

function                                             old     new   delta
evaluate                                            3137    3145      +8
modprobe_main                                        798     803      +5
exec_builtin                                        1414    1419      +5
awk_printf                                           476     481      +5
as_regex                                             132     137      +5
EMSG_INTERNAL_ERROR                                   15       -     -15
nvfree                                               169     116     -53
nvalloc                                              145       -    -145
------------------------------------------------------------------------------
(add/remove: 0/2 grow/shrink: 5/1 up/down: 28/-213)          Total: -185 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
Denys Vlasenko 2021-06-30 02:12:27 +02:00
parent f99800758e
commit 6cf6f1eaee

View File

@ -93,7 +93,6 @@ enum {
};
#define MAXVARFMT 240
#define MINNVBLOCK 64
/* variable flags */
#define VF_NUMBER 0x0001 /* 1 = primary type is number */
@ -120,8 +119,8 @@ typedef struct walker_list {
/* Variable */
typedef struct var_s {
unsigned type; /* flags */
double number;
char *string;
double number;
union {
int aidx; /* func arg idx (for compilation stage) */
struct xhash_s *array; /* array ptr */
@ -192,15 +191,6 @@ typedef struct node_s {
} a;
} node;
/* Block of temporary variables */
typedef struct nvblock_s {
int size;
var *pos;
struct nvblock_s *prev;
struct nvblock_s *next;
var nv[];
} nvblock;
typedef struct tsplitter_s {
node n;
regex_t re[2];
@ -537,7 +527,6 @@ struct globals {
int nfields;
int maxfields; /* used in fsrealloc() only */
var *Fields;
nvblock *g_cb;
char *g_pos;
char g_saved_ch;
smallint icase;
@ -605,7 +594,6 @@ struct globals2 {
#define nfields (G1.nfields )
#define maxfields (G1.maxfields )
#define Fields (G1.Fields )
#define g_cb (G1.g_cb )
#define g_pos (G1.g_pos )
#define g_saved_ch (G1.g_saved_ch )
#define icase (G1.icase )
@ -640,7 +628,6 @@ static int awk_exit(int) NORETURN;
/* ---- error handling ---- */
static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
@ -1050,77 +1037,6 @@ static int istrue(var *v)
return (v->string && v->string[0]);
}
/* temporary variables allocator. Last allocated should be first freed */
static var *nvalloc(int n)
{
nvblock *pb = NULL;
var *v, *r;
int size;
while (g_cb) {
pb = g_cb;
if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
break;
g_cb = g_cb->next;
}
if (!g_cb) {
size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
g_cb->size = size;
g_cb->pos = g_cb->nv;
g_cb->prev = pb;
/*g_cb->next = NULL; - xzalloc did it */
if (pb)
pb->next = g_cb;
}
v = r = g_cb->pos;
g_cb->pos += n;
while (v < g_cb->pos) {
v->type = 0;
v->string = NULL;
v++;
}
return r;
}
static void nvfree(var *v)
{
var *p;
if (v < g_cb->nv || v >= g_cb->pos)
syntax_error(EMSG_INTERNAL_ERROR);
for (p = v; p < g_cb->pos; p++) {
if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
clear_array(iamarray(p));
free(p->x.array->items);
free(p->x.array);
}
if (p->type & VF_WALK) {
walker_list *n;
walker_list *w = p->x.walker;
debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
p->x.walker = NULL;
while (w) {
n = w->prev;
debug_printf_walker(" free(%p)\n", w);
free(w);
w = n;
}
}
clrvar(p);
}
g_cb->pos = v;
while (g_cb->prev && g_cb->pos == g_cb->nv) {
g_cb = g_cb->prev;
}
}
/* ------- awk program text parsing ------- */
/* Parse next token pointed by global pos, place results into global t_XYZ variables.
@ -1793,6 +1709,41 @@ static void parse_program(char *p)
/* -------- program execution part -------- */
/* temporary variables allocator */
static var *nvalloc(int sz)
{
return xzalloc(sz * sizeof(var));
}
static void nvfree(var *v, int sz)
{
var *p = v;
while (--sz >= 0) {
if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
clear_array(iamarray(p));
free(p->x.array->items);
free(p->x.array);
}
if (p->type & VF_WALK) {
walker_list *n;
walker_list *w = p->x.walker;
debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
p->x.walker = NULL;
while (w) {
n = w->prev;
debug_printf_walker(" free(%p)\n", w);
free(w);
w = n;
}
}
clrvar(p);
p++;
}
free(v);
}
static node *mk_splitter(const char *s, tsplitter *spl)
{
regex_t *re, *ire;
@ -1814,9 +1765,9 @@ static node *mk_splitter(const char *s, tsplitter *spl)
return n;
}
/* use node as a regular expression. Supplied with node ptr and regex_t
/* Use node as a regular expression. Supplied with node ptr and regex_t
* storage space. Return ptr to regex (if result points to preg, it should
* be later regfree'd manually
* be later regfree'd manually).
*/
static regex_t *as_regex(node *op, regex_t *preg)
{
@ -1840,7 +1791,7 @@ static regex_t *as_regex(node *op, regex_t *preg)
cflags &= ~REG_EXTENDED;
xregcomp(preg, s, cflags);
}
nvfree(v);
nvfree(v, 1);
return preg;
}
@ -2292,6 +2243,8 @@ static char *awk_printf(node *n, int *len)
var *v, *arg;
v = nvalloc(1);
//TODO: above, to avoid allocating a single temporary var, take a pointer
//to a temporary that our caller (evaluate()) already has?
fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
i = 0;
@ -2333,7 +2286,7 @@ static char *awk_printf(node *n, int *len)
}
free(fmt);
nvfree(v);
nvfree(v, 1);
b = xrealloc(b, i + 1);
b[i] = '\0';
#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
@ -2661,14 +2614,14 @@ static NOINLINE var *exec_builtin(node *op, var *res)
break;
}
nvfree(tv);
nvfree(tv, 4);
return res;
#undef tspl
}
/*
* Evaluate node - the heart of the program. Supplied with subtree
* and place where to store result. returns ptr to result.
* and place where to store result. Returns ptr to result.
*/
#define XC(n) ((n) >> 8)
@ -2953,33 +2906,38 @@ static var *evaluate(node *op, var *res)
break;
case XC( OC_FUNC ): {
var *vbeg, *v;
var *tv, *sv_fnargs;
const char *sv_progname;
int nargs1, i;
debug_printf_eval("FUNC\n");
/* The body might be empty, still has to eval the args */
if (!op->r.n->info && !op->r.f->body.first)
syntax_error(EMSG_UNDEF_FUNC);
vbeg = v = nvalloc(op->r.f->nargs + 1);
/* The body might be empty, still has to eval the args */
nargs1 = op->r.f->nargs + 1;
tv = nvalloc(nargs1);
i = 0;
while (op1) {
//TODO: explain why one iteration is done even for the case p->r.f->nargs == 0
var *arg = evaluate(nextarg(&op1), v1);
copyvar(v, arg);
v->type |= VF_CHILD;
v->x.parent = arg;
if (++v - vbeg >= op->r.f->nargs)
copyvar(&tv[i], arg);
tv[i].type |= VF_CHILD;
tv[i].x.parent = arg;
if (++i >= op->r.f->nargs)
break;
}
v = fnargs;
fnargs = vbeg;
sv_fnargs = fnargs;
sv_progname = g_progname;
fnargs = tv;
res = evaluate(op->r.f->body.first, res);
nvfree(fnargs, nargs1);
g_progname = sv_progname;
nvfree(fnargs);
fnargs = v;
fnargs = sv_fnargs;
break;
}
@ -3301,7 +3259,7 @@ static var *evaluate(node *op, var *res)
break;
} /* while (op) */
nvfree(v1);
nvfree(v1, 2);
debug_printf_eval("returning from %s(): %p\n", __func__, res);
return res;
#undef fnargs