procps/proc/stat.c
Jim Warner 253ac7f709 library: improve <STAT> cpus offline/online management
When those standardized 'derived' TIC enumerators were
introduced, a problem with potential DELTA distortions
was also introduced when toggling cpus offline/online.

It has always been true that the 1st (summary) line in
/proc/stat will experience a decrease in total tics if
a new cpu is brought online. Such decreases are mostly
due to reductions in 'idle' and 'iowait' tics. Exactly
why such a counterintuitive phenomenon should occur is
a mystery, but this has been acknowledged in proc.txt.

A separate potential distortion arises with individual
cpus. And, here it extends to both bringing processors
online plus taking them offline too. When that happens
the order of the cpus array tracking is upset, placing
the 'new' values in some other processor's array slot.
But even if we were to occupy the same slot, the issue
regarding reductions in 'idle' & 'iowait' still applies.

In all cases, when a DELTA field was found to be negative
it was forced to zero via the 'TICsetH' macro. However
the 'derived' calculations are subject to new forms of
distortion with their own DELTA values. For example we
could find DELTA_SUM_USER + DELTA_SUM_SYSTEM exceeding
DELTA_SUM_TOTAL, an illogical/inappropriate condition.

So this commit moves former protections for individual
cpus to the stat_derive_unique() function and modifies
it to also extend protections to the 'derived' values.
In the process we now protect the cpu 'summary' counts
which were unfortunately previously overlooked (oops).

Reference(s):
. 'derived' types introduced
commit 2c86c4984a
2017-03-29 22:10:56 +11:00

1283 lines
44 KiB
C

/*
* libprocps - Library to read proc filesystem
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef NUMA_DISABLE
#include <dlfcn.h>
#endif
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <proc/sysinfo.h>
#include <proc/procps-private.h>
#include <proc/stat.h>
// the single source file parsed by this module
#define STAT_FILE "/proc/stat"
// growth increments used whenever one of our dynamic arrays fills up
#define BUFFER_INCR 4096 // amount i/p buffer allocations grow
#define STACKS_INCR 32 // amount reap stack allocations grow
#define NEWOLD_INCR 32 // amount jiffs hist allocations grow
/* ------------------------------------------------------------------------- +
a strictly development #define, existing specifically for the top program |
( and it has no effect if ./configure --disable-numa has been specified ) | */
//#define PRETEND_NUMA // pretend there are 3 'discontiguous' numa nodes |
// ------------------------------------------------------------------------- +
/* ------------------------------------------------------------------------- +
because 'reap' would be forced to duplicate the global SYS stuff in every |
TIC type results stack, the following #define can be used to enforce that |
only STAT_noop and STAT_extra plus all the STAT_TIC items will be allowed | */
//#define ENFORCE_LOGICAL // ensure only logical items are accepted by reap |
// ------------------------------------------------------------------------- +
/* One snapshot of tics: the ten values scanned from a "cpu" line of
   /proc/stat, then the five totals computed by stat_derive_unique(). */
struct stat_jifs {
    // as scanned from /proc/stat
    unsigned long long user;
    unsigned long long nice;
    unsigned long long system;
    unsigned long long idle;
    unsigned long long iowait;
    unsigned long long irq;
    unsigned long long sirq;
    unsigned long long stolen;
    unsigned long long guest;
    unsigned long long gnice;
    // derived: total, busy, idle, user and system sums
    unsigned long long xtot;
    unsigned long long xbsy;
    unsigned long long xidl;
    unsigned long long xusr;
    unsigned long long xsys;
};
/* The system-wide (non-cpu) counters taken from /proc/stat:
   "intr", "ctxt", "btime", "processes", "procs_blocked", "procs_running". */
struct stat_data {
    unsigned long intr, ctxt, btime;
    unsigned long procs_created, procs_blocked, procs_running;
};
// current and previous system counters, kept as a pair so deltas can be taken
struct hist_sys {
    struct stat_data new, old;
};
/* Current and previous tics for one entity: the cpu summary, a single
   cpu or a numa node.  'id' is the cpu/node number (or STAT_SUMMARY_ID),
   'numa_node' is the owning node (or STAT_NODE_INVALID) and 'count' is
   the number of cpus that contributed to these tics. */
struct hist_tic {
    int id, numa_node, count;
    struct stat_jifs new, old;
};
// one calloc'd blob of result stacks; blobs are chained, newest first
struct stacks_extent {
int ext_numstacks; // how many stacks this extent was built with
struct stacks_extent *next; // the extent which was allocated before this one
struct stat_stack **stacks; // NULL delimited vector of the stack heads
};
// the list of items used to prime every result stack of some extent(s)
struct item_support {
int num; // includes 'logical_end' delimiter
enum stat_item *enums; // includes 'logical_end' delimiter
};
// ties a set of extents to the items which configure their stacks
struct ext_support {
struct item_support *items; // how these stacks are configured
struct stacks_extent *extents; // anchor for these extents
int dirty_stacks; // set once results were assigned, reset by stat_cleanup_stacks_all
};
// a growable array of new/old jiffies, one element per cpu or numa node
struct tic_support {
    int n_alloc, n_inuse;       // elements allocated, elements occupied
    struct hist_tic *tics;      // the history elements themselves
};
// everything needed to 'reap' one class of TIC results (cpus or numa nodes)
struct reap_support {
int total; // independently obtained # of cpus/nodes
struct ext_support fetch; // extents plus items details
struct tic_support hist; // cpu and node jiffies management
int n_alloc; // last known anchor pointers allocation
struct stat_stack **anchor; // reapable stacks (consolidated extents)
int n_alloc_save; // last known results.stacks allocation
struct stat_reap result; // summary + stacks returned to caller
};
// the context structure behind every public procps_stat_* call
struct stat_info {
int refcount; // 1 after procps_stat_new, context is freed when it drops below 1
FILE *stat_fp; // opened on first read by stat_read_failed, then kept open
char *stat_buf; // grows to accommodate all /proc/stat
int stat_buf_size; // current size for the above stat_buf
struct hist_sys sys_hist; // SYS type management
struct hist_tic cpu_hist; // TIC type management for cpu summary
struct reap_support cpus; // TIC type management for real cpus
struct reap_support nodes; // TIC type management for numa nodes
struct ext_support cpu_summary; // supports /proc/stat line #1 results
struct ext_support select; // support for 'procps_stat_select()'
struct stat_reaped results; // for return to caller after a reap
#ifndef NUMA_DISABLE
void *libnuma_handle; // non-NULL if dlopen() for libnuma was successful
int (*our_max_node)(void); // a libnuma function call via dlsym()
int (*our_node_of_cpu)(int); // a libnuma function call via dlsym()
#endif
struct stat_result get_this; // for return to caller after a get
struct item_support reap_items; // items used for reap (shared among 3)
struct item_support select_items; // items unique to select
};
// ___ Results 'Set' Support ||||||||||||||||||||||||||||||||||||||||||||||||||
/* Every invocation below generates one static 'set_stat_<item>' function.
 * Each such function stores a single value into R->result, drawing on the
 * system history (S) or a tics history (T); the history which is not
 * needed is voided to quiet unused parameter warnings. */
#define setNAME(e) set_stat_ ## e
#define setDECL(e) static void setNAME(e) \
(struct stat_result *R, struct hist_sys *S, struct hist_tic *T)
// regular assignment
// ( the result is simply the current 'new' value of member x )
#define TIC_set(e,t,x) setDECL(e) { \
(void)S; R->result. t = T->new . x; }
#define SYS_set(e,t,x) setDECL(e) { \
(void)T; R->result. t = S->new . x; }
// delta assignment
// ( the result is 'new' minus 'old', with a negative outcome forced to zero )
#define TICsetH(e,t,x) setDECL(e) { \
(void)S; R->result. t = ( T->new . x - T->old. x ); \
if (R->result. t < 0) R->result. t = 0; }
#define SYSsetH(e,t,x) setDECL(e) { \
(void)T; R->result. t = ( S->new . x - S->old. x ); \
if (R->result. t < 0) R->result. t = 0; }
// these two leave the result untouched
setDECL(noop) { (void)R; (void)S; (void)T; }
setDECL(extra) { (void)R; (void)S; (void)T; }
// identification values, taken directly from the tics history element
setDECL(TIC_ID) { (void)S; R->result.s_int = T->id; }
setDECL(TIC_NUMA_NODE) { (void)S; R->result.s_int = T->numa_node; }
setDECL(TIC_NUM_CONTRIBUTORS) { (void)S; R->result.s_int = T->count; }
// current tics
TIC_set(TIC_USER, ull_int, user)
TIC_set(TIC_NICE, ull_int, nice)
TIC_set(TIC_SYSTEM, ull_int, system)
TIC_set(TIC_IDLE, ull_int, idle)
TIC_set(TIC_IOWAIT, ull_int, iowait)
TIC_set(TIC_IRQ, ull_int, irq)
TIC_set(TIC_SOFTIRQ, ull_int, sirq)
TIC_set(TIC_STOLEN, ull_int, stolen)
TIC_set(TIC_GUEST, ull_int, guest)
TIC_set(TIC_GUEST_NICE, ull_int, gnice)
// current derived tics (the x* members valued in stat_derive_unique)
TIC_set(TIC_SUM_TOTAL, ull_int, xtot)
TIC_set(TIC_SUM_BUSY, ull_int, xbsy)
TIC_set(TIC_SUM_IDLE, ull_int, xidl)
TIC_set(TIC_SUM_USER, ull_int, xusr)
TIC_set(TIC_SUM_SYSTEM, ull_int, xsys)
// tics deltas, never negative
TICsetH(TIC_DELTA_USER, sl_int, user)
TICsetH(TIC_DELTA_NICE, sl_int, nice)
TICsetH(TIC_DELTA_SYSTEM, sl_int, system)
TICsetH(TIC_DELTA_IDLE, sl_int, idle)
TICsetH(TIC_DELTA_IOWAIT, sl_int, iowait)
TICsetH(TIC_DELTA_IRQ, sl_int, irq)
TICsetH(TIC_DELTA_SOFTIRQ, sl_int, sirq)
TICsetH(TIC_DELTA_STOLEN, sl_int, stolen)
TICsetH(TIC_DELTA_GUEST, sl_int, guest)
TICsetH(TIC_DELTA_GUEST_NICE, sl_int, gnice)
// derived tics deltas, never negative
TICsetH(TIC_DELTA_SUM_TOTAL, sl_int, xtot)
TICsetH(TIC_DELTA_SUM_BUSY, sl_int, xbsy)
TICsetH(TIC_DELTA_SUM_IDLE, sl_int, xidl)
TICsetH(TIC_DELTA_SUM_USER, sl_int, xusr)
TICsetH(TIC_DELTA_SUM_SYSTEM, sl_int, xsys)
// current system values
SYS_set(SYS_CTX_SWITCHES, ul_int, ctxt)
SYS_set(SYS_INTERRUPTS, ul_int, intr)
SYS_set(SYS_PROC_BLOCKED, ul_int, procs_blocked)
SYS_set(SYS_PROC_CREATED, ul_int, procs_created)
SYS_set(SYS_PROC_RUNNING, ul_int, procs_running)
SYS_set(SYS_TIME_OF_BOOT, ul_int, btime)
// system deltas: the blocked & running variants are coded by hand, without
// the zero floor, so their results are allowed to be negative
SYSsetH(SYS_DELTA_CTX_SWITCHES, s_int, ctxt)
SYSsetH(SYS_DELTA_INTERRUPTS, s_int, intr)
setDECL(SYS_DELTA_PROC_BLOCKED) { (void)T; R->result.s_int = S->new.procs_blocked - S->old.procs_blocked; }
SYSsetH(SYS_DELTA_PROC_CREATED, s_int, procs_created)
setDECL(SYS_DELTA_PROC_RUNNING) { (void)T; R->result.s_int = S->new.procs_running - S->old.procs_running; }
// the generators are no longer needed (setNAME is still required by the table)
#undef setDECL
#undef TIC_set
#undef SYS_set
#undef TICsetH
#undef SYSsetH
// ___ Sorting Support ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
// the extra argument handed to each of the sort comparison functions below
struct sort_parms {
int offset; // added to a stack's head to reach the result being compared
enum stat_sort_order order; // the sort direction, as supplied by the caller
};
#define srtNAME(t) sort_stat_ ## t
#define srtDECL(t) static int srtNAME(t) \
    (const struct stat_stack **A, const struct stat_stack **B, struct sort_parms *P)

/* The comparison functions, one per result type.
 *
 * A and B each address a stack pointer, while P->offset selects the result
 * within those stacks which is to be compared.  A positive P->order yields
 * an ascending sequence and anything else yields a descending sequence.
 *
 * All types are compared explicitly rather than subtracted.  Subtraction
 * can overflow with s_int and, with sl_int, the 'long' difference would be
 * truncated to our 'int' return value, possibly flipping or losing a sign. */
srtDECL(s_int) {
    const struct stat_result *a = (*A)->head + P->offset;
    const struct stat_result *b = (*B)->head + P->offset;
    if ( a->result.s_int > b->result.s_int ) return P->order > 0 ?  1 : -1;
    if ( a->result.s_int < b->result.s_int ) return P->order > 0 ? -1 :  1;
    return 0;
}

srtDECL(sl_int) {
    const struct stat_result *a = (*A)->head + P->offset;
    const struct stat_result *b = (*B)->head + P->offset;
    if ( a->result.sl_int > b->result.sl_int ) return P->order > 0 ?  1 : -1;
    if ( a->result.sl_int < b->result.sl_int ) return P->order > 0 ? -1 :  1;
    return 0;
}

srtDECL(ul_int) {
    const struct stat_result *a = (*A)->head + P->offset;
    const struct stat_result *b = (*B)->head + P->offset;
    if ( a->result.ul_int > b->result.ul_int ) return P->order > 0 ?  1 : -1;
    if ( a->result.ul_int < b->result.ul_int ) return P->order > 0 ? -1 :  1;
    return 0;
}

srtDECL(ull_int) {
    const struct stat_result *a = (*A)->head + P->offset;
    const struct stat_result *b = (*B)->head + P->offset;
    if ( a->result.ull_int > b->result.ull_int ) return P->order > 0 ?  1 : -1;
    if ( a->result.ull_int < b->result.ull_int ) return P->order > 0 ? -1 :  1;
    return 0;
}

// for items with nothing to compare, everything is considered equal
srtDECL(noop) {
    (void)A; (void)B; (void)P;
    return 0;
}
#undef srtDECL
// ___ Controlling Table ||||||||||||||||||||||||||||||||||||||||||||||||||||||
// the signature shared by every 'set_stat_<item>' function
typedef void (*SET_t)(struct stat_result *, struct hist_sys *, struct hist_tic *);
#define RS(e) (SET_t)setNAME(e)
// the sort functions are cast to this generic (qsort_r style) signature
typedef int (*QSR_t)(const void *, const void *, void *);
#define QS(t) (QSR_t)srtNAME(t)
// a result type expressed as a string, with an empty string for 'noop'
#define TS(t) STRINGIFY(t)
#define TS_noop ""
/*
* Need it be said?
* This table must be kept in the exact same order as
* those 'enum stat_item' guys ! */
// ( the table is indexed directly with an item's enum value, see stat_assign_results )
static struct {
SET_t setsfunc; // the actual result setting routine
QSR_t sortfunc; // sort cmp func for a specific type
char *type2str; // the result type as a string value
} Item_table[] = {
/* setsfunc sortfunc type2str
--------------------------- ------------ ----------- */
{ RS(noop), QS(noop), TS_noop },
{ RS(extra), QS(ull_int), TS_noop },
{ RS(TIC_ID), QS(s_int), TS(s_int) },
{ RS(TIC_NUMA_NODE), QS(s_int), TS(s_int) },
{ RS(TIC_NUM_CONTRIBUTORS), QS(s_int), TS(s_int) },
{ RS(TIC_USER), QS(ull_int), TS(ull_int) },
{ RS(TIC_NICE), QS(ull_int), TS(ull_int) },
{ RS(TIC_SYSTEM), QS(ull_int), TS(ull_int) },
{ RS(TIC_IDLE), QS(ull_int), TS(ull_int) },
{ RS(TIC_IOWAIT), QS(ull_int), TS(ull_int) },
{ RS(TIC_IRQ), QS(ull_int), TS(ull_int) },
{ RS(TIC_SOFTIRQ), QS(ull_int), TS(ull_int) },
{ RS(TIC_STOLEN), QS(ull_int), TS(ull_int) },
{ RS(TIC_GUEST), QS(ull_int), TS(ull_int) },
{ RS(TIC_GUEST_NICE), QS(ull_int), TS(ull_int) },
{ RS(TIC_SUM_TOTAL), QS(ull_int), TS(ull_int) },
{ RS(TIC_SUM_BUSY), QS(ull_int), TS(ull_int) },
{ RS(TIC_SUM_IDLE), QS(ull_int), TS(ull_int) },
{ RS(TIC_SUM_USER), QS(ull_int), TS(ull_int) },
{ RS(TIC_SUM_SYSTEM), QS(ull_int), TS(ull_int) },
{ RS(TIC_DELTA_USER), QS(sl_int), TS(sl_int) },
{ RS(TIC_DELTA_NICE), QS(sl_int), TS(sl_int) },
{ RS(TIC_DELTA_SYSTEM), QS(sl_int), TS(sl_int) },
{ RS(TIC_DELTA_IDLE), QS(sl_int), TS(sl_int) },
{ RS(TIC_DELTA_IOWAIT), QS(sl_int), TS(sl_int) },
{ RS(TIC_DELTA_IRQ), QS(sl_int), TS(sl_int) },
{ RS(TIC_DELTA_SOFTIRQ), QS(sl_int), TS(sl_int) },
{ RS(TIC_DELTA_STOLEN), QS(sl_int), TS(sl_int) },
{ RS(TIC_DELTA_GUEST), QS(sl_int), TS(sl_int) },
{ RS(TIC_DELTA_GUEST_NICE), QS(sl_int), TS(sl_int) },
{ RS(TIC_DELTA_SUM_TOTAL), QS(sl_int), TS(sl_int) },
{ RS(TIC_DELTA_SUM_BUSY), QS(sl_int), TS(sl_int) },
{ RS(TIC_DELTA_SUM_IDLE), QS(sl_int), TS(sl_int) },
{ RS(TIC_DELTA_SUM_USER), QS(sl_int), TS(sl_int) },
{ RS(TIC_DELTA_SUM_SYSTEM), QS(sl_int), TS(sl_int) },
{ RS(SYS_CTX_SWITCHES), QS(ul_int), TS(ul_int) },
{ RS(SYS_INTERRUPTS), QS(ul_int), TS(ul_int) },
{ RS(SYS_PROC_BLOCKED), QS(ul_int), TS(ul_int) },
{ RS(SYS_PROC_CREATED), QS(ul_int), TS(ul_int) },
{ RS(SYS_PROC_RUNNING), QS(ul_int), TS(ul_int) },
{ RS(SYS_TIME_OF_BOOT), QS(ul_int), TS(ul_int) },
{ RS(SYS_DELTA_CTX_SWITCHES), QS(s_int), TS(s_int) },
{ RS(SYS_DELTA_INTERRUPTS), QS(s_int), TS(s_int) },
{ RS(SYS_DELTA_PROC_BLOCKED), QS(s_int), TS(s_int) },
{ RS(SYS_DELTA_PROC_CREATED), QS(s_int), TS(s_int) },
{ RS(SYS_DELTA_PROC_RUNNING), QS(s_int), TS(s_int) },
// dummy entry corresponding to STAT_logical_end ...
{ NULL, NULL, NULL }
};
/* please note,
* 1st enum MUST be kept in sync with highest TIC type
* 2nd enum MUST be 1 greater than the highest value of any enum */
#ifdef ENFORCE_LOGICAL
enum stat_item STAT_TIC_highest = STAT_TIC_DELTA_GUEST_NICE;
#endif
enum stat_item STAT_logical_end = STAT_SYS_DELTA_PROC_RUNNING + 1;
#undef setNAME
#undef srtNAME
#undef RS
#undef QS
// ___ Private Functions ||||||||||||||||||||||||||||||||||||||||||||||||||||||
#ifndef NUMA_DISABLE
#ifdef PRETEND_NUMA
// development stand-in for the libnuma 'max node' call: always node id 3
static int fake_max_node (void)
{
    return 3;
}

// development stand-in for the libnuma 'node of cpu' call: the cpu number
// modulo 4 becomes the node, except that a remainder of 1 is folded into
// node 0 (which leaves node 1 forever without any cpus)
static int fake_node_of_cpu (int n)
{
    int slot = n % 4;

    if (slot == 1)
        return 0;
    return slot;
}
#endif
#endif
/*
 * stat_assign_results():
 *
 * Walk a single results stack, calling each item's 'set' function from
 * Item_table with the supplied histories.  The walk ends at the first
 * item which is not below STAT_logical_end (the stack's delimiter).
 */
static inline void stat_assign_results (
        struct stat_stack *stack,
        struct hist_sys *sys_hist,
        struct hist_tic *tic_hist)
{
    struct stat_result *res;

    for (res = stack->head; res->item < STAT_logical_end; ++res)
        Item_table[res->item].setsfunc(res, sys_hist, tic_hist);
} // end: stat_assign_results
/*
 * stat_cleanup_stack():
 *
 * Zero the result of every item in one stack, up to its delimiter.
 * Items which are not above STAT_noop keep whatever they hold.
 */
static inline void stat_cleanup_stack (
        struct stat_result *this)
{
    while (this->item < STAT_logical_end) {
        if (this->item > STAT_noop)
            this->result.ull_int = 0;
        this++;
    }
} // end: stat_cleanup_stack
static inline void stat_cleanup_stacks_all (
struct ext_support *this)
{
struct stacks_extent *ext = this->extents;
int i;
while (ext) {
for (i = 0; ext->stacks[i]; i++)
stat_cleanup_stack(ext->stacks[i]->head);
ext = ext->next;
};
this->dirty_stacks = 0;
} // end: stat_cleanup_stacks_all
/*
 * stat_derive_unique():
 *
 * Value the five 'derived' members (xtot, xusr, xidl, xbsy, xsys) of
 * this->new from the tics already scanned into it.  Should any of those
 * derived values be found lower than its 'old' counterpart, the entire
 * 'old' snapshot is replaced with 'new' so that every subsequent delta
 * for this element will be zero instead of being distorted.
 *
 * Returns: 0, always ( formerly control fell off the end of this non-void
 *          function, leaving its value undefined for any user of it )
 */
static inline int stat_derive_unique (
        struct hist_tic *this)
{
    /* note: we exclude guest tics from xtot since ...
             'user' already includes 'guest'
             'nice' already includes 'gnice'
             ( see: ./kernel/sched/cputime.c, account_guest_time ) */
    this->new.xtot
        = this->new.user
        + this->new.nice
        + this->new.system
        + this->new.idle
        + this->new.iowait
        + this->new.irq
        + this->new.sirq
        + this->new.stolen;
    this->new.xusr = this->new.user + this->new.nice;
    /* this stolen guy is one i'm not sure of yet, but it's documented as:
         "the time spent in other operating systems
          when running in a virtualized environment"
       so it would seem to apply to an 'involuntary wait' for a guest OS */
    this->new.xidl = this->new.idle + this->new.iowait + this->new.stolen;
    this->new.xbsy = this->new.xtot - this->new.xidl;
    this->new.xsys = this->new.xbsy - this->new.xusr;

    // don't distort deltas when cpus are taken offline or brought online
    if (this->new.xtot < this->old.xtot
    || (this->new.xusr < this->old.xusr)
    || (this->new.xidl < this->old.xidl)
    || (this->new.xbsy < this->old.xbsy)
    || (this->new.xsys < this->old.xsys))
        memcpy(&this->old, &this->new, sizeof(struct stat_jifs));

    return 0;
} // end: stat_derive_unique
static void stat_extents_free_all (
struct ext_support *this)
{
while (this->extents) {
struct stacks_extent *p = this->extents;
this->extents = this->extents->next;
free(p);
};
} // end: stat_extents_free_all
/*
 * stat_itemize_stack():
 *
 * Prime 'depth' contiguous results, beginning at p, with the item
 * enumerators found in 'items' while zeroing each result value.
 *
 * Returns: the address of the first result (the p which was passed in)
 */
static inline struct stat_result *stat_itemize_stack (
        struct stat_result *p,
        int depth,
        enum stat_item *items)
{
    int n;

    for (n = 0; n < depth; n++) {
        p[n].item = items[n];
        p[n].result.ull_int = 0;
    }
    return p;
} // end: stat_itemize_stack
/*
 * stat_items_check_failed():
 *
 * Validate a caller's items before they're used to configure stacks.
 *
 * Returns: 0 when acceptable, or -1 if the count is less than 1, the
 *          address looks like an enum value or any item is out of range
 */
static inline int stat_items_check_failed (
        int numitems,
        enum stat_item *items)
{
    int n;

    if (numitems < 1)
        return -1;

    /* if an enum is passed instead of an address of one or more enums, ol' gcc
     * will silently convert it to an address (possibly NULL).  only clang will
     * offer any sort of warning like the following:
     *
     * warning: incompatible integer to pointer conversion passing 'int' to parameter of type 'enum stat_item *'
     * my_stack = procps_stat_select(info, STAT_noop, num);
     *                                     ^~~~~~~~~~~~~~~~
     * so any 'address' this small is treated as such a mistake */
    if ((void *)items < (void *)(unsigned long)(2 * STAT_logical_end))
        return -1;

    for (n = 0; n < numitems; n++) {
        // a stat_item is currently unsigned, but we'll protect our future
        if (items[n] < 0 || items[n] >= STAT_logical_end)
            return -1;
    }
    return 0;
} // end: stat_items_check_failed
/*
 * stat_make_numa_hist():
 *
 * Rebuild the numa nodes history by summing the new/old tics of every
 * cpu currently in use into the element of the node which owns that cpu.
 * Each contributing cpu also has its own numa_node valued here.
 *
 * Returns: the number of node slots (highest node id + 1) on success,
 *          0 if libnuma was not loaded (or NUMA_DISABLE was defined),
 *          or -ENOMEM should the nodes history fail to grow
 */
static int stat_make_numa_hist (
        struct stat_info *info)
{
#ifndef NUMA_DISABLE
 // add one jiffies member, both new and old, from a cpu to its node
 #define addJIFS(m) do { \
    nod_ptr->new. m += cpu_ptr->new. m; \
    nod_ptr->old. m += cpu_ptr->old. m; } while (0)
    struct hist_tic *cpu_ptr, *nod_ptr;
    int i, node;

    if (info->libnuma_handle == NULL)
        return 0;

    /* are numa nodes dynamic like online cpus can be?
       ( and be careful, this libnuma call returns the highest node id in use, )
       ( NOT an actual number of nodes - some of those 'slots' might be unused ) */
    info->nodes.total = info->our_max_node() + 1;
    if (info->nodes.hist.n_alloc == 0
    || (info->nodes.total >= info->nodes.hist.n_alloc)) {
        /* grow through a temporary and only then record the new size, so a
           failure neither leaks the former array nor leaves behind an
           n_alloc which claims memory we do not actually have */
        int want = info->nodes.total + NEWOLD_INCR;
        struct hist_tic *grown = realloc(info->nodes.hist.tics, (size_t)want * sizeof(struct hist_tic));
        if (grown == NULL)
            return -ENOMEM;
        info->nodes.hist.tics = grown;
        info->nodes.hist.n_alloc = want;
    }

    // forget all of the prior node statistics & anticipate unassigned slots
    memset(info->nodes.hist.tics, 0, info->nodes.hist.n_alloc * sizeof(struct hist_tic));
    nod_ptr = info->nodes.hist.tics;
    for (i = 0; i < info->nodes.total; i++) {
        nod_ptr->id = nod_ptr->numa_node = STAT_NODE_INVALID;
        ++nod_ptr;
    }

    // spin thru each cpu and value the jiffs for it's numa node
    for (i = 0; i < info->cpus.hist.n_inuse; i++) {
        cpu_ptr = info->cpus.hist.tics + i;
        node = info->our_node_of_cpu(cpu_ptr->id);
        // a negative node means 'none', while one beyond our array is never
        // to be trusted since using it would write outside that allocation
        if (node < 0 || node >= info->nodes.hist.n_alloc)
            continue;
        nod_ptr = info->nodes.hist.tics + node;
        addJIFS(user);
        addJIFS(nice);
        addJIFS(system);
        addJIFS(idle);
        addJIFS(iowait);
        addJIFS(irq);
        addJIFS(sirq);
        addJIFS(stolen);
        addJIFS(guest);
        addJIFS(gnice);
        addJIFS(xtot);
        addJIFS(xbsy);
        addJIFS(xidl);
        addJIFS(xusr);
        addJIFS(xsys);
        cpu_ptr->numa_node = node;
        nod_ptr->id = node;
        nod_ptr->count++;
    }
    info->nodes.hist.n_inuse = info->nodes.total;
    return info->nodes.hist.n_inuse;
 #undef addJIFS
#else
    return 0;
#endif
} // end: stat_make_numa_hist
/*
 * stat_read_failed():
 *
 * Read all of /proc/stat into our buffer, then value the 'new' tics of
 * the cpu summary & each individual cpu plus the 'new' system counters,
 * after first preserving the former values of each as 'old'.
 *
 * Returns: 0 on success, otherwise a negative errno value
 *          ( or -1 if the summary line itself could not be parsed )
 */
static int stat_read_failed (
        struct stat_info *info)
{
    struct hist_tic *sum_ptr, *cpu_ptr;
    char *bp, *b;
    int i, rc, num, tot_read;
    unsigned long long llnum;

    if (info == NULL)
        return -EINVAL;

    if (!info->cpus.hist.n_alloc) {
        info->cpus.hist.tics = calloc(NEWOLD_INCR, sizeof(struct hist_tic));
        if (!(info->cpus.hist.tics))
            return -ENOMEM;
        info->cpus.hist.n_alloc = NEWOLD_INCR;
        info->cpus.hist.n_inuse = 0;
    }

    if (!info->stat_fp
    && (!(info->stat_fp = fopen(STAT_FILE, "r"))))
        return -errno;
    fflush(info->stat_fp);
    rewind(info->stat_fp);

 #define maxSIZ info->stat_buf_size
 #define curSIZ ( maxSIZ - tot_read )
 #define curPOS ( info->stat_buf + tot_read )
    /* we slurp in the entire file thus avoiding repeated calls to fread,     |
       especially in a massively parallel environment.  additionally, each cpu |
       line is then frozen in time rather than changing until we get around to |
       accessing it.  this helps to minimize (not eliminate) some distortions. | */
    tot_read = errno = 0;
    while ((0 < (num = fread(curPOS, 1, curSIZ, info->stat_fp)))) {
        char *grown;
        tot_read += num;
        if (tot_read < maxSIZ)
            break;
        /* the buffer is full, so grow it through a temporary and record the
           new size only on success - our former buffer thus stays valid,
           and remains owned by info, should this realloc ever fail */
        if (!(grown = realloc(info->stat_buf, maxSIZ + BUFFER_INCR)))
            return -ENOMEM;
        info->stat_buf = grown;
        maxSIZ += BUFFER_INCR;
    }
 #undef maxSIZ
 #undef curSIZ
 #undef curPOS

    if (!feof(info->stat_fp))
        return -errno;
    info->stat_buf[tot_read] = '\0';
    bp = info->stat_buf;

    sum_ptr = &info->cpu_hist;
    // remember summary from last time around
    memcpy(&sum_ptr->old, &sum_ptr->new, sizeof(struct stat_jifs));

    sum_ptr->id = STAT_SUMMARY_ID;              // mark as summary
    sum_ptr->numa_node = STAT_NODE_INVALID;     // mark as invalid

    // now value the cpu summary tics from line #1
    if (8 > sscanf(bp, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu"
        , &sum_ptr->new.user,  &sum_ptr->new.nice,   &sum_ptr->new.system
        , &sum_ptr->new.idle,  &sum_ptr->new.iowait, &sum_ptr->new.irq
        , &sum_ptr->new.sirq,  &sum_ptr->new.stolen
        , &sum_ptr->new.guest, &sum_ptr->new.gnice))
            return -1;
    stat_derive_unique(sum_ptr);

    i = 0;
    rc = 0;
reap_em_again:
    cpu_ptr = info->cpus.hist.tics + i;   // adapt to relocated if reap_em_again
    do {
        // with no further newline there is no further line, and '1 + NULL'
        // must never be handed to sscanf
        char *eol = strchr(bp, '\n');
        if (eol == NULL) {
            rc = 0;
            break;
        }
        bp = eol + 1;
        // remember this cpu from last time around
        memcpy(&cpu_ptr->old, &cpu_ptr->new, sizeof(struct stat_jifs));
        // next can be overridden under 'stat_make_numa_hist'
        cpu_ptr->numa_node = STAT_NODE_INVALID;
        cpu_ptr->count = 1;

        if (8 > (rc = sscanf(bp, "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu"
            , &cpu_ptr->id
            , &cpu_ptr->new.user,  &cpu_ptr->new.nice,   &cpu_ptr->new.system
            , &cpu_ptr->new.idle,  &cpu_ptr->new.iowait, &cpu_ptr->new.irq
            , &cpu_ptr->new.sirq,  &cpu_ptr->new.stolen
            , &cpu_ptr->new.guest, &cpu_ptr->new.gnice))) {
                break;                   // we must tolerate cpus taken offline
        }
        stat_derive_unique(cpu_ptr);
        ++cpu_ptr;
        ++i;
    } while (i < info->cpus.hist.n_alloc);

    if (i == info->cpus.hist.n_alloc && rc >= 8) {
        /* every slot was consumed and there might be more cpus, so grow the
           history through a temporary (keeping the old array on failure) and
           zero the added slots, which realloc leaves indeterminate but whose
           'new' tics are about to be copied to 'old' as if they were valid */
        int was = info->cpus.hist.n_alloc;
        struct hist_tic *grown = realloc(info->cpus.hist.tics, (size_t)(was + NEWOLD_INCR) * sizeof(struct hist_tic));
        if (grown == NULL)
            return -ENOMEM;
        memset(grown + was, 0, NEWOLD_INCR * sizeof(struct hist_tic));
        info->cpus.hist.tics = grown;
        info->cpus.hist.n_alloc = was + NEWOLD_INCR;
        goto reap_em_again;
    }

    info->cpus.total = info->cpus.hist.n_inuse = sum_ptr->count = i;

    // remember sys_hist stuff from last time around
    memcpy(&info->sys_hist.old, &info->sys_hist.new, sizeof(struct stat_data));

    // a keyword which is absent (or unparsable) yields a value of zero
    llnum = 0;
    if ((b = strstr(bp, "intr ")))
        sscanf(b, "intr %llu", &llnum);
    info->sys_hist.new.intr = llnum;

    llnum = 0;
    if ((b = strstr(bp, "ctxt ")))
        sscanf(b, "ctxt %llu", &llnum);
    info->sys_hist.new.ctxt = llnum;

    llnum = 0;
    if ((b = strstr(bp, "btime ")))
        sscanf(b, "btime %llu", &llnum);
    info->sys_hist.new.btime = llnum;

    llnum = 0;
    if ((b = strstr(bp, "processes ")))
        sscanf(b, "processes %llu", &llnum);
    info->sys_hist.new.procs_created = llnum;

    llnum = 0;
    if ((b = strstr(bp, "procs_blocked ")))
        sscanf(b, "procs_blocked %llu", &llnum);
    info->sys_hist.new.procs_blocked = llnum;

    llnum = 0;
    if ((b = strstr(bp, "procs_running ")))
        sscanf(b, "procs_running %llu", &llnum);
    info->sys_hist.new.procs_running = llnum;

    return 0;
} // end: stat_read_failed
/*
 * stat_stacks_alloc():
 *
 * Create 'maxstacks' new result stacks as one more extent, which is
 * pushed onto the front of the extents owned by the context 'this'.
 *
 * Each stack's results are primed with the context's items and each
 * result value begins life as zero.
 *
 * Returns the new stacks_extent whose 'stacks' vector addresses the head
 * of each new stack, or NULL with bad parameters or insufficient memory.
 */
static struct stacks_extent *stat_stacks_alloc (
        struct ext_support *this,
        int maxstacks)
{
    struct stacks_extent *ext;
    size_t vect_size, head_size, list_size, blob_size;
    char *heads, *lists;
    int n;

    if (this == NULL || this->items == NULL || maxstacks < 1)
        return NULL;

    /* the one blob we allocate is laid out as follows:
         [ extent anchor ][ addr vectors + NULL ][ head structs ][ results ]
       and being a single allocation, a single free() later releases it.
       as a minimum, those result structures must remain contiguous within
       each stack since they are accessed through their relative position. */
    vect_size = sizeof(void *) * ((size_t)maxstacks + 1);
    head_size = sizeof(struct stat_stack);
    list_size = sizeof(struct stat_result) * this->items->num;
    blob_size = sizeof(struct stacks_extent)
              + vect_size
              + (head_size + list_size) * maxstacks;

    ext = calloc(1, blob_size);
    if (ext == NULL)
        return NULL;

    // the newest extent always goes to the front of the chain
    ext->next = this->extents;
    this->extents = ext;

    // carve that blob into its vectors, heads and results regions
    ext->stacks = (struct stat_stack **)((char *)ext + sizeof(struct stacks_extent));
    heads = (char *)ext->stacks + vect_size;
    lists = heads + head_size * maxstacks;

    for (n = 0; n < maxstacks; n++) {
        struct stat_stack *hd = (struct stat_stack *)(heads + head_size * n);
        struct stat_result *rs = (struct stat_result *)(lists + list_size * n);
        hd->head = stat_itemize_stack(rs, this->items->num, this->items->enums);
        ext->stacks[n] = hd;
    }
    // ( the vector's final NULL delimiter was courtesy of calloc )
    ext->ext_numstacks = maxstacks;
    return ext;
} // end: stat_stacks_alloc
/*
 * stat_stacks_fetch():
 *
 * Assign results to one stack for each history element in use, growing
 * the pool of stacks as needed, then expose those stacks to the caller
 * as a NULL delimited vector in this->result.
 *
 * Returns: the number of stacks with results (which could well be zero),
 *          -EINVAL if 'this' is NULL, or -ENOMEM if an allocation failed
 *
 * All of our bookkeeping counts are only advanced once the memory they
 * describe really exists and each realloc goes through a temporary.  So
 * a failure leaks nothing and a later call will not trust memory which
 * was never obtained.
 */
static int stat_stacks_fetch (
        struct stat_info *info,
        struct reap_support *this)
{
 #define n_alloc  this->n_alloc
 #define n_inuse  this->hist.n_inuse
 #define n_saved  this->n_alloc_save
    struct stacks_extent *ext;
    void *grown;
    int i;

    if (this == NULL)
        return -EINVAL;

    // initialize stuff -----------------------------------
    if (!this->anchor) {
        if (!(this->anchor = calloc(STACKS_INCR, sizeof(void *))))
            return -ENOMEM;
        n_alloc = STACKS_INCR;
    }
    if (!this->fetch.extents) {
        if (!(ext = stat_stacks_alloc(&this->fetch, n_alloc)))
            return -ENOMEM;
        memcpy(this->anchor, ext->stacks, sizeof(void *) * n_alloc);
    }
    if (this->fetch.dirty_stacks)
        stat_cleanup_stacks_all(&this->fetch);

    // iterate stuff --------------------------------------
    for (i = 0; i < n_inuse; i++) {
        if (!(i < n_alloc)) {
            // we're out of stacks, so add another extent's worth of them
            if (!(grown = realloc(this->anchor, sizeof(void *) * (n_alloc + STACKS_INCR))))
                return -ENOMEM;
            this->anchor = grown;
            if (!(ext = stat_stacks_alloc(&this->fetch, STACKS_INCR)))
                return -ENOMEM;
            memcpy(this->anchor + i, ext->stacks, sizeof(void *) * STACKS_INCR);
            n_alloc += STACKS_INCR;
        }
        stat_assign_results(this->anchor[i], &info->sys_hist, &this->hist.tics[i]);
    }

    // finalize stuff -------------------------------------
    /* note: we go to this trouble of maintaining a duplicate of the consolidated |
             extent stacks addresses represented as our 'anchor' since these ptrs |
             are exposed to a user (um, not that we don't trust 'em or anything). |
             plus, we can NULL delimit these ptrs which we couldn't do otherwise. | */
    if (n_saved < i + 1) {
        if (!(grown = realloc(this->result.stacks, sizeof(void *) * (i + 1))))
            return -ENOMEM;
        this->result.stacks = grown;
        n_saved = i + 1;
    }
    memcpy(this->result.stacks, this->anchor, sizeof(void *) * i);
    this->result.stacks[i] = NULL;
    this->result.total = i;
    this->fetch.dirty_stacks = 1;

    // callers beware, this might be zero (maybe no libnuma.so) ...
    return this->result.total;
 #undef n_alloc
 #undef n_inuse
 #undef n_saved
} // end: stat_stacks_fetch
/*
 * stat_stacks_reconfig_maybe():
 *
 * Compare a caller's items with those currently configured for 'this'.
 * If they differ, the new items (plus our delimiter) are saved and all
 * existing extents are freed since their stacks were primed for the old.
 *
 * Returns: 1 if a reconfiguration took place, 0 if nothing has changed,
 *          -EINVAL for unacceptable items or -ENOMEM if out of memory
 */
static int stat_stacks_reconfig_maybe (
        struct ext_support *this,
        enum stat_item *items,
        int numitems)
{
    enum stat_item *grown;

    if (stat_items_check_failed(numitems, items))
        return -EINVAL;

    /* is this the first time or have things changed since we were last called?
       if so, gotta' redo all of our stacks stuff ... */
    if (this->items->num == numitems + 1
    && !memcmp(this->items->enums, items, sizeof(enum stat_item) * numitems))
        return 0;

    /* allow for our STAT_logical_end, while resizing through a temporary so
       that upon failure the former (still consistent) items are neither
       leaked nor replaced by a NULL which a later memcmp would then visit */
    if (!(grown = realloc(this->items->enums, sizeof(enum stat_item) * (numitems + 1))))
        return -ENOMEM;
    this->items->enums = grown;
    memcpy(this->items->enums, items, sizeof(enum stat_item) * numitems);
    this->items->enums[numitems] = STAT_logical_end;
    this->items->num = numitems + 1;
    stat_extents_free_all(this);
    return 1;
} // end: stat_stacks_reconfig_maybe
/*
 * stat_update_single_stack():
 *
 * Refresh the one and only stack belonging to 'this' (allocating it on
 * first use) with results drawn from the system & cpu summary histories.
 *
 * Returns: that stack, or NULL should the needed allocation fail
 */
static struct stat_stack *stat_update_single_stack (
        struct stat_info *info,
        struct ext_support *this)
{
    struct stat_stack *only;

    if (this->extents == NULL) {
        if (stat_stacks_alloc(this, 1) == NULL)
            return NULL;
    }
    if (this->dirty_stacks)
        stat_cleanup_stacks_all(this);

    only = this->extents->stacks[0];
    stat_assign_results(only, &info->sys_hist, &info->cpu_hist);
    this->dirty_stacks = 1;

    return only;
} // end: stat_update_single_stack
#if defined(PRETEND_NUMA) && defined(NUMA_DISABLE)
# warning 'PRETEND_NUMA' ignored, 'NUMA_DISABLE' is active
#endif
// ___ Public Functions |||||||||||||||||||||||||||||||||||||||||||||||||||||||
// --- standard required functions --------------------------------------------
/*
 * procps_stat_new:
 *
 * Allocate and initialize a fresh context for the stat interface,
 * which includes one priming read of /proc/stat.
 *
 * The context starts out with a refcount of 1, which must ultimately
 * be decremented (procps_stat_unref) for its resources to be released.
 *
 * Returns: 0 on success, with the new context stored through 'info',
 *          or < 0 on failure ( 'info' itself must be the address of
 *          a NULL pointer, otherwise -EINVAL is returned )
 */
PROCPS_EXPORT int procps_stat_new (
        struct stat_info **info)
{
    struct stat_info *ctx;
    int rc;

    if (info == NULL)
        return -EINVAL;
    if (*info != NULL)
        return -EINVAL;

    ctx = calloc(1, sizeof(struct stat_info));
    if (ctx == NULL)
        return -ENOMEM;
    ctx->stat_buf = calloc(1, BUFFER_INCR);
    if (ctx->stat_buf == NULL) {
        free(ctx);
        return -ENOMEM;
    }
    ctx->stat_buf_size = BUFFER_INCR;
    ctx->refcount = 1;

    ctx->results.cpus = &ctx->cpus.result;
    ctx->results.nodes = &ctx->nodes.result;
    ctx->cpus.total = procps_cpu_count();

    // all three flavors of reap share just one set of items
    ctx->nodes.fetch.items = &ctx->reap_items;
    ctx->cpus.fetch.items = &ctx->reap_items;
    ctx->cpu_summary.items = &ctx->reap_items;
    // whereas select is given a set of its very own
    ctx->select.items = &ctx->select_items;

#ifndef NUMA_DISABLE
 #ifndef PRETEND_NUMA
    // the most recent version is tried first, then a version we know works...
    ctx->libnuma_handle = dlopen("libnuma.so", RTLD_LAZY);
    if (ctx->libnuma_handle == NULL)
        ctx->libnuma_handle = dlopen("libnuma.so.1", RTLD_LAZY);
    if (ctx->libnuma_handle != NULL) {
        ctx->our_max_node = dlsym(ctx->libnuma_handle, "numa_max_node");
        ctx->our_node_of_cpu = dlsym(ctx->libnuma_handle, "numa_node_of_cpu");
        if (!ctx->our_max_node || !ctx->our_node_of_cpu) {
            // this dlclose is safe - we've yet to call numa_node_of_cpu
            // ( there's one other dlclose which has now been disabled )
            dlclose(ctx->libnuma_handle);
            ctx->libnuma_handle = NULL;
        }
    }
 #else
    ctx->libnuma_handle = (void *)-1;
    ctx->our_max_node = fake_max_node;
    ctx->our_node_of_cpu = fake_node_of_cpu;
 #endif
#endif

    /* do a priming read here for the following potential benefits: |
         1) ensure there will be no problems with subsequent access |
         2) make delta results potentially useful, even if 1st time |
         3) eliminate need for history distortions 1st time 'switch' | */
    rc = stat_read_failed(ctx);
    if (rc != 0) {
        procps_stat_unref(&ctx);
        return rc;
    }

    *info = ctx;
    return 0;
} // end :procps_stat_new
/*
 * procps_stat_ref:
 *
 * Take one more reference on an existing context.
 *
 * Returns: the new refcount, or -EINVAL if 'info' is NULL
 */
PROCPS_EXPORT int procps_stat_ref (
        struct stat_info *info)
{
    if (!info)
        return -EINVAL;

    return ++info->refcount;
} // end: procps_stat_ref
/*
 * procps_stat_unref:
 *
 * Drop one reference on a context.  When no references remain, every
 * resource is released and the caller's pointer is set to NULL.
 *
 * Returns: the remaining refcount (0 once the context has been freed),
 *          or -EINVAL if 'info' or the pointer it addresses is NULL
 */
PROCPS_EXPORT int procps_stat_unref (
        struct stat_info **info)
{
    struct stat_info *ctx;

    if (info == NULL || *info == NULL)
        return -EINVAL;

    ctx = *info;
    if (--ctx->refcount >= 1)
        return ctx->refcount;

    // that was the last reference, so everything must go ...
    if (ctx->stat_fp)
        fclose(ctx->stat_fp);
    // ( free & our extents release both tolerate what was never allocated )
    free(ctx->stat_buf);

    free(ctx->cpus.anchor);
    free(ctx->cpus.result.stacks);
    free(ctx->cpus.hist.tics);
    stat_extents_free_all(&ctx->cpus.fetch);

    free(ctx->nodes.anchor);
    free(ctx->nodes.result.stacks);
    free(ctx->nodes.hist.tics);
    stat_extents_free_all(&ctx->nodes.fetch);

    stat_extents_free_all(&ctx->cpu_summary);
    stat_extents_free_all(&ctx->select);

    free(ctx->reap_items.enums);
    free(ctx->select_items.enums);

#ifndef NUMA_DISABLE
 #ifndef PRETEND_NUMA
    /* note: we'll skip a dlclose() to avoid the following libnuma memory
     *       leak which is triggered after a call to numa_node_of_cpu():
     *         ==1234== LEAK SUMMARY:
     *         ==1234==    definitely lost: 512 bytes in 1 blocks
     *         ==1234==    indirectly lost: 48 bytes in 2 blocks
     *         ==1234==    ...
     * [ thanks very much libnuma, for all the pain you've caused ]
     */
//  if ((*info)->libnuma_handle)
//      dlclose((*info)->libnuma_handle);
 #endif
#endif

    free(ctx);
    *info = NULL;
    return 0;
} // end: procps_stat_unref
// --- variable interface functions -------------------------------------------
PROCPS_EXPORT struct stat_result *procps_stat_get (
struct stat_info *info,
enum stat_item item)
{
static time_t sav_secs;
time_t cur_secs;
if (info == NULL)
return NULL;
if (item < 0 || item >= STAT_logical_end)
return NULL;
/* we will NOT read the source file with every call - rather, we'll offer
a granularity of 1 second between reads ... */
cur_secs = time(NULL);
if (1 <= cur_secs - sav_secs) {
if (stat_read_failed(info))
return NULL;
sav_secs = cur_secs;
}
info->get_this.item = item;
// with 'get', we must NOT honor the usual 'noop' guarantee
// if (item > STAT_noop)
info->get_this.result.ull_int = 0;
Item_table[item].setsfunc(&info->get_this, &info->sys_hist, &info->cpu_hist);
return &info->get_this;
} // end: procps_stat_get
/* procps_stat_reap():
 *
 * Harvest all the requested NUMA NODE and/or CPU information providing the
 * result stacks along with totals and the cpu summary.
 *
 * 'what' must be STAT_REAP_CPUS_ONLY or STAT_REAP_CPUS_AND_NODES.
 *
 * Returns: pointer to a stat_reaped struct on success, NULL on error.
 */
PROCPS_EXPORT struct stat_reaped *procps_stat_reap (
        struct stat_info *info,
        enum stat_reap_type what,
        enum stat_item *items,
        int numitems)
{
    int rc;

    if (!info || !items)
        return NULL;
    if (what != STAT_REAP_CPUS_ONLY && what != STAT_REAP_CPUS_AND_NODES)
        return NULL;

#ifdef ENFORCE_LOGICAL
    {
        int n;
        // those STAT_SYS_type enum's make sense only to 'select' ...
        for (n = 0; n < numitems; n++)
            if (items[n] > STAT_TIC_highest)
                return NULL;
    }
#endif

    rc = stat_stacks_reconfig_maybe(&info->cpu_summary, items, numitems);
    if (rc < 0)
        return NULL;
    if (rc != 0) {
        // a non-zero (positive) return, presumably meaning the items were
        // changed, requires that extents built for the old items be dropped
        stat_extents_free_all(&info->cpus.fetch);
        stat_extents_free_all(&info->nodes.fetch);
    }

    if (stat_read_failed(info))
        return NULL;
    info->results.summary = stat_update_single_stack(info, &info->cpu_summary);

    /* unlike the other 'reap' functions, <stat> exposes two separate stacks
       pointer arrays to callers.  So, to keep the promise of a NULL delimiter
       a minimal array must always exist for the optional (nodes) one */
    if (!info->nodes.result.stacks) {
        info->nodes.result.stacks = malloc(sizeof(void *));
        if (!info->nodes.result.stacks)
            return NULL;
    }
    info->nodes.result.total = 0;
    info->nodes.result.stacks[0] = NULL;

#ifndef NUMA_DISABLE
    if (what == STAT_REAP_CPUS_AND_NODES) {
        /* note: when doing numa at all, the numa history must be made before
                 the cpu stacks are built (fetched) because the read_stat guy
                 will have marked (temporarily) all cpu node ids as invalid */
        if (stat_make_numa_hist(info) < 0)
            return NULL;
        // the result is ignored to tolerate an unexpected absence of libnuma.so
        stat_stacks_fetch(info, &info->nodes);
    }
#endif

    // both of the valid reap types finish by building the cpu stacks
    if (!stat_stacks_fetch(info, &info->cpus))
        return NULL;

    return &info->results;
} // end: procps_stat_reap
/* procps_stat_select():
 *
 * Harvest all the requested TIC and/or SYS information then return
 * it in a results stack.
 *
 * Returns: pointer to a stat_stack struct on success, NULL on error
 *          (which includes a NULL info or a NULL items pointer).
 */
PROCPS_EXPORT struct stat_stack *procps_stat_select (
        struct stat_info *info,
        enum stat_item *items,
        int numitems)
{
    if (!info || !items)
        return NULL;

    // (re)configure the select stack for these items, if need be
    if (stat_stacks_reconfig_maybe(&info->select, items, numitems) < 0)
        return NULL;

    // each call to select performs a fresh read of the source
    if (stat_read_failed(info))
        return NULL;

    return stat_update_single_stack(info, &info->select);
} // end: procps_stat_select
/*
 * procps_stat_sort():
 *
 * Sort stacks anchored in the passed stack pointers array
 * based on the designated sort enumerator and specified order.
 *
 * Returns those same addresses sorted, or NULL when an argument is
 * invalid or the sort item is not present in the first stack.
 *
 * Note: all of the stacks must be homogeneous (of equal length and content).
 */
PROCPS_EXPORT struct stat_stack **procps_stat_sort (
        struct stat_info *info,
        struct stat_stack *stacks[],
        int numstacked,
        enum stat_item sortitem,
        enum stat_sort_order order)
{
    struct sort_parms parms;
    struct stat_result *this;
    int offset;

    if (!info || !stacks)
        return NULL;
    // a stat_item is currently unsigned, but we'll protect our future
    if (sortitem < 0 || sortitem >= STAT_logical_end)
        return NULL;
    if (order != STAT_SORT_ASCEND && order != STAT_SORT_DESCEND)
        return NULL;
    // with fewer than two stacks, there is nothing to be sorted
    if (numstacked < 2)
        return stacks;

    // find where the sort item lives within the first stack; since stacks
    // are homogeneous, that offset will then be valid for every stack
    for (offset = 0, this = stacks[0]->head; this->item != sortitem; ++offset, ++this) {
        // reaching the end-of-stack marker means the item isn't present
        if (this->item >= STAT_logical_end)
            return NULL;
    }

    parms.offset = offset;
    parms.order = order;
    qsort_r(stacks, numstacked, sizeof(void *), (QSR_t)Item_table[this->item].sortfunc, &parms);

    return stacks;
} // end: procps_stat_sort
// --- special debugging function(s) ------------------------------------------
/*
 * The following isn't part of the normal programming interface. Rather,
 * it exists to validate result types referenced in application programs.
 *
 * It's used only when:
 * 1) 'XTRA_PROCPS_DEBUG' has been defined, or
 * 2) '#include <proc/xtra-procps-debug.h>' has been used
 */
/*
 * xtra_stat_get():
 *
 * Debugging wrapper for procps_stat_get().  It reports on stderr an item
 * that is out of range or a 'typestr' that differs from the type string
 * recorded for the item in Item_table.  An empty type string in that
 * table disables the comparison.
 *
 * Returns: whatever procps_stat_get() returned.
 */
PROCPS_EXPORT struct stat_result *xtra_stat_get (
        struct stat_info *info,
        enum stat_item actual_enum,
        const char *typestr,
        const char *file,
        int lineno)
{
    struct stat_result *r = procps_stat_get(info, actual_enum);
    char *expected;

    if (!(actual_enum >= 0 && actual_enum < STAT_logical_end)) {
        fprintf(stderr, "%s line %d: invalid item = %d, type = %s\n"
            , file, lineno, actual_enum, typestr);
    }

    // lacking a result, there is no item whose type could be validated
    if (!r)
        return r;

    expected = Item_table[r->item].type2str;
    if (expected[0] != '\0' && strcmp(typestr, expected) != 0)
        fprintf(stderr, "%s line %d: was %s, expected %s\n", file, lineno, typestr, expected);

    return r;
} // end: xtra_stat_get
/*
 * xtra_stat_val():
 *
 * Debugging accessor for one result within a stack.  It reports on stderr
 * a 'relative_enum' that falls outside the stack or a 'typestr' that
 * differs from the type string recorded for that item in Item_table.
 * An empty type string in that table disables the comparison.
 *
 * Returns: the address of the indexed result, or NULL if 'relative_enum'
 *          is out of range.
 */
PROCPS_EXPORT struct stat_result *xtra_stat_val (
        int relative_enum,
        const char *typestr,
        const struct stat_stack *stack,
        struct stat_info *info,
        const char *file,
        int lineno)
{
    char *expected;
    int depth = 0;

    (void)info;     // accepted as part of the interface, but not needed here

    // count the results which precede this stack's end-of-stack marker
    while (stack->head[depth].item < STAT_logical_end)
        ++depth;

    if (relative_enum < 0 || relative_enum >= depth) {
        fprintf(stderr, "%s line %d: invalid relative_enum = %d, type = %s\n"
            , file, lineno, relative_enum, typestr);
        return NULL;
    }

    expected = Item_table[stack->head[relative_enum].item].type2str;
    if (expected[0] != '\0' && strcmp(typestr, expected) != 0) {
        fprintf(stderr, "%s line %d: was %s, expected %s\n", file, lineno, typestr, expected);
    }

    return &stack->head[relative_enum];
} // end: xtra_stat_val