procps/proc/stat.c
Jim Warner ea930f6f9e library <stat>: input file buffer size must be dynamic
Since its introduction, our evolved /proc/stat API has
relied on a static buffer of 8192 bytes. This approach
is probably Ok for other /proc files but it would only
accommodate around 100 processors. If such a threshold
were exceeded then this interface could never succeed.

Nowadays 100 processors doesn't seem at all excessive.

So this commit trades that static buffer for a dynamic
self-tuning one. And since so much former top CPU code
was already rolled into this module, we just stole the
already proven top dynamic buffer management code too.

[ this also meant switching low level unbuffered I/O ]
[ calls to standard library buffered I/O calls. that ]
[ is exactly what <slabinfo> and <diskstats> employ. ]

Reference(s):
. 1st gen readstat introduction
commit a410e236ab

Signed-off-by: Jim Warner <james.warner@comcast.net>
2017-01-05 06:48:38 +11:00

1281 lines
44 KiB
C

/*
* libprocps - Library to read proc filesystem
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef NUMA_DISABLE
#include <dlfcn.h>
#endif
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <proc/sysinfo.h>
#include <proc/procps-private.h>
#include <proc/stat.h>
/* the pseudo file this module parses, plus the growth increments used
   whenever one of the dynamically sized areas proves too small */
#define STAT_FILE "/proc/stat"
#define BUFFER_INCR 4096 // amount i/p buffer allocations grow
#define STACKS_INCR 32 // amount reap stack allocations grow
#define NEWOLD_INCR 32 // amount jiffs hist allocations grow
/* ------------------------------------------------------------------------- +
a strictly development #define, existing specifically for the top program |
( and it has no effect if ./configure --disable-numa has been specified ) | */
//#define PRETEND_NUMA // pretend there are 3 'discontiguous' numa nodes |
// ------------------------------------------------------------------------- +
/* ------------------------------------------------------------------------- +
because 'reap' would be forced to duplicate the global SYS stuff in every |
TIC type results stack, the following #define can be used to enforce that |
only STAT_noop and STAT_extra plus all the STAT_TIC items will be allowed | */
//#define ENFORCE_LOGICAL // ensure only logical items are accepted by reap |
// ------------------------------------------------------------------------- +
/* tick counters for a single 'cpu' line (summary, one cpu, or one numa node) */
struct stat_jifs {
    // values taken directly from the line, in field order
    unsigned long long user;
    unsigned long long nice;
    unsigned long long system;
    unsigned long long idle;
    unsigned long long iowait;
    unsigned long long irq;
    unsigned long long sirq;
    unsigned long long stolen;
    unsigned long long guest;
    unsigned long long gnice;
    // values computed from the above by stat_derive_unique()
    unsigned long long xtot;
    unsigned long long xbsy;
    unsigned long long xidl;
    unsigned long long xusr;
    unsigned long long xsys;
};
/* the system wide (non-cpu) values extracted from /proc/stat */
struct stat_data {
    unsigned long intr;             // from the 'intr' line
    unsigned long ctxt;             // from the 'ctxt' line
    unsigned long btime;            // from the 'btime' line
    unsigned long procs_created;    // from the 'processes' line
    unsigned long procs_blocked;    // from the 'procs_blocked' line
    unsigned long procs_running;    // from the 'procs_running' line
};
/* current plus previous system wide values, the basis for SYS deltas */
struct hist_sys {
    struct stat_data new;           // values from the most recent read
    struct stat_data old;           // values from the read before that
};
/* current plus previous tick values for the summary, a cpu or a numa node */
struct hist_tic {
    int id;                         // cpu/node number, or STAT_SUMMARY_ID
    int id_sav;                     // the id found in this slot last time
    int numa_node;                  // owning node, or STAT_NODE_INVALID
    int count;                      // number of cpus contributing here
    struct stat_jifs new;           // tics from the most recent read
    struct stat_jifs old;           // tics from the read before that
};
/* header for one single-allocation blob holding a group of result stacks */
struct stacks_extent {
    int ext_numstacks;              // how many stacks this extent holds
    struct stacks_extent *next;     // link to the previously added extent
    struct stat_stack **stacks;     // NULL delimited vector of stack heads
};
/* the set of items (enums) with which a group of stacks is configured */
struct item_support {
    int num;                        // item count, 'logical_end' delimiter included
    enum stat_item *enums;          // the items, 'logical_end' delimiter included
};
/* ties a chain of stack extents to the items they were built for */
struct ext_support {
    struct item_support *items;     // how these stacks are configured
    struct stacks_extent *extents;  // anchor for these extents
    int dirty_stacks;               // set once results are assigned, reset by cleanup
};
/* a growable array of new/old tick history entries */
struct tic_support {
    int n_alloc;                    // number of 'tics' entries allocated
    int n_inuse;                    // number of 'tics' entries occupied
    struct hist_tic *tics;          // the actual new/old jiffies
};
/* everything needed to 'reap' one category of results (cpus or numa nodes) */
struct reap_support {
    int total;                      // independently obtained # of cpus/nodes
    struct ext_support fetch;       // extents plus items details
    struct tic_support hist;        // cpu and node jiffies management
    int n_alloc;                    // last known anchor pointers allocation
    struct stat_stack **anchor;     // reapable stacks (consolidated extents)
    int n_alloc_save;               // last known results.stacks allocation
    struct stat_reap result;        // summary + stacks returned to caller
};
/* the context structure behind each of the procps_stat_* functions */
struct stat_info {
    int refcount;                       // starts at 1, freed when it drops below 1
    FILE *stat_fp;                      // STAT_FILE stream, opened on the first read
    char *stat_buf;                     // grows to accommodate all /proc/stat
    int stat_buf_size;                  // current size for the above stat_buf
    struct hist_sys sys_hist;           // SYS type management
    struct hist_tic cpu_hist;           // TIC type management for cpu summary
    struct reap_support cpus;           // TIC type management for real cpus
    struct reap_support nodes;          // TIC type management for numa nodes
    struct ext_support cpu_summary;     // supports /proc/stat line #1 results
    struct ext_support select;          // support for 'procps_stat_select()'
    struct stat_reaped results;         // for return to caller after a reap
#ifndef NUMA_DISABLE
    void *libnuma_handle;               // non-NULL if dlopen() for libnuma was successful
    int (*our_max_node)(void);          // a libnuma function call via dlsym()
    int (*our_node_of_cpu)(int);        // a libnuma function call via dlsym()
#endif
    struct stat_result get_this;        // for return to caller after a get
    struct item_support reap_items;     // items used for reap (shared among 3)
    struct item_support select_items;   // items unique to select
};
// ___ Results 'Set' Support ||||||||||||||||||||||||||||||||||||||||||||||||||
/* Each item gets one small 'set' function, named set_stat_<item>, which
   copies a value out of the history structs into a caller's result struct.
   Every such function shares the same three parameters:
      R - the result being valued
      S - system wide new/old values (unused by the TIC setters)
      T - tick new/old values        (unused by the SYS setters)
   The macros below generate those functions; all but setNAME are
   #undef'd as soon as the last function has been generated. */
#define setNAME(e) set_stat_ ## e
#define setDECL(e) static void setNAME(e) \
(struct stat_result *R, struct hist_sys *S, struct hist_tic *T)
// regular assignment
/* TIC_set / SYS_set: result member 't' receives the current ('new') value
   of field 'x' */
#define TIC_set(e,t,x) setDECL(e) { \
(void)S; R->result. t = T->new . x; }
#define SYS_set(e,t,x) setDECL(e) { \
(void)T; R->result. t = S->new . x; }
// delta assignment
/* TICsetH / SYSsetH: result member 't' receives 'new' minus 'old' for
   field 'x', with a negative result forced to zero */
#define TICsetH(e,t,x) setDECL(e) { \
(void)S; R->result. t = ( T->new . x - T->old. x ); \
if (R->result. t < 0) R->result. t = 0; }
#define SYSsetH(e,t,x) setDECL(e) { \
(void)T; R->result. t = ( S->new . x - S->old. x ); \
if (R->result. t < 0) R->result. t = 0; }
/* these two leave the result untouched */
setDECL(noop) { (void)R; (void)S; (void)T; }
setDECL(extra) { (void)R; (void)S; (void)T; }
/* identification values, which do not come from the new/old tics */
setDECL(TIC_ID) { (void)S; R->result.s_int = T->id; }
setDECL(TIC_NUMA_NODE) { (void)S; R->result.s_int = T->numa_node; }
setDECL(TIC_NUM_CONTRIBUTORS) { (void)S; R->result.s_int = T->count; }
/* current tick values */
TIC_set(TIC_USER, ull_int, user)
TIC_set(TIC_NICE, ull_int, nice)
TIC_set(TIC_SYSTEM, ull_int, system)
TIC_set(TIC_IDLE, ull_int, idle)
TIC_set(TIC_IOWAIT, ull_int, iowait)
TIC_set(TIC_IRQ, ull_int, irq)
TIC_set(TIC_SOFTIRQ, ull_int, sirq)
TIC_set(TIC_STOLEN, ull_int, stolen)
TIC_set(TIC_GUEST, ull_int, guest)
TIC_set(TIC_GUEST_NICE, ull_int, gnice)
TIC_set(TIC_SUM_TOTAL, ull_int, xtot)
TIC_set(TIC_SUM_BUSY, ull_int, xbsy)
TIC_set(TIC_SUM_IDLE, ull_int, xidl)
TIC_set(TIC_SUM_USER, ull_int, xusr)
TIC_set(TIC_SUM_SYSTEM, ull_int, xsys)
/* tick changes between the two most recent reads */
TICsetH(TIC_DELTA_USER, sl_int, user)
TICsetH(TIC_DELTA_NICE, sl_int, nice)
TICsetH(TIC_DELTA_SYSTEM, sl_int, system)
TICsetH(TIC_DELTA_IDLE, sl_int, idle)
TICsetH(TIC_DELTA_IOWAIT, sl_int, iowait)
TICsetH(TIC_DELTA_IRQ, sl_int, irq)
TICsetH(TIC_DELTA_SOFTIRQ, sl_int, sirq)
TICsetH(TIC_DELTA_STOLEN, sl_int, stolen)
TICsetH(TIC_DELTA_GUEST, sl_int, guest)
TICsetH(TIC_DELTA_GUEST_NICE, sl_int, gnice)
TICsetH(TIC_DELTA_SUM_TOTAL, sl_int, xtot)
TICsetH(TIC_DELTA_SUM_BUSY, sl_int, xbsy)
TICsetH(TIC_DELTA_SUM_IDLE, sl_int, xidl)
TICsetH(TIC_DELTA_SUM_USER, sl_int, xusr)
TICsetH(TIC_DELTA_SUM_SYSTEM, sl_int, xsys)
/* current system wide values */
SYS_set(SYS_CTX_SWITCHES, ul_int, ctxt)
SYS_set(SYS_INTERRUPTS, ul_int, intr)
SYS_set(SYS_PROC_BLOCKED, ul_int, procs_blocked)
SYS_set(SYS_PROC_CREATED, ul_int, procs_created)
SYS_set(SYS_PROC_RUNNING, ul_int, procs_running)
SYS_set(SYS_TIME_OF_BOOT, ul_int, btime)
/* system wide changes between the two most recent reads; note that the
   blocked and running deltas are hand written, without the zero floor
   which SYSsetH applies, so they may be negative */
SYSsetH(SYS_DELTA_CTX_SWITCHES, s_int, ctxt)
SYSsetH(SYS_DELTA_INTERRUPTS, s_int, intr)
setDECL(SYS_DELTA_PROC_BLOCKED) { (void)T; R->result.s_int = S->new.procs_blocked - S->old.procs_blocked; }
SYSsetH(SYS_DELTA_PROC_CREATED, s_int, procs_created)
setDECL(SYS_DELTA_PROC_RUNNING) { (void)T; R->result.s_int = S->new.procs_running - S->old.procs_running; }
#undef setDECL
#undef TIC_set
#undef SYS_set
#undef TICsetH
#undef SYSsetH
// ___ Sorting Support ||||||||||||||||||||||||||||||||||||||||||||||||||||||||

/* what a sort comparison function needs beyond the two stacks themselves */
struct sort_parms {
    int offset;                     // position of the sort item within each stack
    enum stat_sort_order order;     // direction of the sort
};

/* Each result type gets one comparison function, named sort_stat_<type>.
   A and B each address a stack pointer and P carries the parameters above.
   The value returned is negative, zero or positive in the usual manner. */
#define srtNAME(t) sort_stat_ ## t
#define srtDECL(t) static int srtNAME(t) \
    (const struct stat_stack **A, const struct stat_stack **B, struct sort_parms *P)

/* note: the two signed types are compared, never subtracted, since such a
         difference could overflow or (for the long flavor) be truncated
         when squeezed into our int return value, yielding the wrong sign */
srtDECL(s_int) {
    const struct stat_result *a = (*A)->head + P->offset;
    const struct stat_result *b = (*B)->head + P->offset;
    return P->order * ((a->result.s_int > b->result.s_int)
                     - (a->result.s_int < b->result.s_int));
}

srtDECL(sl_int) {
    const struct stat_result *a = (*A)->head + P->offset;
    const struct stat_result *b = (*B)->head + P->offset;
    return P->order * ((a->result.sl_int > b->result.sl_int)
                     - (a->result.sl_int < b->result.sl_int));
}

srtDECL(ul_int) {
    const struct stat_result *a = (*A)->head + P->offset;
    const struct stat_result *b = (*B)->head + P->offset;
    if ( a->result.ul_int > b->result.ul_int ) return P->order > 0 ?  1 : -1;
    if ( a->result.ul_int < b->result.ul_int ) return P->order > 0 ? -1 :  1;
    return 0;
}

srtDECL(ull_int) {
    const struct stat_result *a = (*A)->head + P->offset;
    const struct stat_result *b = (*B)->head + P->offset;
    if ( a->result.ull_int > b->result.ull_int ) return P->order > 0 ?  1 : -1;
    if ( a->result.ull_int < b->result.ull_int ) return P->order > 0 ? -1 :  1;
    return 0;
}

/* for items having no sortable value, everything compares as equal */
srtDECL(noop) {
    (void)A; (void)B; (void)P;
    return 0;
}

#undef srtDECL
// ___ Controlling Table ||||||||||||||||||||||||||||||||||||||||||||||||||||||
/* SET_t: the common signature of the set_stat_<item> functions */
typedef void (*SET_t)(struct stat_result *, struct hist_sys *, struct hist_tic *);
#define RS(e) (SET_t)setNAME(e)
/* QSR_t: the generic signature to which each sort_stat_<type> function
   is cast for storage in the table */
typedef int (*QSR_t)(const void *, const void *, void *);
#define QS(t) (QSR_t)srtNAME(t)
/* TS: yields a result type's name as a string, while items having no
   result type use the empty string TS_noop */
#define TS(t) STRINGIFY(t)
#define TS_noop ""
/*
* Need it be said?
* This table must be kept in the exact same order as
* those 'enum stat_item' guys ! */
/* ( an item's enum value is used directly as the index into this table ) */
static struct {
SET_t setsfunc; // the actual result setting routine
QSR_t sortfunc; // sort cmp func for a specific type
char *type2str; // the result type as a string value
} Item_table[] = {
/* setsfunc sortfunc type2str
--------------------------- ------------ ----------- */
{ RS(noop), QS(noop), TS_noop },
{ RS(extra), QS(ull_int), TS_noop },
{ RS(TIC_ID), QS(s_int), TS(s_int) },
{ RS(TIC_NUMA_NODE), QS(s_int), TS(s_int) },
{ RS(TIC_NUM_CONTRIBUTORS), QS(s_int), TS(s_int) },
{ RS(TIC_USER), QS(ull_int), TS(ull_int) },
{ RS(TIC_NICE), QS(ull_int), TS(ull_int) },
{ RS(TIC_SYSTEM), QS(ull_int), TS(ull_int) },
{ RS(TIC_IDLE), QS(ull_int), TS(ull_int) },
{ RS(TIC_IOWAIT), QS(ull_int), TS(ull_int) },
{ RS(TIC_IRQ), QS(ull_int), TS(ull_int) },
{ RS(TIC_SOFTIRQ), QS(ull_int), TS(ull_int) },
{ RS(TIC_STOLEN), QS(ull_int), TS(ull_int) },
{ RS(TIC_GUEST), QS(ull_int), TS(ull_int) },
{ RS(TIC_GUEST_NICE), QS(ull_int), TS(ull_int) },
{ RS(TIC_SUM_TOTAL), QS(ull_int), TS(ull_int) },
{ RS(TIC_SUM_BUSY), QS(ull_int), TS(ull_int) },
{ RS(TIC_SUM_IDLE), QS(ull_int), TS(ull_int) },
{ RS(TIC_SUM_USER), QS(ull_int), TS(ull_int) },
{ RS(TIC_SUM_SYSTEM), QS(ull_int), TS(ull_int) },
{ RS(TIC_DELTA_USER), QS(sl_int), TS(sl_int) },
{ RS(TIC_DELTA_NICE), QS(sl_int), TS(sl_int) },
{ RS(TIC_DELTA_SYSTEM), QS(sl_int), TS(sl_int) },
{ RS(TIC_DELTA_IDLE), QS(sl_int), TS(sl_int) },
{ RS(TIC_DELTA_IOWAIT), QS(sl_int), TS(sl_int) },
{ RS(TIC_DELTA_IRQ), QS(sl_int), TS(sl_int) },
{ RS(TIC_DELTA_SOFTIRQ), QS(sl_int), TS(sl_int) },
{ RS(TIC_DELTA_STOLEN), QS(sl_int), TS(sl_int) },
{ RS(TIC_DELTA_GUEST), QS(sl_int), TS(sl_int) },
{ RS(TIC_DELTA_GUEST_NICE), QS(sl_int), TS(sl_int) },
{ RS(TIC_DELTA_SUM_TOTAL), QS(sl_int), TS(sl_int) },
{ RS(TIC_DELTA_SUM_BUSY), QS(sl_int), TS(sl_int) },
{ RS(TIC_DELTA_SUM_IDLE), QS(sl_int), TS(sl_int) },
{ RS(TIC_DELTA_SUM_USER), QS(sl_int), TS(sl_int) },
{ RS(TIC_DELTA_SUM_SYSTEM), QS(sl_int), TS(sl_int) },
{ RS(SYS_CTX_SWITCHES), QS(ul_int), TS(ul_int) },
{ RS(SYS_INTERRUPTS), QS(ul_int), TS(ul_int) },
{ RS(SYS_PROC_BLOCKED), QS(ul_int), TS(ul_int) },
{ RS(SYS_PROC_CREATED), QS(ul_int), TS(ul_int) },
{ RS(SYS_PROC_RUNNING), QS(ul_int), TS(ul_int) },
{ RS(SYS_TIME_OF_BOOT), QS(ul_int), TS(ul_int) },
{ RS(SYS_DELTA_CTX_SWITCHES), QS(s_int), TS(s_int) },
{ RS(SYS_DELTA_INTERRUPTS), QS(s_int), TS(s_int) },
{ RS(SYS_DELTA_PROC_BLOCKED), QS(s_int), TS(s_int) },
{ RS(SYS_DELTA_PROC_CREATED), QS(s_int), TS(s_int) },
{ RS(SYS_DELTA_PROC_RUNNING), QS(s_int), TS(s_int) },
// dummy entry corresponding to STAT_logical_end ...
{ NULL, NULL, NULL }
};
/* please note,
* 1st enum MUST be kept in sync with highest TIC type
* 2nd enum MUST be 1 greater than the highest value of any enum */
#ifdef ENFORCE_LOGICAL
enum stat_item STAT_TIC_highest = STAT_TIC_DELTA_GUEST_NICE;
#endif
enum stat_item STAT_logical_end = STAT_SYS_DELTA_PROC_RUNNING + 1;
#undef setNAME
#undef srtNAME
#undef RS
#undef QS
// ___ Private Functions ||||||||||||||||||||||||||||||||||||||||||||||||||||||
#ifndef NUMA_DISABLE
 #ifdef PRETEND_NUMA
/* development only stand-ins for the two libnuma functions we rely on */

// the highest node id is always claimed to be 3
static int fake_max_node (void)
{
    return 3;
}

// cpus are spread over nodes 0, 2 and 3 while node 1 is never used
static int fake_node_of_cpu (int n)
{
    int slot = n % 4;

    if (slot == 1)
        return 0;
    return slot;
}
 #endif
#endif
/*
 * stat_assign_results():
 *
 * Walk a single stack, invoking the Item_table 'set' function for each
 * result until an item at or beyond STAT_logical_end is encountered.
 */
static inline void stat_assign_results (
        struct stat_stack *stack,
        struct hist_sys *sys_hist,
        struct hist_tic *tic_hist)
{
    struct stat_result *res;

    for (res = stack->head; res->item < STAT_logical_end; ++res)
        Item_table[res->item].setsfunc(res, sys_hist, tic_hist);
} // end: stat_assign_results
/*
 * stat_cleanup_stack():
 *
 * Zero the result for every item of one stack, except for any STAT_noop
 * items whose results are left exactly as found.
 */
static inline void stat_cleanup_stack (
        struct stat_result *this)
{
    for ( ; this->item < STAT_logical_end; ++this) {
        if (this->item > STAT_noop)
            this->result.ull_int = 0;
    }
} // end: stat_cleanup_stack
static inline void stat_cleanup_stacks_all (
struct ext_support *this)
{
struct stacks_extent *ext = this->extents;
int i;
while (ext) {
for (i = 0; ext->stacks[i]; i++)
stat_cleanup_stack(ext->stacks[i]->head);
ext = ext->next;
};
this->dirty_stacks = 0;
} // end: stat_cleanup_stacks_all
/*
 * stat_derive_unique():
 *
 * Compute the five derived tick values (xtot, xusr, xidl, xbsy, xsys)
 * from the 'new' tics which were just read for the summary or a cpu.
 *
 * Returns: always 0 ( previously this function was declared as returning )
 *                   ( an int yet it never actually returned any value    )
 */
static inline int stat_derive_unique (
        struct hist_tic *this)
{
    /* note: we exclude guest tics from xtot since ...
             'user' already includes 'guest'
             'nice' already includes 'gnice'
             ( see: ./kernel/sched/cputime.c, account_guest_time ) */
    this->new.xtot
        = this->new.user
        + this->new.nice
        + this->new.system
        + this->new.idle
        + this->new.iowait
        + this->new.irq
        + this->new.sirq
        + this->new.stolen;
    this->new.xusr = this->new.user + this->new.nice;
    /* this stolen guy is one i'm not sure of yet, but it's documented as:
           "the time spent in other operating systems
            when running in a virtualized environment"
       so it would seem to apply to an 'involuntary wait' for a guest OS */
    this->new.xidl = this->new.idle + this->new.iowait + this->new.stolen;
    this->new.xbsy = this->new.xtot - this->new.xidl;
    this->new.xsys = this->new.xbsy - this->new.xusr;
    return 0;
} // end: stat_derive_unique
static void stat_extents_free_all (
struct ext_support *this)
{
while (this->extents) {
struct stacks_extent *p = this->extents;
this->extents = this->extents->next;
free(p);
};
} // end: stat_extents_free_all
/*
 * stat_itemize_stack():
 *
 * Prime 'depth' consecutive results, beginning at 'p', with their
 * corresponding entry from 'items' and with a zeroed result.
 *
 * Returns: the same address that was passed as 'p'
 */
static inline struct stat_result *stat_itemize_stack (
        struct stat_result *p,
        int depth,
        enum stat_item *items)
{
    int n = 0;

    while (n < depth) {
        p[n].item = items[n];
        p[n].result.ull_int = 0;
        n++;
    }
    return p;
} // end: stat_itemize_stack
/*
 * stat_items_check_failed():
 *
 * Validate the items a caller has provided.
 *
 * Returns: 0 if everything appears ok, otherwise -1
 */
static inline int stat_items_check_failed (
        int numitems,
        enum stat_item *items)
{
    int n;

    if (numitems < 1)
        return -1;

    /* should a caller mistakenly pass an enum itself rather than the address
     * of one or more enums, gcc will silently convert that value into an
     * address (possibly NULL) while clang at least offers this warning:
     *
     * warning: incompatible integer to pointer conversion passing 'int' to parameter of type 'enum stat_item *'
     * my_stack = procps_stat_select(info, STAT_noop, num);
     *                                     ^~~~~~~~~~~~~~~~
     *
     * so any 'address' this implausibly small is treated as that mistake */
    if ((void *)items < (void *)(unsigned long)(2 * STAT_logical_end))
        return -1;

    for (n = 0; n < numitems; n++) {
        // a stat_item is currently unsigned, but we'll protect our future
        if (items[n] < 0 || items[n] >= STAT_logical_end)
            return -1;
    }
    return 0;
} // end: stat_items_check_failed
/*
 * stat_make_numa_hist():
 *
 * Rebuild the numa node history by summing the new/old tics of each cpu
 * into the node which libnuma reports as that cpu's owner.
 *
 * Returns: the number of node slots now in use (highest node id + 1),
 *          0 when libnuma is unavailable or numa support was disabled,
 *          -ENOMEM when the node history could not be enlarged
 *          ( in which case the existing history remains allocated )
 */
static int stat_make_numa_hist (
        struct stat_info *info)
{
#ifndef NUMA_DISABLE
 #define sumUP(f) do { \
    nod_ptr->new. f += cpu_ptr->new. f; \
    nod_ptr->old. f += cpu_ptr->old. f; } while (0)
    struct hist_tic *cpu_ptr, *nod_ptr;
    int i, node;

    if (info->libnuma_handle == NULL)
        return 0;

    /* are numa nodes dynamic like online cpus can be?
       ( and be careful, this libnuma call returns the highest node id in use, )
       ( NOT an actual number of nodes - some of those 'slots' might be unused ) */
    info->nodes.total = info->our_max_node() + 1;

    if (info->nodes.hist.n_alloc == 0
    || (info->nodes.total >= info->nodes.hist.n_alloc)) {
        /* we commit to the new address and size only after a realloc has
           succeeded so that a failure can't lose (leak) the former array
           or leave n_alloc describing memory which we do not really own */
        int want = info->nodes.total + NEWOLD_INCR;
        struct hist_tic *grown = realloc(info->nodes.hist.tics, want * sizeof(struct hist_tic));
        if (grown == NULL)
            return -ENOMEM;
        info->nodes.hist.tics = grown;
        info->nodes.hist.n_alloc = want;
    }

    // forget all of the prior node statistics & anticipate unassigned slots
    memset(info->nodes.hist.tics, 0, info->nodes.hist.n_alloc * sizeof(struct hist_tic));
    nod_ptr = info->nodes.hist.tics;
    for (i = 0; i < info->nodes.total; i++) {
        nod_ptr->id = nod_ptr->numa_node = STAT_NODE_INVALID;
        ++nod_ptr;
    }

    // spin thru each cpu and value the jiffs for it's numa node
    for (i = 0; i < info->cpus.hist.n_inuse; i++) {
        cpu_ptr = info->cpus.hist.tics + i;
        node = info->our_node_of_cpu(cpu_ptr->id);
        // a negative node means 'none', and we never trust one beyond our array
        if (-1 < node && node < info->nodes.hist.n_alloc) {
            nod_ptr = info->nodes.hist.tics + node;
            sumUP(user);
            sumUP(nice);
            sumUP(system);
            sumUP(idle);
            sumUP(iowait);
            sumUP(irq);
            sumUP(sirq);
            sumUP(stolen);
            sumUP(guest);
            sumUP(gnice);
            sumUP(xtot);
            sumUP(xbsy);
            sumUP(xidl);
            sumUP(xusr);
            sumUP(xsys);
            cpu_ptr->numa_node = node;
            nod_ptr->id = node;
            nod_ptr->count++;
        }
    }
    info->nodes.hist.n_inuse = info->nodes.total;
    return info->nodes.hist.n_inuse;
 #undef sumUP
#else
    return 0;
#endif
} // end: stat_make_numa_hist
/*
 * stat_read_failed():
 *
 * Read all of /proc/stat into our self-tuning buffer, then value the
 * 'new' side of the summary, individual cpu and system wide histories
 * after first preserving what was there as the 'old' side.
 *
 * Returns: 0 on success, otherwise a negative value as follows
 *          -EINVAL  no context was provided
 *          -ENOMEM  a needed allocation failed (prior memory is retained)
 *          -errno   the open or the read failed (-EIO if errno wasn't set)
 *          -1       the cpu summary line could not be parsed
 */
static int stat_read_failed (
        struct stat_info *info)
{
    struct hist_tic *sum_ptr, *cpu_ptr;
    char *bp, *b;
    int i, rc, num, tot_read;
    unsigned long long llnum;

    if (info == NULL)
        return -EINVAL;

    if (!info->cpus.hist.n_alloc) {
        info->cpus.hist.tics = calloc(NEWOLD_INCR, sizeof(struct hist_tic));
        if (!(info->cpus.hist.tics))
            return -ENOMEM;
        info->cpus.hist.n_alloc = NEWOLD_INCR;
        info->cpus.hist.n_inuse = 0;
    }

    // the file is opened just once and then rewound for each new read
    if (!info->stat_fp
    && (!(info->stat_fp = fopen(STAT_FILE, "r"))))
        return -errno;
    fflush(info->stat_fp);
    rewind(info->stat_fp);

 #define maxSIZ info->stat_buf_size
 #define curSIZ ( maxSIZ - tot_read )
 #define curPOS ( info->stat_buf + tot_read )
    /* we slurp in the entire file thus avoiding repeated calls to fread,    |
       especially in a massively parallel environment. additionally, each cpu |
       line is then frozen in time rather than changing until we get around to |
       accessing it. this helps to minimize (not eliminate) some distortions. | */
    tot_read = errno = 0;
    while ((0 < (num = fread(curPOS, 1, curSIZ, info->stat_fp)))) {
        char *grown;
        tot_read += num;
        if (tot_read < maxSIZ)
            break;
        /* the buffer is full, so grow it and go back for more. but, we only
           commit to a new address and size after that realloc succeeds so
           a failure won't leak or invalidate the buffer we already have */
        if (!(grown = realloc(info->stat_buf, maxSIZ + BUFFER_INCR)))
            return -ENOMEM;
        info->stat_buf = grown;
        maxSIZ += BUFFER_INCR;
    };
 #undef maxSIZ
 #undef curSIZ
 #undef curPOS

    // anything short of end-of-file is a failure, even if errno wasn't set
    if (!feof(info->stat_fp))
        return errno ? -errno : -EIO;
    info->stat_buf[tot_read] = '\0';
    bp = info->stat_buf;

    sum_ptr = &info->cpu_hist;
    // remember summary from last time around
    memcpy(&sum_ptr->old, &sum_ptr->new, sizeof(struct stat_jifs));

    sum_ptr->id = STAT_SUMMARY_ID;              // mark as summary
    sum_ptr->numa_node = STAT_NODE_INVALID;     // mark as invalid

    // now value the cpu summary tics from line #1
    if (8 > sscanf(bp, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu"
        , &sum_ptr->new.user, &sum_ptr->new.nice, &sum_ptr->new.system
        , &sum_ptr->new.idle, &sum_ptr->new.iowait, &sum_ptr->new.irq
        , &sum_ptr->new.sirq, &sum_ptr->new.stolen
        , &sum_ptr->new.guest, &sum_ptr->new.gnice))
            return -1;
    stat_derive_unique(sum_ptr);

    i = 0;
reap_em_again:
    cpu_ptr = info->cpus.hist.tics + i;   // adapt to relocated if reap_em_again

    do {
        /* advance to the next line, but if there is no next line we'll point
           to the terminating NUL so the sscanf below simply fails to match */
        char *eol = strchr(bp, '\n');
        bp = eol ? eol + 1 : bp + strlen(bp);
        // remember this cpu from last time around
        memcpy(&cpu_ptr->old, &cpu_ptr->new, sizeof(struct stat_jifs));
        // next can be overridden under 'stat_make_numa_hist'
        cpu_ptr->numa_node = STAT_NODE_INVALID;
        cpu_ptr->count = 1;

        if (8 > (rc = sscanf(bp, "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu"
            , &cpu_ptr->id
            , &cpu_ptr->new.user, &cpu_ptr->new.nice, &cpu_ptr->new.system
            , &cpu_ptr->new.idle, &cpu_ptr->new.iowait, &cpu_ptr->new.irq
            , &cpu_ptr->new.sirq, &cpu_ptr->new.stolen
            , &cpu_ptr->new.guest, &cpu_ptr->new.gnice))) {
                cpu_ptr->id_sav = -1;
                break;        // we must tolerate cpus taken offline
        }
        stat_derive_unique(cpu_ptr);
        // don't distort deltas when a cpu is taken offline or brought online
        if (cpu_ptr->id != cpu_ptr->id_sav)
            memcpy(&cpu_ptr->old, &cpu_ptr->new, sizeof(struct stat_jifs));
        cpu_ptr->id_sav = cpu_ptr->id;
        ++cpu_ptr;
        ++i;
    } while (i < info->cpus.hist.n_alloc);

    // every slot was used and the last line was a cpu, so there may be more
    if (i == info->cpus.hist.n_alloc && rc >= 8) {
        struct hist_tic *grown = realloc(info->cpus.hist.tics
            , (info->cpus.hist.n_alloc + NEWOLD_INCR) * sizeof(struct hist_tic));
        if (!grown)
            return -ENOMEM;
        /* these added slots must begin life zeroed, just like the calloc'd
           originals, since 'new' and 'id_sav' are read before being set */
        memset(grown + i, 0, NEWOLD_INCR * sizeof(struct hist_tic));
        info->cpus.hist.tics = grown;
        info->cpus.hist.n_alloc += NEWOLD_INCR;
        goto reap_em_again;
    }

    info->cpus.total = info->cpus.hist.n_inuse = sum_ptr->count = i;

    // remember sys_hist stuff from last time around
    memcpy(&info->sys_hist.old, &info->sys_hist.new, sizeof(struct stat_data));

    /* each of the following is searched for beginning with the first line
       that wasn't a cpu, and any which is not found will be valued as zero */
    llnum = 0;
    if ((b = strstr(bp, "intr ")))
        sscanf(b, "intr %llu", &llnum);
    info->sys_hist.new.intr = llnum;
    llnum = 0;
    if ((b = strstr(bp, "ctxt ")))
        sscanf(b, "ctxt %llu", &llnum);
    info->sys_hist.new.ctxt = llnum;
    llnum = 0;
    if ((b = strstr(bp, "btime ")))
        sscanf(b, "btime %llu", &llnum);
    info->sys_hist.new.btime = llnum;
    llnum = 0;
    if ((b = strstr(bp, "processes ")))
        sscanf(b, "processes %llu", &llnum);
    info->sys_hist.new.procs_created = llnum;
    llnum = 0;
    if ((b = strstr(bp, "procs_blocked ")))
        sscanf(b, "procs_blocked %llu", &llnum);
    info->sys_hist.new.procs_blocked = llnum;
    llnum = 0;
    if ((b = strstr(bp, "procs_running ")))
        sscanf(b, "procs_running %llu", &llnum);
    info->sys_hist.new.procs_running = llnum;

    return 0;
} // end: stat_read_failed
/*
 * stat_stacks_alloc():
 *
 * Allocate and initialize one or more stacks each of which is anchored in an
 * associated context structure.
 *
 * All such stacks will have their result structures properly primed with
 * 'items', while the result itself will be zeroed.
 *
 * The new extent is also pushed onto the front of this->extents.
 *
 * Returns a stack_extent struct anchoring the 'heads' of each new stack,
 * or NULL if a parameter was unacceptable or memory could not be obtained.
 */
static struct stacks_extent *stat_stacks_alloc (
        struct ext_support *this,
        int maxstacks)
{
    struct stacks_extent *p_blob;
    struct stat_stack **p_vect;
    struct stat_stack *p_head;
    size_t vect_size, head_size, list_size, blob_size;
    char *v_head, *v_list;
    int i;

    if (this == NULL || this->items == NULL)
        return NULL;
    if (maxstacks < 1)
        return NULL;

    vect_size  = sizeof(void *) * maxstacks;                     // size of the addr vectors |
    vect_size += sizeof(void *);                                 // plus NULL addr delimiter |
    head_size  = sizeof(struct stat_stack);                      // size of that head struct |
    list_size  = sizeof(struct stat_result) * this->items->num;  // any single results stack |
    blob_size  = sizeof(struct stacks_extent);                   // the extent anchor itself |
    blob_size += vect_size;                                      // plus room for addr vects |
    blob_size += head_size * maxstacks;                          // plus room for head thing |
    blob_size += list_size * maxstacks;                          // plus room for our stacks |

    /* note: all of our memory is allocated in one single blob, facilitating a later free(). |
             as a minimum, it is important that those result structures themselves always be |
             contiguous within each stack since they are accessed through relative position. | */
    if (NULL == (p_blob = calloc(1, blob_size)))
        return NULL;

    p_blob->next = this->extents;                                // push this extent onto... |
    this->extents = p_blob;                                      // ...some existing extents |

    /* note: we step through our blob with char pointers because arithmetic on a 'void *' is |
             only a compiler extension and not something that standard C actually provides. | */
    p_vect = (void *)((char *)p_blob + sizeof(struct stacks_extent)); // prime our vector pointer |
    p_blob->stacks = p_vect;                                     // set actual vectors start |
    v_head = (char *)p_vect + vect_size;                         // prime head pointer start |
    v_list = v_head + (head_size * maxstacks);                   // prime our stacks pointer |

    for (i = 0; i < maxstacks; i++) {
        p_head = (void *)v_head;
        p_head->head = stat_itemize_stack((void *)v_list, this->items->num, this->items->enums);
        p_blob->stacks[i] = p_head;
        v_list += list_size;
        v_head += head_size;
    }
    // the vector's final NULL delimiter was already provided via that calloc
    p_blob->ext_numstacks = maxstacks;
    return p_blob;
} // end: stat_stacks_alloc
/*
 * stat_stacks_fetch():
 *
 * Value one result stack for each history entry currently in use, adding
 * more stacks as needed, then publish them through this->result as a NULL
 * delimited vector of stack pointers.
 *
 * Returns: the number of stacks valued (which might be zero),
 *          -EINVAL if no reap_support was provided,
 *          -ENOMEM if memory could not be obtained
 *          ( in which case all prior allocations remain intact )
 */
static int stat_stacks_fetch (
        struct stat_info *info,
        struct reap_support *this)
{
 #define n_alloc  this->n_alloc
 #define n_inuse  this->hist.n_inuse
 #define n_saved  this->n_alloc_save
    struct stacks_extent *ext;
    void *grown;
    int i;

    if (this == NULL)
        return -EINVAL;

    // initialize stuff -----------------------------------
    if (!this->anchor) {
        if (!(this->anchor = calloc(STACKS_INCR, sizeof(void *))))
            return -ENOMEM;
        n_alloc = STACKS_INCR;
    }
    if (!this->fetch.extents) {
        if (!(ext = stat_stacks_alloc(&this->fetch, n_alloc)))
            return -ENOMEM;
        memcpy(this->anchor, ext->stacks, sizeof(void *) * n_alloc);
    }
    if (this->fetch.dirty_stacks)
        stat_cleanup_stacks_all(&this->fetch);

    // iterate stuff --------------------------------------
    for (i = 0; i < n_inuse; i++) {
        if (!(i < n_alloc)) {
            /* our count is raised only after both the anchor vector and the
               new stacks exist. that way a failure leaves the old vector
               intact and the count still reflects what's truly been set */
            if (!(grown = realloc(this->anchor, sizeof(void *) * (n_alloc + STACKS_INCR))))
                return -ENOMEM;
            this->anchor = grown;
            if (!(ext = stat_stacks_alloc(&this->fetch, STACKS_INCR)))
                return -ENOMEM;
            memcpy(this->anchor + i, ext->stacks, sizeof(void *) * STACKS_INCR);
            n_alloc += STACKS_INCR;
        }
        stat_assign_results(this->anchor[i], &info->sys_hist, &this->hist.tics[i]);
    }

    // finalize stuff -------------------------------------
    /* note: we go to this trouble of maintaining a duplicate of the consolidated |
             extent stacks addresses represented as our 'anchor' since these ptrs |
             are exposed to a user (um, not that we don't trust 'em or anything). |
             plus, we can NULL delimit these ptrs which we couldn't do otherwise. | */
    if (n_saved < i + 1) {
        if (!(grown = realloc(this->result.stacks, sizeof(void *) * (i + 1))))
            return -ENOMEM;
        this->result.stacks = grown;
        n_saved = i + 1;
    }
    memcpy(this->result.stacks, this->anchor, sizeof(void *) * i);
    this->result.stacks[i] = NULL;
    this->result.total = i;
    this->fetch.dirty_stacks = 1;

    // callers beware, this might be zero (maybe no libnuma.so) ...
    return this->result.total;
 #undef n_alloc
 #undef n_inuse
 #undef n_saved
} // end: stat_stacks_fetch
/*
 * stat_stacks_reconfig_maybe():
 *
 * Compare the caller's items against those currently on file. If they
 * differ, the new items are recorded (with a STAT_logical_end delimiter
 * appended) and this->extents are freed, to be rebuilt by others later.
 *
 * Returns: 1 if the items changed, 0 if they did not,
 *          -EINVAL if the items failed validation,
 *          -ENOMEM if the new items could not be recorded
 *          ( in which case the former items remain in effect )
 */
static int stat_stacks_reconfig_maybe (
        struct ext_support *this,
        enum stat_item *items,
        int numitems)
{
    if (stat_items_check_failed(numitems, items))
        return -EINVAL;
    /* is this the first time or have things changed since we were last called?
       if so, gotta' redo all of our stacks stuff ... */
    if (this->items->num != numitems + 1
    || memcmp(this->items->enums, items, sizeof(enum stat_item) * numitems)) {
        // allow for our STAT_logical_end
        enum stat_item *grown = realloc(this->items->enums, sizeof(enum stat_item) * (numitems + 1));
        /* had we lost the former address on failure, that memory would leak
           and a NULL 'enums' paired with a non-zero 'num' would sabotage
           the memcmp above on some future call */
        if (!grown)
            return -ENOMEM;
        this->items->enums = grown;
        memcpy(this->items->enums, items, sizeof(enum stat_item) * numitems);
        this->items->enums[numitems] = STAT_logical_end;
        this->items->num = numitems + 1;
        stat_extents_free_all(this);
        return 1;
    }
    return 0;
} // end: stat_stacks_reconfig_maybe
/*
 * stat_update_single_stack():
 *
 * Value the one and only stack of 'this' (creating it on first use) from
 * the system wide history plus the cpu summary history.
 *
 * Returns: that stack, or NULL if it could not be allocated
 */
static struct stat_stack *stat_update_single_stack (
        struct stat_info *info,
        struct ext_support *this)
{
    struct stat_stack *only;

    if (!this->extents) {
        if (!stat_stacks_alloc(this, 1))
            return NULL;
    }
    if (this->dirty_stacks)
        stat_cleanup_stacks_all(this);

    only = this->extents->stacks[0];
    stat_assign_results(only, &info->sys_hist, &info->cpu_hist);
    this->dirty_stacks = 1;

    return only;
} // end: stat_update_single_stack
#if defined(PRETEND_NUMA) && defined(NUMA_DISABLE)
# warning 'PRETEND_NUMA' ignored, 'NUMA_DISABLE' is active
#endif
// ___ Public Functions |||||||||||||||||||||||||||||||||||||||||||||||||||||||
// --- standard required functions --------------------------------------------
/*
 * procps_stat_new:
 *
 * Create a new container to hold the stat information
 *
 * The initial refcount is 1, and needs to be decremented
 * to release the resources of the structure.
 *
 * Returns: < 0 on failure, 0 on success along with
 * a pointer to a new context struct
 */
PROCPS_EXPORT int procps_stat_new (
        struct stat_info **info)
{
    struct stat_info *ctx;
    int rc;

    // the caller must provide the address of a pointer which is itself NULL
    if (info == NULL || *info != NULL)
        return -EINVAL;

    ctx = calloc(1, sizeof(struct stat_info));
    if (ctx == NULL)
        return -ENOMEM;
    ctx->stat_buf = calloc(1, BUFFER_INCR);
    if (ctx->stat_buf == NULL) {
        free(ctx);
        return -ENOMEM;
    }
    ctx->stat_buf_size = BUFFER_INCR;
    ctx->refcount = 1;

    ctx->results.cpus = &ctx->cpus.result;
    ctx->results.nodes = &ctx->nodes.result;
    ctx->cpus.total = procps_cpu_count();

    // these 3 are for reap, sharing a single set of items
    ctx->cpu_summary.items = ctx->cpus.fetch.items = ctx->nodes.fetch.items = &ctx->reap_items;
    // the select guy has its own set of items
    ctx->select.items = &ctx->select_items;

#ifndef NUMA_DISABLE
 #ifndef PRETEND_NUMA
    // we'll try for the most recent version, then a version we know works...
    ctx->libnuma_handle = dlopen("libnuma.so", RTLD_LAZY);
    if (!ctx->libnuma_handle)
        ctx->libnuma_handle = dlopen("libnuma.so.1", RTLD_LAZY);
    if (ctx->libnuma_handle) {
        ctx->our_max_node = dlsym(ctx->libnuma_handle, "numa_max_node");
        ctx->our_node_of_cpu = dlsym(ctx->libnuma_handle, "numa_node_of_cpu");
        // without both of those functions, we act as if there's no libnuma
        if (ctx->our_max_node == NULL
        || (ctx->our_node_of_cpu == NULL)) {
            // this dlclose is safe - we've yet to call numa_node_of_cpu
            // ( there's one other dlclose which has now been disabled )
            dlclose(ctx->libnuma_handle);
            ctx->libnuma_handle = NULL;
        }
    }
 #else
    ctx->libnuma_handle = (void *)-1;
    ctx->our_max_node = fake_max_node;
    ctx->our_node_of_cpu = fake_node_of_cpu;
 #endif
#endif

    /* do a priming read here for the following potential benefits: |
         1) ensure there will be no problems with subsequent access |
         2) make delta results potentially useful, even if 1st time |
         3) eliminate need for history distortions 1st time 'switch' | */
    rc = stat_read_failed(ctx);
    if (rc) {
        procps_stat_unref(&ctx);
        return rc;
    }

    *info = ctx;
    return 0;
} // end :procps_stat_new
/*
 * procps_stat_ref:
 *
 * Add one more reference to an existing context.
 *
 * Returns: the new reference count, or -EINVAL if no context was provided
 */
PROCPS_EXPORT int procps_stat_ref (
        struct stat_info *info)
{
    if (!info)
        return -EINVAL;

    return ++info->refcount;
} // end: procps_stat_ref
/*
 * procps_stat_unref:
 *
 * Drop one reference to a context. When none remain, everything the
 * context owns is released and the caller's pointer is set to NULL.
 *
 * Returns: the remaining reference count (0 once the context is freed),
 *          or -EINVAL if no context was provided
 */
PROCPS_EXPORT int procps_stat_unref (
        struct stat_info **info)
{
    struct stat_info *ctx;

    if (info == NULL || *info == NULL)
        return -EINVAL;

    ctx = *info;
    ctx->refcount--;
    if (ctx->refcount >= 1)
        return ctx->refcount;

    // that was the final reference, so now everything must go ...
    if (ctx->stat_fp)
        fclose(ctx->stat_fp);
    // ( no need to test these first, a free of NULL does nothing )
    free(ctx->stat_buf);

    free(ctx->cpus.anchor);
    free(ctx->cpus.result.stacks);
    free(ctx->cpus.hist.tics);
    stat_extents_free_all(&ctx->cpus.fetch);

    free(ctx->nodes.anchor);
    free(ctx->nodes.result.stacks);
    free(ctx->nodes.hist.tics);
    stat_extents_free_all(&ctx->nodes.fetch);

    stat_extents_free_all(&ctx->cpu_summary);
    stat_extents_free_all(&ctx->select);

    free(ctx->reap_items.enums);
    free(ctx->select_items.enums);

#ifndef NUMA_DISABLE
 #ifndef PRETEND_NUMA
    /* note: we'll skip a dlclose() to avoid the following libnuma memory
     *       leak which is triggered after a call to numa_node_of_cpu():
     *         ==1234== LEAK SUMMARY:
     *         ==1234==    definitely lost: 512 bytes in 1 blocks
     *         ==1234==    indirectly lost: 48 bytes in 2 blocks
     *         ==1234==    ...
     * [ thanks very much libnuma, for all the pain you've caused ]
     */
//  if (ctx->libnuma_handle)
//      dlclose(ctx->libnuma_handle);
 #endif
#endif

    free(ctx);
    *info = NULL;
    return 0;
} // end: procps_stat_unref
// --- variable interface functions -------------------------------------------
PROCPS_EXPORT struct stat_result *procps_stat_get (
struct stat_info *info,
enum stat_item item)
{
static time_t sav_secs;
time_t cur_secs;
if (info == NULL)
return NULL;
if (item < 0 || item >= STAT_logical_end)
return NULL;
/* we will NOT read the source file with every call - rather, we'll offer
a granularity of 1 second between reads ... */
cur_secs = time(NULL);
if (1 <= cur_secs - sav_secs) {
if (stat_read_failed(info))
return NULL;
sav_secs = cur_secs;
}
info->get_this.item = item;
// with 'get', we must NOT honor the usual 'noop' guarantee
// if (item > STAT_noop)
info->get_this.result.ull_int = 0;
Item_table[item].setsfunc(&info->get_this, &info->sys_hist, &info->cpu_hist);
return &info->get_this;
} // end: procps_stat_get
/* procps_stat_reap():
 *
 * Gather the requested items for every CPU and, optionally, every NUMA
 * node.  The per-cpu and per-node result stacks plus the cpu summary stack
 * are all made available through the returned structure.
 *
 * Returns: pointer to a stat_reaped struct on success, NULL on error.
 */
PROCPS_EXPORT struct stat_reaped *procps_stat_reap (
        struct stat_info *info,
        enum stat_reap_type what,
        enum stat_item *items,
        int numitems)
{
    int rc;

    if (!info || !items)
        return NULL;
    if (what != STAT_REAP_CPUS_ONLY && what != STAT_REAP_CPUS_AND_NODES)
        return NULL;

#ifdef ENFORCE_LOGICAL
 {  int i;
    // those STAT_SYS_type enum's make sense only to 'select' ...
    for (i = 0; i < numitems; i++) {
        if (items[i] > STAT_TIC_highest)
            return NULL;
    }
 }
#endif

    rc = stat_stacks_reconfig_maybe(&info->cpu_summary, items, numitems);
    if (rc < 0)
        return NULL;
    if (rc > 0) {
        // presumably the items changed, making previously fetched
        // extents stale - so both sets are discarded ( TODO confirm )
        stat_extents_free_all(&info->cpus.fetch);
        stat_extents_free_all(&info->nodes.fetch);
    }

    if (stat_read_failed(info))
        return NULL;
    info->results.summary = stat_update_single_stack(info, &info->cpu_summary);

    /* unlike the other 'reap' functions, <stat> exposes two separate stacks |
       pointer arrays to callers. So, to honor the promise of a NULL delimit |
       a minimal array must always exist for the optional (nodes) one ...    | */
    if (!info->nodes.result.stacks) {
        info->nodes.result.stacks = malloc(sizeof(void *));
        if (!info->nodes.result.stacks)
            return NULL;
    }
    info->nodes.result.total = 0;
    info->nodes.result.stacks[0] = NULL;

    // 'what' was validated above, hence only these two cases can exist
#ifndef NUMA_DISABLE
    if (what == STAT_REAP_CPUS_AND_NODES) {
        /* note: if numa is being done at all, the numa history must be made |
           before the cpu stacks are built (fetched) since the read_stat guy |
           will have marked (temporarily) all the cpu node ids as invalid    | */
        if (stat_make_numa_hist(info) < 0)
            return NULL;
        // tolerate an unexpected absence of libnuma.so ...
        stat_stacks_fetch(info, &info->nodes);
    }
#endif
    if (!stat_stacks_fetch(info, &info->cpus))
        return NULL;

    return &info->results;
} // end: procps_stat_reap
/* procps_stat_select():
 *
 * Gather the requested TIC and/or SYS items, delivering them to the
 * caller in a single results stack.
 *
 * Returns: pointer to a stat_stack struct on success, NULL on error.
 */
PROCPS_EXPORT struct stat_stack *procps_stat_select (
        struct stat_info *info,
        enum stat_item *items,
        int numitems)
{
    if (!info || !items)
        return NULL;

    // the stack is (re)configured first and only then is the source read
    if (stat_stacks_reconfig_maybe(&info->select, items, numitems) < 0
    || stat_read_failed(info))
        return NULL;

    return stat_update_single_stack(info, &info->select);
} // end: procps_stat_select
/*
 * procps_stat_sort():
 *
 * Sort, in place, the stacks anchored in the passed stack pointers array
 * using the designated sort enumerator and the specified order.
 *
 * Returns: the same stacks address once sorted (or untouched when there
 *          are fewer than 2 stacks), NULL for invalid arguments or when
 *          sortitem is absent from the first stack.
 *
 * Note: all of the stacks must be homogeneous (of equal length and content).
 */
PROCPS_EXPORT struct stat_stack **procps_stat_sort (
        struct stat_info *info,
        struct stat_stack *stacks[],
        int numstacked,
        enum stat_item sortitem,
        enum stat_sort_order order)
{
    struct sort_parms parms;
    struct stat_result *cur;
    int where;

    if (!info || !stacks)
        return NULL;
    // a stat_item is currently unsigned, but we'll protect our future
    if (sortitem < 0 || sortitem >= STAT_logical_end)
        return NULL;
    if (order != STAT_SORT_ASCEND && order != STAT_SORT_DESCEND)
        return NULL;
    if (numstacked < 2)
        return stacks;

    // find the position of sortitem in the 1st stack, giving up when
    // the end-of-stack item is encountered before a match was made
    for (cur = stacks[0]->head, where = 0; cur->item != sortitem; ++cur, ++where) {
        if (cur->item >= STAT_logical_end)
            return NULL;
    }

    parms.offset = where;
    parms.order = order;
    qsort_r(stacks, numstacked, sizeof(void *), (QSR_t)Item_table[sortitem].sortfunc, &parms);

    return stacks;
} // end: procps_stat_sort
// --- special debugging function(s) ------------------------------------------
/*
* The following isn't part of the normal programming interface. Rather,
* it exists to validate result types referenced in application programs.
*
* It's used only when:
* 1) the 'XTRA_PROCPS_DEBUG' symbol has been defined, or
* 2) the '#include <proc/xtra-procps-debug.h>' directive has been used
*/
/*
 * xtra_stat_get():
 *
 * Debugging wrapper around procps_stat_get().  It complains on stderr
 * when actual_enum is out of range or when typestr differs from the
 * (non-empty) type string recorded in Item_table for the returned item.
 *
 * Returns: whatever procps_stat_get() returned, which may be NULL.
 */
PROCPS_EXPORT struct stat_result *xtra_stat_get (
        struct stat_info *info,
        enum stat_item actual_enum,
        const char *typestr,
        const char *file,
        int lineno)
{
    struct stat_result *r = procps_stat_get(info, actual_enum);

    if (actual_enum < 0 || actual_enum >= STAT_logical_end)
        fprintf(stderr, "%s line %d: invalid item = %d, type = %s\n", file, lineno, actual_enum, typestr);

    if (r != NULL) {
        const char *expected = Item_table[r->item].type2str;

        // an empty type string in the table means no check is made
        if (expected[0] != '\0' && strcmp(typestr, expected) != 0)
            fprintf(stderr, "%s line %d: was %s, expected %s\n", file, lineno, typestr, expected);
    }
    return r;
} // end: xtra_stat_get
/*
 * xtra_stat_val():
 *
 * Debugging accessor for one result within a stack.  It complains on
 * stderr when relative_enum lies outside the stack or when typestr differs
 * from the (non-empty) type string recorded in Item_table for that item.
 *
 * Returns: pointer to the addressed stat_result, or NULL when
 *          relative_enum is out of range.
 */
PROCPS_EXPORT struct stat_result *xtra_stat_val (
        int relative_enum,
        const char *typestr,
        const struct stat_stack *stack,
        struct stat_info *info,
        const char *file,
        int lineno)
{
    const char *expected;
    int depth = 0;

    (void)info;    // not referenced by this function

    // count the results which precede the end-of-stack item
    while (stack->head[depth].item < STAT_logical_end)
        ++depth;

    if (relative_enum < 0 || relative_enum >= depth) {
        fprintf(stderr, "%s line %d: invalid relative_enum = %d, type = %s\n", file, lineno, relative_enum, typestr);
        return NULL;
    }

    expected = Item_table[stack->head[relative_enum].item].type2str;
    // an empty type string in the table means no check is made
    if (expected[0] != '\0' && strcmp(typestr, expected) != 0)
        fprintf(stderr, "%s line %d: was %s, expected %s\n", file, lineno, typestr, expected);

    return &stack->head[relative_enum];
} // end: xtra_stat_val