top: program code changes, enable NUMA/Node extensions

This commit extends the top Summary Area cpu states to
include information on Non-Uniform Memory Architecture
nodes. It is based upon changes originally proposed by
Lance Shelton who was instrumental in the final patch.

With this change, the user will have new commands that
will provide alternatives to the individual cpu stats:

. '2' toggles between cpu & numa node summary displays
. '3' provides node summary and related cpu statistics

These extensions required some minimal system support.
Typically, the numactl package (and maybe libnuma-dev)
are all that's needed to show a single node which owns
all the processors. Failing that, or for slightly more
variety, top also offers a #define named PRETEND_NUMA.

(everything is perfectly justified plus right margins)
(are completely filled, but of course it must be luck)

Signed-off-by: Jim Warner <james.warner@comcast.net>
Signed-off-by: Lance Shelton <LShelton@fusionio.com>
This commit is contained in:
Lance Shelton 2013-04-08 00:00:00 -07:00 committed by Craig Small
parent c2afde130e
commit 8d989c68c0
4 changed files with 168 additions and 32 deletions

182
top/top.c
View File

@ -57,6 +57,15 @@
#include "top.h"
#include "top_nls.h"
#ifdef NUMA_ENABLED
#ifdef PRETEND_NUMA
/* PRETEND_NUMA stand-in for the numa_max_node() otherwise supplied via
   <numa.h>: always reports 2 as the highest node number (callers add 1
   to that value to obtain their node count) */
static int numa_max_node(void) {
   const int highest_node = 2;

   return highest_node;
}
/* PRETEND_NUMA stand-in for the numa_node_of_cpu() otherwise supplied via
   <numa.h>: maps a cpu number onto a pretend node by taking the cpu
   number modulo 3 */
static int numa_node_of_cpu(int num) {
   int node = num % 3;

   return node;
}
#else
#include <numa.h>
#endif
#endif
/*###### Miscellaneous global stuff ####################################*/
/* The original and new terminal definitions
@ -209,6 +218,10 @@ static int Autox_array [P_MAXPFLGS],
#else // nls_maybe
static char Scaled_sfxtab[] = { 'k', 'm', 'g', 't', 'p', 'e', 0 };
#endif
/* Support for NUMA Node display & expansion (targeting) */
static int Numa_node_tot;
static int Numa_node_sel = -1;
/*###### Sort callbacks ################################################*/
@ -2272,16 +2285,25 @@ static void zap_fieldstab (void) {
* we preserve all cpu data in our CPU_t array which is organized
* as follows:
* cpus[0] thru cpus[n] == tics for each separate cpu
* cpus[Cpu_faux_tot] == tics from the 1st /proc/stat line */
* cpus[sumSLOT] == tics from the 1st /proc/stat line
* [ and beyond sumSLOT == tics for each cpu NUMA node ] */
static CPU_t *cpus_refresh (CPU_t *cpus) {
static FILE *fp = NULL;
static int sav_cpus = -1;
static int sav_slot = -1;
char buf[MEDBUFSIZ]; // enough for /proc/stat CPU line (not the intr line)
#ifdef NUMA_ENABLED
#define sumSLOT ( smp_num_cpus )
#define totSLOT ( 1 + smp_num_cpus + Numa_node_tot)
int i, node;
#else
#define sumSLOT ( Cpu_faux_tot )
#define totSLOT ( 1 + Cpu_faux_tot )
int i;
#endif
/*** hotplug_acclimated ***/
if (sav_cpus != Cpu_faux_tot) {
sav_cpus = Cpu_faux_tot;
if (sav_slot != sumSLOT) {
sav_slot = sumSLOT;
zap_fieldstab();
if (fp) { fclose(fp); fp = NULL; }
if (cpus) { free(cpus); cpus = NULL; }
@ -2292,36 +2314,55 @@ static CPU_t *cpus_refresh (CPU_t *cpus) {
if (!fp) {
if (!(fp = fopen("/proc/stat", "r")))
error_exit(fmtmk(N_fmt(FAIL_statopn_fmt), strerror(errno)));
/* note: we allocate one more CPU_t than Cpu_faux_tot so the last
slot can hold tics representing the /proc/stat cpu summary
(the 1st line) -- that slot supports our View_CPUSUM toggle */
cpus = alloc_c((1 + Cpu_faux_tot) * sizeof(CPU_t));
/* note: totSLOT sizes the array with one CPU_t beyond the per-cpu slots so
that a slot can hold tics representing the /proc/stat cpu summary (and,
when NUMA_ENABLED, one additional slot for each NUMA node) */
cpus = alloc_c(totSLOT * sizeof(CPU_t));
}
rewind(fp);
fflush(fp);
// remember from last time around
memcpy(&cpus[Cpu_faux_tot].sav, &cpus[Cpu_faux_tot].cur, sizeof(CT_t));
memcpy(&cpus[sumSLOT].sav, &cpus[sumSLOT].cur, sizeof(CT_t));
// then value the last slot with the cpu summary line
if (!fgets(buf, sizeof(buf), fp)) error_exit(N_txt(FAIL_statget_txt));
memset(&cpus[Cpu_faux_tot].cur, 0, sizeof(CT_t));
memset(&cpus[sumSLOT].cur, 0, sizeof(CT_t));
if (4 > sscanf(buf, "cpu %Lu %Lu %Lu %Lu %Lu %Lu %Lu %Lu"
, &cpus[Cpu_faux_tot].cur.u, &cpus[Cpu_faux_tot].cur.n, &cpus[Cpu_faux_tot].cur.s
, &cpus[Cpu_faux_tot].cur.i, &cpus[Cpu_faux_tot].cur.w, &cpus[Cpu_faux_tot].cur.x
, &cpus[Cpu_faux_tot].cur.y, &cpus[Cpu_faux_tot].cur.z))
, &cpus[sumSLOT].cur.u, &cpus[sumSLOT].cur.n, &cpus[sumSLOT].cur.s
, &cpus[sumSLOT].cur.i, &cpus[sumSLOT].cur.w, &cpus[sumSLOT].cur.x
, &cpus[sumSLOT].cur.y, &cpus[sumSLOT].cur.z))
error_exit(N_txt(FAIL_statget_txt));
#ifndef CPU_ZEROTICS
cpus[Cpu_faux_tot].cur.tot = cpus[Cpu_faux_tot].cur.u + cpus[Cpu_faux_tot].cur.s
+ cpus[Cpu_faux_tot].cur.n + cpus[Cpu_faux_tot].cur.i + cpus[Cpu_faux_tot].cur.w
+ cpus[Cpu_faux_tot].cur.x + cpus[Cpu_faux_tot].cur.y + cpus[Cpu_faux_tot].cur.z;
cpus[sumSLOT].cur.tot = cpus[sumSLOT].cur.u + cpus[sumSLOT].cur.s
+ cpus[sumSLOT].cur.n + cpus[sumSLOT].cur.i + cpus[sumSLOT].cur.w
+ cpus[sumSLOT].cur.x + cpus[sumSLOT].cur.y + cpus[sumSLOT].cur.z;
/* if a cpu has registered substantially fewer tics than those expected,
we'll force it to be treated as 'idle' so as not to present misleading
percentages. */
cpus[Cpu_faux_tot].edge =
((cpus[Cpu_faux_tot].cur.tot - cpus[Cpu_faux_tot].sav.tot) / smp_num_cpus) / (100 / TICS_EDGE);
cpus[sumSLOT].edge =
((cpus[sumSLOT].cur.tot - cpus[sumSLOT].sav.tot) / smp_num_cpus) / (100 / TICS_EDGE);
#endif
#ifdef NUMA_ENABLED
for (i = 0; i < Numa_node_tot; i++) {
node = sumSLOT + 1 + i;
// remember from last time around
memcpy(&cpus[node].sav, &cpus[node].cur, sizeof(CT_t));
// initialize current node statistics
memset(&cpus[node].cur, 0, sizeof(CT_t));
#ifndef CPU_ZEROTICS
cpus[node].edge = cpus[sumSLOT].edge;
// this is for symmetry only, it's not currently required
cpus[node].cur.tot = cpus[sumSLOT].cur.tot;
#endif
}
#endif
// now value each separate cpu's tics...
#ifdef NUMA_ENABLED
for (i = 0; i < sumSLOT; i++) {
#else
for (i = 0; i < sumSLOT && i < Screen_rows; i++) {
#endif
// now value each separate cpu's tics, maybe
for (i = 0; i < Cpu_faux_tot && i < Screen_rows; i++) {
#ifdef PRETEND4CPUS
rewind(fp);
fgets(buf, sizeof(buf), fp);
@ -2334,21 +2375,38 @@ static CPU_t *cpus_refresh (CPU_t *cpus) {
, &cpus[i].cur.u, &cpus[i].cur.n, &cpus[i].cur.s
, &cpus[i].cur.i, &cpus[i].cur.w, &cpus[i].cur.x
, &cpus[i].cur.y, &cpus[i].cur.z)) {
memmove(&cpus[i], &cpus[Cpu_faux_tot], sizeof(CPU_t));
memmove(&cpus[i], &cpus[sumSLOT], sizeof(CPU_t));
break; // tolerate cpus taken offline
}
#ifndef CPU_ZEROTICS
cpus[i].edge = cpus[Cpu_faux_tot].edge;
cpus[i].edge = cpus[sumSLOT].edge;
// this is for symmetry only, it's not currently required
cpus[i].cur.tot = cpus[Cpu_faux_tot].cur.tot;
cpus[i].cur.tot = cpus[sumSLOT].cur.tot;
#endif
#ifdef PRETEND4CPUS
cpus[i].id = i;
#endif
#ifdef NUMA_ENABLED
if (-1 < (node = numa_node_of_cpu(cpus[i].id))) {
node += (sumSLOT + 1);
cpus[node].cur.u += cpus[i].cur.u;
cpus[node].cur.n += cpus[i].cur.n;
cpus[node].cur.s += cpus[i].cur.s;
cpus[node].cur.i += cpus[i].cur.i;
cpus[node].cur.w += cpus[i].cur.w;
cpus[node].cur.x += cpus[i].cur.x;
cpus[node].cur.y += cpus[i].cur.y;
cpus[node].cur.z += cpus[i].cur.z;
}
#endif
}
Cpu_faux_tot = i; // tolerate cpus taken offline
return cpus;
#undef sumSLOT
#undef totSLOT
} // end: cpus_refresh
@ -2579,6 +2637,9 @@ static void sysinfo_refresh (int forced) {
cpuinfo();
Cpu_faux_tot = smp_num_cpus;
cpu_secs = cur_secs;
#ifdef NUMA_ENABLED
Numa_node_tot = numa_max_node() + 1;
#endif
}
#endif
} // end: sysinfo_refresh
@ -3165,6 +3226,10 @@ static void before (char *me) {
memcpy(HHash_two, HHash_nul, sizeof(HHash_nul));
#endif
#ifdef NUMA_ENABLED
Numa_node_tot = numa_max_node() + 1;
#endif
#ifndef SIGRTMAX // not available on hurd, maybe others too
#define SIGRTMAX 32
#endif
@ -4304,7 +4369,34 @@ static void keys_summary (int ch) {
switch (ch) {
case '1':
TOGw(w, View_CPUSUM);
if (CHKw(w, View_CPUNOD)) OFFw(w, View_CPUSUM);
else TOGw(w, View_CPUSUM);
OFFw(w, View_CPUNOD);
Numa_node_sel = -1;
break;
case '2':
if (!Numa_node_tot)
show_msg(N_txt(NUMA_nodenot_txt));
else {
if (Numa_node_sel < 0) TOGw(w, View_CPUNOD);
if (!CHKw(w, View_CPUNOD)) SETw(w, View_CPUSUM);
Numa_node_sel = -1;
}
break;
case '3':
if (!Numa_node_tot)
show_msg(N_txt(NUMA_nodenot_txt));
else {
int num = get_int(fmtmk(N_fmt(NUMA_nodeget_fmt), Numa_node_tot -1));
if (GET_INTNONE < num) {
if (num >= 0 && num < Numa_node_tot) {
Numa_node_sel = num;
SETw(w, View_CPUNOD);
OFFw(w, View_CPUSUM);
} else
show_msg(N_txt(NUMA_nodebad_txt));
}
}
break;
case 'C':
VIZTOGw(w, View_SCROLL);
@ -4754,7 +4846,7 @@ static void do_key (int ch) {
, 'I', 'k', 'r', 's', 'X', 'Y', 'Z', '0'
, kbd_ENTER, kbd_SPACE, '\0' } },
{ keys_summary,
{ '1', 'C', 'l', 'm', 't', '\0' } },
{ '1', '2', '3', 'C', 'l', 'm', 't', '\0' } },
{ keys_task,
{ '#', '<', '>', 'b', 'c', 'i', 'J', 'j', 'n', 'O', 'o'
, 'R', 'S', 'U', 'u', 'V', 'x', 'y', 'z'
@ -4868,6 +4960,8 @@ static void summary_show (void) {
#define anyFLG 0xffffff
static CPU_t *smpcpu = NULL;
WIN_t *w = Curwin; // avoid gcc bloat with a local copy
char tmp[MEDBUFSIZ];
int i;
// Display Uptime and Loadavg
if (isROOM(View_LOADAV, 1)) {
@ -4877,7 +4971,7 @@ static void summary_show (void) {
show_special(0, fmtmk(CHKw(w, Show_TASKON)? LOADAV_line_alt : LOADAV_line
, w->grpname, sprint_uptime()));
Msg_row += 1;
}
} // end: View_LOADAV
// Display Task and Cpu(s) States
if (isROOM(View_STATES, 2)) {
@ -4889,13 +4983,41 @@ static void summary_show (void) {
smpcpu = cpus_refresh(smpcpu);
#ifdef NUMA_ENABLED
if (CHKw(w, View_CPUNOD)) {
if (Numa_node_sel < 0) {
// display the 1st /proc/stat line, then the nodes (if room)
summary_hlp(&smpcpu[smp_num_cpus], N_txt(WORD_allcpus_txt));
Msg_row += 1;
// display each cpu node's states
for (i = 0; i < Numa_node_tot; i++) {
if (!isROOM(anyFLG, 1)) break;
snprintf(tmp, sizeof(tmp), N_fmt(NUMA_nodenam_fmt), i);
summary_hlp(&smpcpu[1 + smp_num_cpus + i], tmp);
Msg_row += 1;
}
} else {
// display the node summary, then the associated cpus (if room)
snprintf(tmp, sizeof(tmp), N_fmt(NUMA_nodenam_fmt), Numa_node_sel);
summary_hlp(&smpcpu[1 + smp_num_cpus + Numa_node_sel], tmp);
Msg_row += 1;
for (i = 0; i < Cpu_faux_tot; i++) {
if (Numa_node_sel == numa_node_of_cpu(smpcpu[i].id)) {
if (!isROOM(anyFLG, 1)) break;
snprintf(tmp, sizeof(tmp), N_fmt(WORD_eachcpu_fmt), smpcpu[i].id);
summary_hlp(&smpcpu[i], tmp);
Msg_row += 1;
}
}
}
} else
#endif
if (CHKw(w, View_CPUSUM)) {
// display just the 1st /proc/stat line
summary_hlp(&smpcpu[Cpu_faux_tot], N_txt(WORD_allcpus_txt));
Msg_row += 1;
} else {
int i;
char tmp[MEDBUFSIZ];
// display each cpu's states separately, screen height permitting...
for (i = 0; i < Cpu_faux_tot; i++) {
snprintf(tmp, sizeof(tmp), N_fmt(WORD_eachcpu_fmt), smpcpu[i].id);
@ -4904,7 +5026,7 @@ static void summary_show (void) {
if (!isROOM(anyFLG, 1)) break;
}
}
}
} // end: View_STATES
// Display Memory and Swap stats
if (isROOM(View_MEMORY, 2)) {
@ -4953,7 +5075,7 @@ static void summary_show (void) {
#undef mkM
#undef mkS
#undef prT
}
} // end: View_MEMORY
#undef isROOM
#undef anyFLG

View File

@ -23,6 +23,7 @@
#include "../proc/readproc.h"
/* Defines represented in configure.ac ----------------------------- */
//#define NUMA_ENABLED /* enable summary area NUMA/Node extension */
//#define OOMEM_ENABLE /* enable the SuSE out-of-memory additions */
//#define SIGNALS_LESS /* favor reduced signal load over response */
@ -44,6 +45,7 @@
//#define PRETEND2_5_X /* pretend we're linux 2.5.x (for IO-wait) */
//#define PRETEND4CPUS /* pretend we're smp with 4 ticsers (sic) */
//#define PRETENDNOCAP /* use a terminal without essential caps */
//#define PRETEND_NUMA /* pretend we've got some linux NUMA Nodes */
//#define RCFILE_NOERR /* rcfile errs silently default, vs. fatal */
//#define RECALL_FIXED /* don't reorder saved strings if recalled */
//#define RMAN_IGNORED /* don't consider auto right margin glitch */
@ -76,6 +78,9 @@
to kernel and/or cpu anomalies (see CPU_ZEROTICS), thanks to:
Jaromir Capik, <jcapik@redhat.com> - February, 2012 */
/* For the impetus and NUMA/Node prototype design, thanks to:
Lance Shelton <LShelton@fusionio.com> - April, 2013 */
#ifdef PRETEND2_5_X
#define linux_version_code LINUX_VERSION(2,5,43)
#endif
@ -287,6 +292,7 @@ typedef struct CPU_t {
letter shown is the corresponding 'command' toggle */
// 'View_' flags affect the summary (minimum), taken from 'Curwin'
#define View_CPUSUM 0x008000 // '1' - show combined cpu stats (vs. each)
#define View_CPUNOD 0x400000 // '2' - show numa node cpu stats ('3' also)
#define View_LOADAV 0x004000 // 'l' - display load avg and uptime summary
#define View_STATES 0x002000 // 't' - display task/cpu(s) states summary
#define View_MEMORY 0x001000 // 'm' - display memory summary
@ -613,6 +619,9 @@ typedef struct WIN_t {
#if defined(RECALL_FIXED) && defined(TERMIOS_ONLY)
# error 'RECALL_FIXED' conflicts with 'TERMIOS_ONLY'
#endif
#if defined(PRETEND_NUMA) && !defined(NUMA_ENABLED)
# error 'PRETEND_NUMA' also requires 'NUMA_ENABLED'
#endif
#if (LRGBUFSIZ < SCREENMAX)
# error 'LRGBUFSIZ' must NOT be less than 'SCREENMAX'
#endif

View File

@ -442,6 +442,10 @@ static void build_norm_nlstab (void) {
Norm_nlstab[WORD_exclude_txt] = _("exclude");
Norm_nlstab[OSEL_statlin_fmt] = _("<Enter> to resume, filters: %s");
Norm_nlstab[WORD_noneone_txt] = _("none");
Norm_nlstab[NUMA_nodenam_fmt] = _("Node%-2d:");
Norm_nlstab[NUMA_nodeget_fmt] = _("expand which node (0-%d)");
Norm_nlstab[NUMA_nodebad_txt] = _("invalid node");
Norm_nlstab[NUMA_nodenot_txt] = _("sorry, NUMA extensions unavailable");
}
@ -481,7 +485,7 @@ static void build_uniq_nlstab (void) {
"\n"
" Z~5,~1B~5,E,e Global: '~1Z~2' colors; '~1B~2' bold; '~1E~2'/'~1e~2' summary/task memory scale\n"
" l,t,m Toggle Summary: '~1l~2' load avg; '~1t~2' task/cpu stats; '~1m~2' memory info\n"
" 0,1,I Toggle: '~10~2' Zero suppress; '~11~2' separate cpu(s); '~1I~2' Irix mode\n"
" 0,1,2,3,I Toggle: '~10~2' zeros; '~11~2/~12~2/~13~2' cpus or numa node views; '~1I~2' Irix mode\n"
" f,F,X Fields: '~1f~2'/'~1F~2' add/remove/order/sort; '~1X~2' increase fixed-width\n"
"\n"
" L,&,<,> . Locate: '~1L~2'/'~1&~2' find/again; Move sort column: '~1<~2'/'~1>~2' left/right\n"

View File

@ -75,7 +75,8 @@ enum norm_nls {
GET_max_task_fmt, GET_nice_num_fmt, GET_pid2kill_fmt, GET_pid2nice_fmt,
GET_sigs_num_fmt, GET_user_ids_txt, HELP_cmdline_fmt, HILIGHT_cant_txt,
IRIX_curmode_fmt, LIMIT_exceed_fmt, MISSING_args_fmt, NAME_windows_fmt,
NOT_onsecure_txt, NOT_smp_cpus_txt, OFF_one_word_txt, ON_word_only_txt,
NOT_onsecure_txt, NOT_smp_cpus_txt, NUMA_nodebad_txt, NUMA_nodeget_fmt,
NUMA_nodenam_fmt, NUMA_nodenot_txt, OFF_one_word_txt, ON_word_only_txt,
OSEL_casenot_txt, OSEL_caseyes_txt, OSEL_errdelm_fmt, OSEL_errdups_txt,
OSEL_errvalu_fmt, OSEL_prompts_fmt, OSEL_statlin_fmt, RC_bad_entry_fmt,
RC_bad_files_fmt, SCROLL_coord_fmt, SELECT_clash_txt, THREADS_show_fmt,