top: tolerate loss of smp cpus, improve CPU_t management

Prior to this patch, top was able to handle any hotplugged
cpus *added* to the system in two distinct ways.

 1) Newly added cpus would be detected by sysinfo_refresh
    calling the library's cpuinfo function, which occurs
    at most every 5 minutes.

 2) The user could force a refresh using either the
    <Enter> or <Space> keys.

Unfortunately, the *loss* of a cpu would produce an early
exit due to a /proc/stat read failure.  Such a failure
can be produced in the following way:
  sudo echo 0 > /sys/devices/system/cpu/cpu??/online

This commit allows top to tolerate the loss of cpus.
It also provides for more efficient CPU_t management,
especially for massively parallel cpu environments.

Note: Changes to the cpu compliment can produce a single
cycle distortion of cpu percentages.  Such distortion is
most visible when each cpu is being displayed.  It can
be eliminated with a forced refresh via <Enter>/<Space>.
This commit is contained in:
Jim Warner 2012-02-01 00:00:00 -06:00 committed by Craig Small
parent d7c986cd30
commit f348575edc
2 changed files with 61 additions and 65 deletions

106
top/top.c
View File

@ -78,8 +78,7 @@ static unsigned Page_size;
static unsigned Pg2K_shft = 0; static unsigned Pg2K_shft = 0;
/* SMP, Irix/Solaris mode, Linux 2.5.xx support */ /* SMP, Irix/Solaris mode, Linux 2.5.xx support */
/* (assume no IO-wait stats, overridden if linux 2.5.41) */ static int Cpu_faux_tot;
static int Cpu_tot;
static float Cpu_pmax; static float Cpu_pmax;
static const char *Cpu_States_fmts; static const char *Cpu_States_fmts;
@ -331,7 +330,7 @@ static void bye_bye (const char *str) {
"\n\tProgram" "\n\tProgram"
"\n\t Linux version = %u.%u.%u, %s" "\n\t Linux version = %u.%u.%u, %s"
"\n\t Hertz = %u (%u bytes, %u-bit time)" "\n\t Hertz = %u (%u bytes, %u-bit time)"
"\n\t Page_size = %d, Cpu_tot = %d" "\n\t Page_size = %d, Cpu_faux_tot = %d, smp_num_cpus = %d"
"\n\t sizeof(CPU_t) = %u, sizeof(HST_t) = %u (%u HST_t's/Page), HHist_siz = %u" "\n\t sizeof(CPU_t) = %u, sizeof(HST_t) = %u (%u HST_t's/Page), HHist_siz = %u"
"\n\t sizeof(proc_t) = %u, sizeof(proc_t.cmd) = %u, sizeof(proc_t*) = %u" "\n\t sizeof(proc_t) = %u, sizeof(proc_t.cmd) = %u, sizeof(proc_t*) = %u"
"\n\t Frames_libflags = %08lX" "\n\t Frames_libflags = %08lX"
@ -364,7 +363,7 @@ static void bye_bye (const char *str) {
, LINUX_VERSION_PATCH(linux_version_code) , LINUX_VERSION_PATCH(linux_version_code)
, procps_version , procps_version
, (unsigned)Hertz, (unsigned)sizeof(Hertz), (unsigned)sizeof(Hertz) * 8 , (unsigned)Hertz, (unsigned)sizeof(Hertz), (unsigned)sizeof(Hertz) * 8
, Page_size, Cpu_tot , (unsigned) sizeof(CPU_t) , Page_size, Cpu_faux_tot, (int)smp_num_cpus, (unsigned)sizeof(CPU_t)
, (unsigned)sizeof(HST_t), Page_size / (unsigned)sizeof(HST_t), HHist_siz , (unsigned)sizeof(HST_t), Page_size / (unsigned)sizeof(HST_t), HHist_siz
, (unsigned)sizeof(proc_t), (unsigned)sizeof(p->cmd), (unsigned)sizeof(proc_t*) , (unsigned)sizeof(proc_t), (unsigned)sizeof(p->cmd), (unsigned)sizeof(proc_t*)
, (long)Frames_libflags , (long)Frames_libflags
@ -1755,7 +1754,7 @@ static void zap_fieldstab (void) {
always: always:
Fieldstab[P_CPN].head = "P "; Fieldstab[P_CPN].head = "P ";
Fieldstab[P_CPN].fmts = "%1d "; Fieldstab[P_CPN].fmts = "%1d ";
if (1 < (digits = (unsigned)snprintf(buf, sizeof(buf), "%u", (unsigned)Cpu_tot))) { if (1 < (digits = (unsigned)snprintf(buf, sizeof(buf), "%u", (unsigned)smp_num_cpus))) {
if (5 < digits) error_exit(N_txt(FAIL_widecpu_txt)); if (5 < digits) error_exit(N_txt(FAIL_widecpu_txt));
snprintf(fmts_cpu, sizeof(fmts_cpu), "%%%ud ", digits); snprintf(fmts_cpu, sizeof(fmts_cpu), "%%%ud ", digits);
Fieldstab[P_CPN].head = " P " + 5 - digits; Fieldstab[P_CPN].head = " P " + 5 - digits;
@ -1764,9 +1763,9 @@ always:
Cpu_pmax = 99.9; Cpu_pmax = 99.9;
Fieldstab[P_CPU].fmts = " %#4.1f "; Fieldstab[P_CPU].fmts = " %#4.1f ";
if (Rc.mode_irixps && Cpu_tot > 1 && !Thread_mode) { if (Rc.mode_irixps && smp_num_cpus > 1 && !Thread_mode) {
Cpu_pmax = 100.0 * Cpu_tot; Cpu_pmax = 100.0 * smp_num_cpus;
if (Cpu_tot > 10) { if (smp_num_cpus > 10) {
if (Cpu_pmax > 99999.0) Cpu_pmax = 99999.0; if (Cpu_pmax > 99999.0) Cpu_pmax = 99999.0;
Fieldstab[P_CPU].fmts = "%5.0f "; Fieldstab[P_CPU].fmts = "%5.0f ";
} else { } else {
@ -1786,7 +1785,7 @@ always:
* we preserve all cpu data in our CPU_t array which is organized * we preserve all cpu data in our CPU_t array which is organized
* as follows: * as follows:
* cpus[0] thru cpus[n] == tics for each separate cpu * cpus[0] thru cpus[n] == tics for each separate cpu
* cpus[Cpu_tot] == tics from the 1st /proc/stat line */ * cpus[Cpu_faux_tot] == tics from the 1st /proc/stat line */
static CPU_t *cpus_refresh (CPU_t *cpus) { static CPU_t *cpus_refresh (CPU_t *cpus) {
static FILE *fp = NULL; static FILE *fp = NULL;
static int sav_cpus = -1; static int sav_cpus = -1;
@ -1794,8 +1793,8 @@ static CPU_t *cpus_refresh (CPU_t *cpus) {
int i; int i;
/*** hotplug_acclimated ***/ /*** hotplug_acclimated ***/
if (sav_cpus != smp_num_cpus) { if (sav_cpus != Cpu_faux_tot) {
Cpu_tot = sav_cpus = smp_num_cpus; sav_cpus = Cpu_faux_tot;
zap_fieldstab(); zap_fieldstab();
if (fp) { fclose(fp); fp = NULL; } if (fp) { fclose(fp); fp = NULL; }
if (cpus) { free(cpus); cpus = NULL; } if (cpus) { free(cpus); cpus = NULL; }
@ -1806,43 +1805,48 @@ static CPU_t *cpus_refresh (CPU_t *cpus) {
if (!fp) { if (!fp) {
if (!(fp = fopen("/proc/stat", "r"))) if (!(fp = fopen("/proc/stat", "r")))
error_exit(fmtmk(N_fmt(FAIL_statopn_fmt), strerror(errno))); error_exit(fmtmk(N_fmt(FAIL_statopn_fmt), strerror(errno)));
/* note: we allocate one more CPU_t than Cpu_tot so that the last slot /* note: we allocate one more CPU_t than Cpu_faux_tot so the last
can hold tics representing the /proc/stat cpu summary (the first slot can hold tics representing the /proc/stat cpu summary
line read) -- that slot supports our View_CPUSUM toggle */ (the 1st line) -- that slot supports our View_CPUSUM toggle */
cpus = alloc_c((1 + Cpu_tot) * sizeof(CPU_t)); cpus = alloc_c((1 + Cpu_faux_tot) * sizeof(CPU_t));
} }
rewind(fp); rewind(fp);
fflush(fp); fflush(fp);
// first value the last slot with the cpu summary line // remember from last time around
cpus[Cpu_tot].x = cpus[Cpu_tot].y = cpus[Cpu_tot].z = 0; memcpy(&cpus[Cpu_faux_tot].sav, &cpus[Cpu_faux_tot].cur, sizeof(CT_t));
// FIXME: can't tell above by kernel version number // then value the last slot with the cpu summary line
if (!fgets(buf, sizeof(buf), fp)) error_exit(N_txt(FAIL_statget_txt)); if (!fgets(buf, sizeof(buf), fp)) error_exit(N_txt(FAIL_statget_txt));
memset(&cpus[Cpu_faux_tot].cur, 0, sizeof(CT_t));
if (4 > sscanf(buf, "cpu %Lu %Lu %Lu %Lu %Lu %Lu %Lu %Lu" if (4 > sscanf(buf, "cpu %Lu %Lu %Lu %Lu %Lu %Lu %Lu %Lu"
, &cpus[Cpu_tot].u, &cpus[Cpu_tot].n, &cpus[Cpu_tot].s, &cpus[Cpu_tot].i , &cpus[Cpu_faux_tot].cur.u, &cpus[Cpu_faux_tot].cur.n, &cpus[Cpu_faux_tot].cur.s
, &cpus[Cpu_tot].w, &cpus[Cpu_tot].x, &cpus[Cpu_tot].y, &cpus[Cpu_tot].z)) , &cpus[Cpu_faux_tot].cur.i, &cpus[Cpu_faux_tot].cur.w, &cpus[Cpu_faux_tot].cur.x
, &cpus[Cpu_faux_tot].cur.y, &cpus[Cpu_faux_tot].cur.z))
error_exit(N_txt(FAIL_statget_txt)); error_exit(N_txt(FAIL_statget_txt));
// and just in case we're 2.2.xx compiled without SMP support...
if (1 == Cpu_tot) // now value each separate cpu's tics, maybe
memcpy(cpus, &cpus[1], sizeof(CPU_t)); for (i = 0; i < Cpu_faux_tot && i < Screen_rows; i++) {
// now value each separate cpu's tics
for (i = 0; i < Cpu_tot; i++) {
#ifdef PRETEND4CPUS #ifdef PRETEND4CPUS
rewind(fp); rewind(fp);
fgets(buf, sizeof(buf), fp); fgets(buf, sizeof(buf), fp);
#endif #endif
// remember from last time around
memcpy(&cpus[i].sav, &cpus[i].cur, sizeof(CT_t));
if (!fgets(buf, sizeof(buf), fp)) error_exit(N_txt(FAIL_statget_txt)); if (!fgets(buf, sizeof(buf), fp)) error_exit(N_txt(FAIL_statget_txt));
cpus[i].x = cpus[i].y = cpus[i].z = 0; memset(&cpus[i].cur, 0, sizeof(CT_t));
// FIXME: can't tell above by kernel version number if (4 > sscanf(buf, "cpu%d %Lu %Lu %Lu %Lu %Lu %Lu %Lu %Lu", &cpus[i].id
if (4 > sscanf(buf, "cpu%u %Lu %Lu %Lu %Lu %Lu %Lu %Lu %Lu", &cpus[i].id , &cpus[i].cur.u, &cpus[i].cur.n, &cpus[i].cur.s
, &cpus[i].u, &cpus[i].n, &cpus[i].s, &cpus[i].i , &cpus[i].cur.i, &cpus[i].cur.w, &cpus[i].cur.x
, &cpus[i].w, &cpus[i].x, &cpus[i].y, &cpus[i].z)) { , &cpus[i].cur.y, &cpus[i].cur.z)) {
error_exit(N_txt(FAIL_statget_txt)); memmove(&cpus[i], &cpus[Cpu_faux_tot], sizeof(CPU_t));
break; // tolerate cpus taken offline
} }
#ifdef PRETEND4CPUS #ifdef PRETEND4CPUS
cpus[i].id = i; cpus[i].id = i;
#endif #endif
} }
Cpu_faux_tot = i; // tolerate cpus taken offline
return cpus; return cpus;
} // end: cpus_refresh } // end: cpus_refresh
@ -1918,7 +1922,7 @@ static void prochlp (proc_t *this) {
oldtimev.tv_usec = timev.tv_usec; oldtimev.tv_usec = timev.tv_usec;
// if in Solaris mode, adjust our scaling for all cpus // if in Solaris mode, adjust our scaling for all cpus
Frame_etscale = 100.0f / ((float)Hertz * (float)et * (Rc.mode_irixps ? 1 : Cpu_tot)); Frame_etscale = 100.0f / ((float)Hertz * (float)et * (Rc.mode_irixps ? 1 : smp_num_cpus));
#ifdef OFF_HST_HASH #ifdef OFF_HST_HASH
maxt_sav = Frame_maxtask; maxt_sav = Frame_maxtask;
#endif #endif
@ -2061,6 +2065,7 @@ static void sysinfo_refresh (int forced) {
/*** hotplug_acclimated ***/ /*** hotplug_acclimated ***/
if (300 <= cur_secs - cpu_secs) { if (300 <= cur_secs - cpu_secs) {
cpuinfo(); cpuinfo();
Cpu_faux_tot = smp_num_cpus;
cpu_secs = cur_secs; cpu_secs = cur_secs;
} }
#endif #endif
@ -2086,7 +2091,7 @@ static void before (char *me) {
#ifdef PRETEND4CPUS #ifdef PRETEND4CPUS
smp_num_cpus = 4; smp_num_cpus = 4;
#endif #endif
Cpu_tot = smp_num_cpus; Cpu_faux_tot = smp_num_cpus;
Cpu_States_fmts = N_unq(STATE_lin2x4_fmt); Cpu_States_fmts = N_unq(STATE_lin2x4_fmt);
if (linux_version_code > LINUX_VERSION(2, 5, 41)) if (linux_version_code > LINUX_VERSION(2, 5, 41))
Cpu_States_fmts = N_unq(STATE_lin2x5_fmt); Cpu_States_fmts = N_unq(STATE_lin2x5_fmt);
@ -2806,7 +2811,7 @@ static void keys_global (int ch) {
Pseudo_row = PROC_XTRA; Pseudo_row = PROC_XTRA;
break; break;
case 'I': case 'I':
if (Cpu_tot > 1) { if (Cpu_faux_tot > 1) {
Rc.mode_irixps = !Rc.mode_irixps; Rc.mode_irixps = !Rc.mode_irixps;
show_msg(fmtmk(N_fmt(IRIX_curmode_fmt) show_msg(fmtmk(N_fmt(IRIX_curmode_fmt)
, Rc.mode_irixps ? N_txt(ON_word_only_txt) : N_txt(OFF_one_word_txt))); , Rc.mode_irixps ? N_txt(ON_word_only_txt) : N_txt(OFF_one_word_txt)));
@ -3309,20 +3314,21 @@ static void do_key (int ch) {
* display and thus requiring the cpu summary toggle */ * display and thus requiring the cpu summary toggle */
static void summaryhlp (CPU_t *cpu, const char *pfx) { static void summaryhlp (CPU_t *cpu, const char *pfx) {
/* we'll trim to zero if we get negative time ticks, /* we'll trim to zero if we get negative time ticks,
which has happened with some SMP kernels (pre-2.4?) */ which has happened with some SMP kernels (pre-2.4?)
and when cpus are dynamically added or removed */
#define TRIMz(x) ((tz = (SIC_t)(x)) < 0 ? 0 : tz) #define TRIMz(x) ((tz = (SIC_t)(x)) < 0 ? 0 : tz)
SIC_t u_frme, s_frme, n_frme, i_frme, w_frme, x_frme, y_frme, z_frme, tot_frme, tz; SIC_t u_frme, s_frme, n_frme, i_frme, w_frme, x_frme, y_frme, z_frme, tot_frme, tz;
float scale; float scale;
u_frme = cpu->u - cpu->u_sav; u_frme = TRIMz(cpu->cur.u - cpu->sav.u);
s_frme = cpu->s - cpu->s_sav; s_frme = TRIMz(cpu->cur.s - cpu->sav.s);
n_frme = cpu->n - cpu->n_sav; n_frme = TRIMz(cpu->cur.n - cpu->sav.n);
i_frme = TRIMz(cpu->i - cpu->i_sav); i_frme = TRIMz(cpu->cur.i - cpu->sav.i);
if ((u_frme == 0) && (i_frme == 0)) i_frme = 100; if ((u_frme == 0) && (i_frme == 0)) i_frme = 100;
w_frme = cpu->w - cpu->w_sav; w_frme = TRIMz(cpu->cur.w - cpu->sav.w);
x_frme = cpu->x - cpu->x_sav; x_frme = TRIMz(cpu->cur.x - cpu->sav.x);
y_frme = cpu->y - cpu->y_sav; y_frme = TRIMz(cpu->cur.y - cpu->sav.y);
z_frme = cpu->z - cpu->z_sav; z_frme = TRIMz(cpu->cur.z - cpu->sav.z);
tot_frme = u_frme + s_frme + n_frme + i_frme + w_frme + x_frme + y_frme + z_frme; tot_frme = u_frme + s_frme + n_frme + i_frme + w_frme + x_frme + y_frme + z_frme;
if (1 > tot_frme) tot_frme = 1; if (1 > tot_frme) tot_frme = 1;
scale = 100.0 / (float)tot_frme; scale = 100.0 / (float)tot_frme;
@ -3334,16 +3340,6 @@ static void summaryhlp (CPU_t *cpu, const char *pfx) {
, (float)n_frme * scale, (float)i_frme * scale , (float)n_frme * scale, (float)i_frme * scale
, (float)w_frme * scale, (float)x_frme * scale , (float)w_frme * scale, (float)x_frme * scale
, (float)y_frme * scale, (float)z_frme * scale)); , (float)y_frme * scale, (float)z_frme * scale));
// remember for next time around
cpu->u_sav = cpu->u;
cpu->s_sav = cpu->s;
cpu->n_sav = cpu->n;
cpu->i_sav = cpu->i;
cpu->w_sav = cpu->w;
cpu->x_sav = cpu->x;
cpu->y_sav = cpu->y;
cpu->z_sav = cpu->z;
#undef TRIMz #undef TRIMz
} // end: summaryhlp } // end: summaryhlp
@ -3381,13 +3377,13 @@ static void summary_show (void) {
if (CHKw(w, View_CPUSUM)) { if (CHKw(w, View_CPUSUM)) {
// display just the 1st /proc/stat line // display just the 1st /proc/stat line
summaryhlp(&smpcpu[Cpu_tot], N_txt(WORD_allcpus_txt)); summaryhlp(&smpcpu[Cpu_faux_tot], N_txt(WORD_allcpus_txt));
Msg_row += 1; Msg_row += 1;
} else { } else {
int i; int i;
char tmp[MEDBUFSIZ]; char tmp[MEDBUFSIZ];
// display each cpu's states separately, screen height permitting... // display each cpu's states separately, screen height permitting...
for (i = 0; i < Cpu_tot; i++) { for (i = 0; i < Cpu_faux_tot; i++) {
snprintf(tmp, sizeof(tmp), N_fmt(WORD_eachcpu_fmt), smpcpu[i].id); snprintf(tmp, sizeof(tmp), N_fmt(WORD_eachcpu_fmt), smpcpu[i].id);
summaryhlp(&smpcpu[i], tmp); summaryhlp(&smpcpu[i], tmp);
Msg_row += 1; Msg_row += 1;

View File

@ -51,9 +51,6 @@
*** hotplug_acclimated *** *** hotplug_acclimated ***
( hopefully libproc will also be supportive of our efforts ) */ ( hopefully libproc will also be supportive of our efforts ) */
/* And there are still some of these lurking here and there...
FIXME - blah, blah... */
/* For introducing inaugural cgroup support, thanks to: /* For introducing inaugural cgroup support, thanks to:
Jan Gorig <jgorig@redhat.com> - April, 2011 */ Jan Gorig <jgorig@redhat.com> - April, 2011 */
@ -207,19 +204,22 @@ typedef struct HST_t {
} HST_t; } HST_t;
#endif #endif
/* This structure stores a frame's cpu tics used in history /* These 2 structures store a frame's cpu tics used in history
calculations. It exists primarily for SMP support but serves calculations. They exist primarily for SMP support but serve
all environments. */ all environments. */
typedef struct CPU_t { typedef struct CT_t {
/* other kernels: u == user/us, n == nice/ni, s == system/sy, i == idle/id /* other kernels: u == user/us, n == nice/ni, s == system/sy, i == idle/id
2.5.41 kernel: w == IO-wait/wa (io wait time) 2.5.41 kernel: w == IO-wait/wa (io wait time)
2.6.0 kernel: x == hi (hardware irq time), y == si (software irq time) 2.6.0 kernel: x == hi (hardware irq time), y == si (software irq time)
2.6.11 kernel: z == st (virtual steal time) */ 2.6.11 kernel: z == st (virtual steal time) */
TIC_t u, n, s, i, w, x, y, z; // as represented in /proc/stat TIC_t u, n, s, i, w, x, y, z; // as represented in /proc/stat
TIC_t u_sav, s_sav, n_sav, i_sav, w_sav, x_sav, y_sav, z_sav; // in the order of our display } CT_t;
unsigned id; // the CPU ID number
} CPU_t;
typedef struct CPU_t {
CT_t cur; // current frame's cpu tics
CT_t sav; // prior frame's cpu tics
int id; // the cpu id number (0 - nn)
} CPU_t;
/* /////////////////////////////////////////////////////////////// */ /* /////////////////////////////////////////////////////////////// */
/* Special Section: multiple windows/field groups --------------- */ /* Special Section: multiple windows/field groups --------------- */