library: refactor and rely on modern kernels for wchan

Several Debian-based distributions were recently found
to have omitted a kernel configuration option that had
the effect of rendering /proc/#/stat and /proc/#/wchan
useless for providing any 'sleeping in function' info.

That problem also prompted a reevaluation of the whole
approach to wchan matters which had grown increasingly
complex as our library evolved over the last 13 years.

The net result was a decision to rely on /proc/#/wchan
which arrived along with the 2.5 kernel. This then let
us vastly simplify the internal code plus the external
interface, which benefits both the top and ps programs.

Reference(s):
http://www.freelists.org/post/procps/WCHAN,11
https://lkml.org/lkml/2008/11/6/12
https://bugs.debian.org/711592

Signed-off-by: Jim Warner <james.warner@comcast.net>
This commit is contained in:
Jim Warner 2015-06-18 00:00:00 -05:00 committed by Craig Small
parent 932f54b19d
commit 6b8dc5511f
6 changed files with 58 additions and 646 deletions

View File

@ -156,7 +156,6 @@ proc_libprocps_la_SOURCES = \
proc/devname.h \ proc/devname.h \
proc/escape.c \ proc/escape.c \
proc/escape.h \ proc/escape.h \
proc/ksym.c \
proc/procps.h \ proc/procps.h \
proc/pwcache.c \ proc/pwcache.c \
proc/pwcache.h \ proc/pwcache.h \
@ -170,6 +169,7 @@ proc_libprocps_la_SOURCES = \
proc/sysinfo.h \ proc/sysinfo.h \
proc/version.c \ proc/version.c \
proc/version.h \ proc/version.h \
proc/wchan.c \
proc/wchan.h \ proc/wchan.h \
proc/whattime.c \ proc/whattime.c \
proc/whattime.h proc/whattime.h

View File

@ -1,639 +0,0 @@
/*
* ksym.c - kernel symbol handling
* Copyright 1998-2003 by Albert Cahalan
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <stdarg.h>
#include <fcntl.h>
#include <errno.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <sys/utsname.h>
#include "procps.h"
#include "alloc.h"
#include "version.h"
#include "sysinfo.h" /* smp_num_cpus */
#include "wchan.h" // to verify prototypes
/* Default path of the running kernel's exported-symbol list; read by
 * read_and_parse() and named in parse_ksyms()'s warning message. */
#define KSYMS_FILENAME "/proc/ksyms"
/* The four "#if 0" sections below are disabled debugging overrides. Each one,
 * if enabled, would repoint KSYMS_FILENAME and define SYSMAP_FILENAME to saved
 * test files, and would replace linux_version_code and smp_num_cpus with
 * fixed values. None of them is compiled as written. */
#if 0
#undef KSYMS_FILENAME
#define KSYMS_FILENAME "/would/be/nice/to/have/this/file"
#define SYSMAP_FILENAME "/home/albert/ps/45621/System.map-hacked"
#define linux_version_code 131598 /* ? */
#define smp_num_cpus 2
#endif
#if 0
#undef KSYMS_FILENAME
#define KSYMS_FILENAME "/home/albert/ps/45621/ksyms-2.3.12"
#define SYSMAP_FILENAME "/home/albert/ps/45621/System.map-2.3.12"
#define linux_version_code 131852 /* 2.3.12 */
#define smp_num_cpus 2
#endif
#if 0
#undef KSYMS_FILENAME
#define KSYMS_FILENAME "/home/albert/ps/45621/ksyms-2.3.18ac8-MODVERS"
#define SYSMAP_FILENAME "/home/albert/ps/45621/System.map-2.3.18ac8-MODVERS"
#define linux_version_code 131858 /* 2.3.18ac8 */
#define smp_num_cpus 2
#endif
#if 0
#undef KSYMS_FILENAME
#define KSYMS_FILENAME "/home/albert/ps/45621/ksyms-2.3.18ac8-NOMODVERS"
#define SYSMAP_FILENAME "/home/albert/ps/45621/System.map-2.3.18ac8-NOMODVERS"
#define linux_version_code 131858 /* 2.3.18ac8 */
#define smp_num_cpus 2
#endif
/* These are the symbol types, with relative popularity:
* ? w machine type junk for Alpha -- odd syntax
* ? S not for i386
* 4 W not for i386
* 60 R
* 100 A
* 125 r
* 363 s not for i386
* 858 B
* 905 g generated by modutils?
* 929 G generated by modutils?
* 1301 b
* 2750 D
* 4481 d
* 11417 ?
* 13666 t
* 15442 T
*
* For i386, that is: "RArBbDd?tT"
*/
/* Accepted symbol-type letters: sysmap_mmap() rejects any System.map line
 * whose type character is not found in this string. */
#define SYMBOL_TYPE_CHARS "Tt?dDbBrARGgsWS"
/*
* '?' is a symbol type
* '.' is part of a name (versioning?)
* "\t[]" are for the module name in /proc/ksyms
*/
/* NOTE(review): no code visible in this file references LEGAL_SYSMAP_CHARS;
 * it appears to be documentation of the expected character set only. */
#define LEGAL_SYSMAP_CHARS "0123456789_ ?.\n\t[]" \
"abcdefghijklmnopqrstuvwxyz" \
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
/* System.map lines look like:
* hex num, space, one of SYMBOL_TYPE_CHARS, space, LEGAL_SYSMAP_CHARS, \n
*
* Alpha systems can start with a few lines that have the address replaced
* by space padding and a 'w' for the type. For those lines, the last space
* is followed by something like: mikasa_primo_mv p2k_mv sable_gamma_mv
* (just one of those, always with a "_mv", then the newline)
*
* The /proc/ksyms lines are like System.map lines w/o the symbol type char.
* When odd features are used, the name part contains:
* "(.*)_R(smp_|smp2gig_|2gig_)?[0-9a-fA-F]{8,}"
* It is likely that more crap will be added...
*/
/* One symbol-table entry: an address paired with the name found at it.
 * Arrays of these are kept sorted-by-file-order and binary-searched by
 * search(); NOTE(review): that presumes the input files list addresses in
 * ascending order -- nothing here sorts them. */
typedef struct symb {
unsigned KLONG addr; /* address parsed from the leading hex field of a line */
const char *name; /* points into the loaded file text (or at a string literal) */
} symb;
/* These mostly rely on POSIX to make them zero. */
/* Small cache of already-resolved names; lookup_wchan() indexes it with
 * (address >> 4) & 0xff and read_and_parse() clears it on every reload. */
static symb hashtable[256];
/* System.map state, filled in by sysmap_mmap(): */
static char *sysmap_data; /* start of the private file mapping */
static unsigned sysmap_room; /* capacity of sysmap_index, in entries */
static symb *sysmap_index; /* parsed entries */
static unsigned sysmap_count; /* entries in use */
/* KSYMS_FILENAME state, filled in by read_file() and parse_ksyms(): */
static char *ksyms_data; /* NUL-terminated file text */
static unsigned ksyms_room = 4096; /* buffer size; read_file() zeroes it to mean "gave up" */
static symb *ksyms_index; /* parsed entries */
static unsigned ksyms_count; /* entries in use */
static unsigned idx_room; /* capacity of ksyms_index, in entries */
/*********************************/
/* Kill this: _R(smp_?|smp2gig_?|2gig_?)?[0-9a-f]{8,}$
* We kill: (_R[^A-Z]*[0-9a-f]{8,})+$
*
* The loop should almost never be taken, but it has to be there.
* It gets rid of anything that _looks_ like a version code, even
* if a real version code has already been found. This is because
* the inability to perfectly recognize a version code may lead to
* symbol mangling, which in turn leads to mismatches between the
* /proc/ksyms and System.map data files.
*/
#if 0
static char *chop_version(char *arg){
char *cp;
cp = strchr(arg,'\t');
if(cp) *cp = '\0'; /* kill trailing module name first */
for(;;){
char *p;
int len = 0;
cp = strrchr(arg, 'R');
if(!cp || cp<=arg+1 || cp[-1]!='_') break;
for(p=cp; *++p; ){
switch(*p){
default:
goto out;
case '0' ... '9':
case 'a' ... 'f':
len++;
continue;
case 'g' ... 'z':
case '_':
len=0;
continue;
}
}
if(len<8) break;
cp[-1] = '\0';
}
out:
if(*arg=='G'){
int len = strlen(arg);
while( len>8 && !memcmp(arg,"GPLONLY_",8) ){
arg += 8;
len -= 8;
}
}
return arg;
}
#endif
/*
 * Strip decoration from a symbol name, editing the string in place.
 *
 *  1. Anything from the first TAB onward (the "[module]" column) is cut off.
 *  2. Every trailing "_R<stuff><8 lowercase hex digits>" version suffix is
 *     removed, as long as <stuff> holds no upper-case letter.
 *  3. Any number of leading "GPLONLY_" markers is skipped.
 *
 * Returns a pointer into the caller's buffer (possibly past its start).
 */
static char *chop_version(char *arg){
    static const char upper[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
    static const char lowhex[] = "0123456789abcdef";
    char *tab = strchr(arg, '\t');
    char *mark;

    if (tab != NULL)
        *tab = '\0';

    /* peel version suffixes off the end, one per pass */
    while ((mark = strrchr(arg, 'R')) != NULL) {
        int taillen;

        /* need at least one name character before the "_R" */
        if (mark <= arg + 1 || mark[-1] != '_')
            break;
        taillen = strlen(mark);
        if (taillen < 9)                 /* 'R' plus 8 hex digits minimum */
            break;
        if (strpbrk(mark + 1, upper) != NULL)
            break;
        if (strspn(mark + taillen - 8, lowhex) != 8)
            break;
        mark[-1] = '\0';
    }

    if (arg[0] == 'G') {
        int remaining = strlen(arg);

        for (; remaining > 8 && memcmp(arg, "GPLONLY_", 8) == 0; remaining -= 8)
            arg += 8;
    }
    return arg;
}
/***********************************/
/*
 * Binary-search an address-ordered symbol index.
 *
 * Returns the entry whose address equals `address`, or else the closest
 * entry below it. Returns NULL when there is no index, the index is empty,
 * or `address` lies below the first entry.
 */
static const symb *search(unsigned KLONG address, symb *idx, unsigned count){
    unsigned left;
    unsigned mid;
    unsigned right;

    /* An allocated but empty index must be rejected here: with count == 0
     * the expression count-1 wraps around and idx[count-1] would be read
     * far outside the array. */
    if(!idx || !count) return NULL;
    if(address < idx[0].addr) return NULL;
    if(address >= idx[count-1].addr) return idx+count-1;

    left = 0;
    right = count-1;
    for(;;){
        mid = (left + right) / 2;
        if(address >= idx[mid].addr) left = mid;
        if(address <= idx[mid].addr) right = mid;
        if(right-left <= 1) break;
    }
    if(address == idx[right].addr) return idx+right;
    return idx+left;
}
/*********************************/
/*
 * Read a whole file into a growable, NUL-terminated buffer.
 *
 * filename  file to read
 * bufp      in: current buffer or NULL; out: buffer holding the file text,
 *           or NULL on failure
 * roomp     in: current buffer size (0 means "failed before, do not retry");
 *           out: final buffer size, or 0 on failure
 *
 * Nothing is returned; callers detect failure through *bufp / *roomp.
 * An open() error other than EINTR, EACCES or ENOENT terminates the process.
 */
static void read_file(const char *restrict filename, char **bufp, unsigned *restrict roomp) {
    int fd = -1;        /* -1 = nothing to close; 0 is a valid descriptor */
    ssize_t done;
    char *buf = *bufp;
    ssize_t total = 0;
    unsigned room = *roomp;

    if(!room) goto hell; /* failed before */
    if(!buf) buf = xmalloc(room);
open_again:
    fd = open(filename, O_RDONLY|O_NOCTTY|O_NONBLOCK);
    if(fd<0){
        switch(errno){
        case EINTR: goto open_again;
        default: _exit(101);
        case EACCES: /* somebody screwing around? */
            /* FIXME: set a flag to disable symbol lookup? */
        case ENOENT:; /* no module support */
        }
        goto hell;
    }
    for(;;){
        /* always leave one byte free for the terminating NUL */
        done = read(fd, buf+total, room-total-1);
        if(done==0) break; /* nothing left */
        if(done==-1){
            if(errno==EINTR) continue; /* try again */
            perror("");
            goto hell;
        }
        if(done==(ssize_t)room-total-1){
            char *tmp;
            total += done;
            /* more to go, but no room in buffer */
            room *= 2;
            tmp = xrealloc(buf, room);
            buf = tmp;
            continue;
        }
        if(done>0 && done<(ssize_t)room-total-1){
            total += done;
            continue; /* OK, we read some. Go do more. */
        }
        fprintf(stderr,"%ld can't happen\n", (long)done);
        /* FIXME: memory leak */
        _exit(42);
    }
    buf[total] = '\0'; // parse_ksyms() expects NUL-terminated file
    *bufp = buf;
    *roomp = room;
    close(fd);
    return;
hell:
    free(buf);
    *bufp = NULL;
    *roomp = 0; /* this function will never work again */
    if(fd>=0) close(fd);
    return;
}
/*********************************/
/*
 * Build ksyms_index from the NUL-terminated text in ksyms_data.
 *
 * Each line must be "<hex address><space><name>\n". Lines are split in place
 * (the newline becomes NUL) and every name is passed through chop_version().
 *
 * Returns 1 once the end of the text is reached. Returns 0 when there is no
 * data or a line is malformed; in that case all ksyms state is released and
 * ksyms_room is zeroed, which makes read_file() refuse further attempts.
 */
static int parse_ksyms(void) {
char *endp;
if(!ksyms_room || !ksyms_data) goto quiet_goodbye;
endp = ksyms_data;
ksyms_count = 0;
/* On a re-parse the index already exists, so jump straight into the
 * line loop and reuse it instead of growing it first. */
if(idx_room) goto bypass; /* some space already allocated */
idx_room = 512;
for(;;){
/* outer loop: double the index capacity, then resume parsing */
void *vp;
idx_room *= 2;
vp = xrealloc(ksyms_index, sizeof(symb)*idx_room);
ksyms_index = vp;
bypass:
for(;;){
/* inner loop: one input line per pass */
char *saved;
if(!*endp) return 1; /* end of text: success */
saved = endp;
ksyms_index[ksyms_count].addr = STRTOUKL(endp, &endp, 16);
/* no digits consumed, or no space after the address */
if(endp==saved || *endp != ' ') goto bad_parse;
endp++;
saved = endp;
endp = strchr(endp,'\n');
if(!endp) goto bad_parse; /* no newline */
*endp = '\0';
ksyms_index[ksyms_count].name = chop_version(saved);
++endp;
if(++ksyms_count >= idx_room) break; /* need more space */
}
}
/* The if(0) wrapper makes the warning reachable only via goto bad_parse. */
if(0){
bad_parse:
fprintf(stderr, "Warning: "KSYMS_FILENAME" not normal\n");
}
quiet_goodbye:
/* common failure exit: drop everything */
idx_room = 0;
free(ksyms_data);
ksyms_data = NULL;
ksyms_room = 0;
free(ksyms_index);
ksyms_index = NULL;
ksyms_count = 0;
return 0;
}
/*********************************/
#define VCNT 16
/*
 * Map a System.map file privately, split it into NUL-terminated names and
 * build sysmap_index from it.
 *
 * filename  candidate System.map path
 * message   callback used to report warnings
 *
 * The file must be a regular file of at least 5000 bytes and must contain a
 * "Version_<linux_version_code>" symbol. When a ksyms index with entries is
 * loaded, VCNT entries sampled from it are cross-checked against the map.
 *
 * Returns 1 on success. Returns 0 on any failure, after releasing the index
 * and the mapping.
 */
static int sysmap_mmap(const char *restrict const filename, message_fn message) {
    struct stat sbuf;
    char *endp;
    int fd;
    char Version[32];

    fd = open(filename, O_RDONLY|O_NOCTTY|O_NONBLOCK);
    if(fd<0) return 0;
    if(fstat(fd, &sbuf) < 0) goto bad_open;
    if(!S_ISREG(sbuf.st_mode)) goto bad_open;
    if(sbuf.st_size < 5000) goto bad_open; /* if way too small */

    /* Would be shared read-only, but we want '\0' after each name. */
    endp = mmap(0, sbuf.st_size + 1, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
    /* Check for failure BEFORE touching the mapping; dereferencing the
     * MAP_FAILED sentinel would fault. */
    if(endp == MAP_FAILED){
        sysmap_data = NULL;
        goto bad_open;
    }
    sysmap_data = endp;
    close(fd); /* the mapping stays valid after the descriptor is closed */
    fd = -1;

    while(*endp==' '){ /* damn Alpha machine types */
        /* NOTE(review): the literal below is 3 characters long although 19
         * are compared and skipped; its space padding looks to have been
         * lost somewhere -- confirm against a pristine copy of the file. */
        if(strncmp(endp," w ", 19)) goto bad_parse;
        endp += 19;
        endp = strchr(endp,'\n');
        if(!endp) goto bad_parse; /* no newline */
        if(strncmp(endp-3, "_mv\n", 4)) goto bad_parse;
        endp++;
    }

    /* Version[0] is cleared once the matching symbol has been seen */
    sprintf(Version, "Version_%d", linux_version_code);
    sysmap_room = 512;
    for(;;){
        void *vp;
        sysmap_room *= 2;
        vp = xrealloc(sysmap_index, sizeof(symb)*sysmap_room);
        sysmap_index = vp;
        for(;;){
            char *vstart;
            if(endp - sysmap_data >= sbuf.st_size){ /* if we reached the end */
                int i = VCNT; /* check VCNT times to verify this file */
                if(*Version) goto bad_version;
                /* if can not verify, assume success; an allocated but empty
                 * ksyms index has no entry to sample either */
                if(!ksyms_index || !ksyms_count) return 1;
                while(i--){
#if 1
                    const symb *findme;
                    const symb *map_symb;
                    /* Choose VCNT entries from /proc/ksyms to test */
                    findme = ksyms_index + (ksyms_count*i/VCNT);
                    /* Search for them in the System.map */
                    map_symb = search(findme->addr, sysmap_index, sysmap_count);
                    if(map_symb){
                        if(map_symb->addr != findme->addr) continue;
                        /* backup to first matching address */
                        while (map_symb != sysmap_index){
                            if (map_symb->addr != (map_symb-1)->addr) break;
                            map_symb--;
                        }
                        /* search for name in symbols with same address */
                        while (map_symb != (sysmap_index+sysmap_count)){
                            if (map_symb->addr != findme->addr) break;
                            if (!strcmp(map_symb->name,findme->name)) goto good_match;
                            map_symb++;
                        }
                        map_symb--; /* backup to last symbol with matching address */
                        message("{%s} {%s}\n",map_symb->name,findme->name);
                        goto bad_match;
                    }
good_match:;
#endif
                }
                return 1; /* success */
            }
            sysmap_index[sysmap_count].addr = STRTOUKL(endp, &endp, 16);
            if(*endp != ' ') goto bad_parse;
            endp++;
            if(!strchr(SYMBOL_TYPE_CHARS, *endp)) goto bad_parse;
            endp++;
            if(*endp != ' ') goto bad_parse;
            endp++;
            vstart = endp;
            endp = strchr(endp,'\n');
            if(!endp) goto bad_parse; /* no newline */
            *endp = '\0';
            ++endp;
            vstart = chop_version(vstart);
            sysmap_index[sysmap_count].name = vstart;
            if(*vstart=='V' && *Version && !strcmp(Version,vstart)) *Version='\0';
            if(++sysmap_count >= sysmap_room) break; /* need more space */
        }
    }

    /* Error tails: each if(0) body is reachable only through its label and
     * then falls through into the shared cleanup. The labels are defined
     * unconditionally, because the gotos that target them are compiled
     * unconditionally; only the optional warnings depend on the macro. */
    if(0){
bad_match:
#ifdef BUILD_WITH_WHINE
        message("Warning: %s does not match kernel data.\n", filename);
#endif
        ;
    }
    if(0){
bad_version:
        message("Warning: %s has an incorrect kernel version.\n", filename);
    }
    if(0){
bad_alloc:
        message("Warning: not enough memory available\n");
    }
    if(0){
bad_parse:
#ifdef BUILD_WITH_WHINE
        message("Warning: %s not parseable as a System.map\n", filename);
#endif
        ;
    }
    if(0){
bad_open:
        message("Warning: %s could not be opened as a System.map\n", filename);
    }
    sysmap_room=0;
    sysmap_count=0;
    free(sysmap_index);
    sysmap_index = NULL;
    if(fd>=0) close(fd);
    if(sysmap_data) munmap(sysmap_data, sbuf.st_size + 1);
    sysmap_data = NULL;
    return 0;
}
/*********************************/
/*
 * Reload and re-index KSYMS_FILENAME unless that was already done during
 * the current clock second, then empty the lookup cache.
 */
static void read_and_parse(void){
    static time_t stamp; /* after data gets old, load /proc/ksyms again */

    if (time(NULL) == stamp)
        return; /* still fresh */

    read_file(KSYMS_FILENAME, &ksyms_data, &ksyms_room);
    parse_ksyms();
    /* cached names may point into the buffer that was just replaced */
    memset((void*)hashtable, 0, sizeof(hashtable));
    stamp = time(NULL);
}
/*********************************/
static void default_message(const char *restrict format, ...) __attribute__((format(printf,1,2)));
/* Default warning sink: printf-style output written to stderr. */
static void default_message(const char *restrict format, ...) {
    va_list ap;

    va_start(ap, format);
    vfprintf(stderr, format, ap);
    va_end(ap);
}
/*********************************/
/* Set once /proc/self/wchan is found; lookup_wchan() then reads the per-pid
 * wchan file instead of consulting any symbol table. */
static int use_wchan_file;
/*
 * Choose the source of WCHAN names.
 *
 * override  explicit System.map path, or NULL
 * message   callback used to report warnings
 *
 * Order of preference:
 *   1. a user-selected map (override, $PS_SYSMAP, $PS_SYSTEM_MAP), which is
 *      honoured only when have_privs is zero;
 *   2. the /proc/<pid>/wchan files, when /proc/self/wchan exists;
 *   3. the first usable System.map among a list of well-known paths.
 *
 * Returns 0 on success and -1 when no usable source was found.
 */
int open_psdb_message(const char *restrict override, message_fn message) {
    static const char *sysmap_paths[] = {
        "/boot/System.map-%s",
        "/boot/System.map",
        "/lib/modules/%s/System.map",
        "/usr/src/linux/System.map",
        "/System.map",
        NULL
    };
    struct stat sbuf;
    struct utsname uts;
    char path[128];
    const char **fmt = sysmap_paths;
    const char *sm;
    int did_ksyms = 0; /* ksyms only need loading once for the whole search */

#ifdef SYSMAP_FILENAME /* debug feature */
    override = SYSMAP_FILENAME;
#endif
    // first allow for a user-selected System.map file
    if(
        (sm=override)
        ||
        (sm=getenv("PS_SYSMAP"))
        ||
        (sm=getenv("PS_SYSTEM_MAP"))
    ){
        if(!have_privs){
            read_and_parse();
            if(sysmap_mmap(sm, message)) return 0;
        }
        /* failure is better than ignoring the user & using bad data */
        return -1; /* ought to return "Namelist not found." */
    }
    // next try the Linux 2.5.xx method
    if(!stat("/proc/self/wchan", &sbuf)){
        use_wchan_file = 1; // hack
        return 0;
    }
    // finally, search for the System.map file
    if(uname(&uts) < 0) uts.release[0] = '\0'; /* never format garbage into a path */
    path[sizeof path - 1] = '\0';
    do{
        snprintf(path, sizeof path - 1, *fmt, uts.release);
        if(!stat(path, &sbuf)){
            /* Load ksyms before the first candidate is examined so that
             * sysmap_mmap() has data to verify the map against. */
            if(!did_ksyms){
                read_and_parse();
                did_ksyms = 1;
            }
            if (sysmap_mmap(path, message)) return 0;
        }
    }while(*++fmt);
    /* TODO: Without System.map, no need to keep ksyms loaded. */
    return -1;
}
/***************************************/
/* Same as open_psdb_message(), with warnings sent to default_message(). */
int open_psdb(const char *restrict override) {
    int status = open_psdb_message(override, default_message);

    return status;
}
/***************************************/
/*
 * Return the wait-channel name found in /proc/<pid>/wchan.
 *
 * Returns "?" when the file cannot be opened or read and "-" when it holds
 * just "0". Otherwise returns the name with one leading '.', a "sys_" or
 * "do_" prefix, or a run of leading underscores removed.
 *
 * The result may point into a static buffer that the next call overwrites.
 * Names longer than 63 bytes are truncated.
 */
static const char * read_wchan_file(unsigned pid){
    static char buf[64];
    const char *ret = buf;
    ssize_t num;
    int fd;

    /* pid is unsigned, so it must be formatted with %u */
    snprintf(buf, sizeof buf, "/proc/%u/wchan", pid);
    fd = open(buf, O_RDONLY);
    if(fd==-1) return "?";
    /* the path is no longer needed: reuse the buffer for the contents */
    num = read(fd, buf, sizeof buf - 1);
    close(fd);
    if(num<1) return "?"; // allow for "0"
    buf[num] = '\0';
    if(buf[0]=='0' && buf[1]=='\0') return "-";
    // would skip over numbers if they existed -- but no
    // lame ppc64 has a '.' in front of every name
    if(*ret=='.') ret++;
    switch(*ret){
    case 's': if(!strncmp(ret, "sys_", 4)) ret += 4; break;
    case 'd': if(!strncmp(ret, "do_", 3)) ret += 3; break;
    case '_': while(*ret=='_') ret++; break;
    default : break;
    }
    return ret;
}
/***************************************/
/* Placeholder entry (address 0, name "?") used by lookup_wchan() whenever a
 * table has no match or the match is too far below the address. */
static const symb fail = { .name = "?" };
static const char dash[] = "-"; /* returned by lookup_wchan() for address 0 */
static const char star[] = "*"; /* returned by lookup_wchan() for an all-ones address */
/* Largest accepted distance between an address and the symbol below it. */
#define MAX_OFFSET (0x1000*sizeof(long)) /* past this is generally junk */
/*
 * Translate a wait-channel address into a short function name.
 *
 * address  kernel address the task is sleeping at
 * pid      task id; only used when names come from /proc/<pid>/wchan
 *
 * Returns "-" for address 0, "*" for an all-ones address, "?" when no symbol
 * lies close enough, otherwise the symbol name with a leading '.', "sys_",
 * "do_" or run of underscores removed. The string is owned by this module
 * and may be invalidated by later calls.
 */
const char * lookup_wchan(unsigned KLONG address, unsigned pid) {
    const symb *from_ksyms;
    const symb *from_sysmap;
    const symb *best;
    const char *name;
    unsigned slot;

    // can't cache it due to a race condition :-(
    if (use_wchan_file)
        return read_wchan_file(pid);
    if (address == 0)
        return dash;
    if (~address == 0)
        return star;

    read_and_parse();

    /* try the small cache first */
    slot = (address >> 4) & 0xff; /* got 56/63 hits & 7/63 misses */
    if (hashtable[slot].addr == address)
        return hashtable[slot].name;

    from_ksyms = search(address, ksyms_index, ksyms_count);
    if (from_ksyms == NULL)
        from_ksyms = &fail;
    from_sysmap = search(address, sysmap_index, sysmap_count);
    if (from_sysmap == NULL)
        from_sysmap = &fail;

    /* the candidate with the higher address is the closer one */
    best = from_sysmap;
    if (from_ksyms->addr > from_sysmap->addr)
        best = from_ksyms;
    if (address > best->addr + MAX_OFFSET)
        best = &fail;

    /* abbreviate the name */
    name = best->name;
    if (name[0] == '.')     // lame ppc64 has a '.' in front of every name
        name++;
    if (name[0] == 's') {
        if (strncmp(name, "sys_", 4) == 0)
            name += 4;
    } else if (name[0] == 'd') {
        if (strncmp(name, "do_", 3) == 0)
            name += 3;
    } else if (name[0] == '_') {
        while (*name == '_')
            name++;
    }

    /* cache name after abbreviation */
    hashtable[slot].addr = address;
    hashtable[slot].name = name;
    return name;
}

View File

@ -40,8 +40,6 @@ global:
look_up_our_self; look_up_our_self;
lookup_wchan; lookup_wchan;
meminfo; meminfo;
open_psdb;
open_psdb_message;
openproc; openproc;
page_bytes; page_bytes;
pretty_print_signals; pretty_print_signals;

View File

@ -287,7 +287,6 @@ extern proc_t * get_proc_stats(pid_t pid, proc_t *p);
#define PROC_FILLGRP 0x0010 // resolve group id number -> group name #define PROC_FILLGRP 0x0010 // resolve group id number -> group name
#define PROC_FILLSTATUS 0x0020 // read status #define PROC_FILLSTATUS 0x0020 // read status
#define PROC_FILLSTAT 0x0040 // read stat #define PROC_FILLSTAT 0x0040 // read stat
#define PROC_FILLWCHAN 0x0080 // look up WCHAN name
#define PROC_FILLARG 0x0100 // alloc and fill in `cmdline' #define PROC_FILLARG 0x0100 // alloc and fill in `cmdline'
#define PROC_FILLCGROUP 0x0200 // alloc and fill in `cgroup` #define PROC_FILLCGROUP 0x0200 // alloc and fill in `cgroup`
#define PROC_FILLSUPGRP 0x0400 // resolve supplementary group id -> group name #define PROC_FILLSUPGRP 0x0400 // resolve supplementary group id -> group name

56
proc/wchan.c Normal file
View File

@ -0,0 +1,56 @@
/*
* wchan.c - kernel symbol handling
* Copyright 1998-2003 by Albert Cahalan
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/stat.h>
#include "wchan.h" // to verify prototype
/*
 * Return the wait-channel name reported by /proc/<pid>/wchan.
 *
 * Returns "?" when the file cannot be opened or yields no data and "-" when
 * it holds just "0". Otherwise returns the name with one leading '.', a
 * "sys_" or "do_" prefix, or a run of leading underscores removed.
 *
 * The result may point into a static buffer that the next call overwrites.
 */
const char * lookup_wchan (int pid) {
    static char buf[64];
    const char *name = buf;
    ssize_t got;
    int fd;

    snprintf(buf, sizeof buf, "/proc/%d/wchan", pid);
    fd = open(buf, O_RDONLY);
    if (fd == -1)
        return "?";

    /* the path is no longer needed: reuse the buffer for the contents */
    got = read(fd, buf, sizeof buf - 1);
    close(fd);
    if (got < 1)
        return "?"; // allow for "0"
    buf[got] = '\0';

    if (strcmp(buf, "0") == 0)
        return "-";

    // lame ppc64 has a '.' in front of every name
    if (name[0] == '.')
        name++;

    /* the three prefixes start with different characters, so at most one
     * of these branches can alter the name */
    if (strncmp(name, "sys_", 4) == 0)
        name += 4;
    else if (strncmp(name, "do_", 3) == 0)
        name += 3;
    else
        while (*name == '_')
            name++;

    return name;
}

View File

@ -5,9 +5,7 @@
EXTERN_C_BEGIN EXTERN_C_BEGIN
extern const char * lookup_wchan(unsigned KLONG address, unsigned pid); extern const char * lookup_wchan (int pid);
extern int open_psdb(const char *__restrict override);
extern int open_psdb_message(const char *__restrict override, message_fn message);
EXTERN_C_END EXTERN_C_END