Reliably force restart when a subprocess has a fatal error.

Suppose a system call such as bind() fails in the sockd subprocess in
request_sockd_fd().  sockd will suicide().  This will send a SIGCHLD to
the master process, which the master process should respond to by
calling suicide(), forcing a process supervisor to respawn the entire
ndhc program.

But, this doesn't reliably happen prior to this commit because of the
interaction between request_sock_fd() and signalfd() [or equivalently
self-pipe-trick] signal handling.

request_sock_fd() makes ndhc-master synchronously wait for a response
from sockd via safe_recvmsg().  The normal goto-like signal handling
path is suppressed when using signalfd() , so when SIGCHLD is received,
it will not be handled until io is dispatched for the signalfd or pipe.
But such code will never be reached because ndhc-master is waiting in
safe_recvmsg() and thus never polls signal fd status.

So, revert to using traditional POSIX sigaction() for SIGCHLD, which
provides exactly the required behavior for proper functioning.
This commit is contained in:
Nicholas J. Kain 2020-10-20 01:12:59 -04:00
parent f0340b1475
commit 8d89ca9f19

View File

@ -158,17 +158,42 @@ void show_usage(void)
exit(EXIT_SUCCESS);
}
static void signal_handler(int signo)
{
switch (signo) {
case SIGCHLD: {
static const char errstr[] = "ndhc-master: Subprocess terminated unexpectedly. Exiting.";
safe_write(STDOUT_FILENO, errstr, sizeof errstr - 1);
exit(EXIT_FAILURE);
}
default:
break;
}
}
static void setup_signals_ndhc(void)
{
sigset_t mask;
sigemptyset(&mask);
sigaddset(&mask, SIGUSR1);
sigaddset(&mask, SIGUSR2);
sigaddset(&mask, SIGCHLD);
sigaddset(&mask, SIGTERM);
sigaddset(&mask, SIGINT);
if (sigprocmask(SIG_BLOCK, &mask, (sigset_t *)0) < 0)
suicide("sigprocmask failed");
sigemptyset(&mask);
sigaddset(&mask, SIGCHLD);
if (sigprocmask(SIG_UNBLOCK, &mask, (sigset_t *)0) < 0)
suicide("sigprocmask failed");
struct sigaction sa = {
.sa_handler = signal_handler,
.sa_flags = SA_RESTART,
};
sigemptyset(&sa.sa_mask);
if (sigaction(SIGCHLD, &sa, NULL))
suicide("sigaction failed");
if (cs.signalFd >= 0) {
epoll_del(cs.epollFd, cs.signalFd);
close(cs.signalFd);
@ -197,8 +222,6 @@ static int signal_dispatch(void)
switch (si.ssi_signo) {
case SIGUSR1: return SIGNAL_RENEW;
case SIGUSR2: return SIGNAL_RELEASE;
case SIGCHLD:
suicide("ndhc-master: Subprocess terminated unexpectedly. Exiting.");
case SIGTERM:
log_line("Received SIGTERM. Exiting gracefully.");
exit(EXIT_SUCCESS);