Reliably force restart when a subprocess has a fatal error.
Suppose a system call such as bind() fails in the sockd subprocess in request_sockd_fd(). sockd will suicide(). This will send a SIGCHLD to the master process, which the master process should respond to by calling suicide(), forcing a process supervisor to respawn the entire ndhc program. But this doesn't reliably happen prior to this commit because of the interaction between request_sockd_fd() and signalfd() [or, equivalently, self-pipe trick] signal handling. request_sockd_fd() makes ndhc-master synchronously wait for a response from sockd via safe_recvmsg(). The normal goto-like signal handling path is suppressed when using signalfd(), so when SIGCHLD is received, it will not be handled until I/O is dispatched for the signalfd or pipe. But such code will never be reached because ndhc-master is waiting in safe_recvmsg() and thus never polls the signal fd's status. So, revert to using traditional POSIX sigaction() for SIGCHLD, which provides exactly the required behavior for proper functioning.
This commit is contained in:
parent
f0340b1475
commit
8d89ca9f19
29
src/ndhc.c
29
src/ndhc.c
@ -158,17 +158,42 @@ void show_usage(void)
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
/*
 * Asynchronous signal handler for the ndhc master process.
 *
 * signo: the number of the signal being delivered.
 *
 * On SIGCHLD, writes a fixed diagnostic to standard output and terminates
 * the process with EXIT_FAILURE. Any other signal is ignored.
 *
 * This runs in signal context, so it must restrict itself to
 * async-signal-safe operations. The message is a static buffer emitted via
 * safe_write() (NOTE(review): presumably a retry wrapper around write(2);
 * confirm it does nothing signal-unsafe). Termination uses _exit() rather
 * than exit(): exit() runs atexit handlers and flushes stdio buffers, which
 * is undefined behavior if the signal interrupted non-reentrant code such as
 * a stdio or malloc call.
 */
static void signal_handler(int signo)
{
    switch (signo) {
    case SIGCHLD: {
        static const char errstr[] = "ndhc-master: Subprocess terminated unexpectedly. Exiting.";
        /* sizeof - 1: do not write the terminating NUL byte. */
        safe_write(STDOUT_FILENO, errstr, sizeof errstr - 1);
        _exit(EXIT_FAILURE);
    }
    default:
        break;
    }
}
|
||||
|
||||
static void setup_signals_ndhc(void)
|
||||
{
|
||||
sigset_t mask;
|
||||
sigemptyset(&mask);
|
||||
sigaddset(&mask, SIGUSR1);
|
||||
sigaddset(&mask, SIGUSR2);
|
||||
sigaddset(&mask, SIGCHLD);
|
||||
sigaddset(&mask, SIGTERM);
|
||||
sigaddset(&mask, SIGINT);
|
||||
if (sigprocmask(SIG_BLOCK, &mask, (sigset_t *)0) < 0)
|
||||
suicide("sigprocmask failed");
|
||||
|
||||
sigemptyset(&mask);
|
||||
sigaddset(&mask, SIGCHLD);
|
||||
if (sigprocmask(SIG_UNBLOCK, &mask, (sigset_t *)0) < 0)
|
||||
suicide("sigprocmask failed");
|
||||
struct sigaction sa = {
|
||||
.sa_handler = signal_handler,
|
||||
.sa_flags = SA_RESTART,
|
||||
};
|
||||
sigemptyset(&sa.sa_mask);
|
||||
if (sigaction(SIGCHLD, &sa, NULL))
|
||||
suicide("sigaction failed");
|
||||
|
||||
if (cs.signalFd >= 0) {
|
||||
epoll_del(cs.epollFd, cs.signalFd);
|
||||
close(cs.signalFd);
|
||||
@ -197,8 +222,6 @@ static int signal_dispatch(void)
|
||||
switch (si.ssi_signo) {
|
||||
case SIGUSR1: return SIGNAL_RENEW;
|
||||
case SIGUSR2: return SIGNAL_RELEASE;
|
||||
case SIGCHLD:
|
||||
suicide("ndhc-master: Subprocess terminated unexpectedly. Exiting.");
|
||||
case SIGTERM:
|
||||
log_line("Received SIGTERM. Exiting gracefully.");
|
||||
exit(EXIT_SUCCESS);
|
||||
|
Loading…
Reference in New Issue
Block a user