add retry option to supervise-daemon

The --retry option for supervise-daemon defines how the supervisor will
attempt to stop the child process it is monitoring. It must be given when
the supervisor is started, because stopping the supervisor only sends a
signal to the already-running supervisor, which then applies the retry
schedule it was started with.

This fixes #160.
This commit is contained in:
William Hubbs 2017-09-06 13:22:30 -05:00
parent 36a0ab9054
commit 17b5cc78d3
4 changed files with 30 additions and 4 deletions

View File

@ -36,6 +36,8 @@
.Ar pidfile .Ar pidfile
.Fl P , -respawn-period .Fl P , -respawn-period
.Ar seconds .Ar seconds
.Fl R , -retry
.Ar arg
.Fl r , -chroot .Fl r , -chroot
.Ar chrootpath .Ar chrootpath
.Fl u , -user .Fl u , -user
@ -115,6 +117,9 @@ Modifies the scheduling priority of the daemon.
.It Fl P , -respawn-period Ar seconds .It Fl P , -respawn-period Ar seconds
Sets the length of a respawn period. The default is 10 seconds. See the Sets the length of a respawn period. The default is 10 seconds. See the
description of --respawn-max for more information. description of --respawn-max for more information.
.It Fl R , -retry Ar timeout | Ar signal Ns / Ns Ar timeout
The retry specification can be either a timeout in seconds or multiple
signal/timeout pairs (like SIGTERM/5).
.It Fl r , -chroot Ar path .It Fl r , -chroot Ar path
chroot to this directory before starting the daemon. All other paths, such chroot to this directory before starting the daemon. All other paths, such
as the path to the daemon, chdir and pidfile, should be relative to the chroot. as the path to the daemon, chdir and pidfile, should be relative to the chroot.

View File

@ -23,6 +23,7 @@ supervise_start()
# command_args="this \"is a\" test" # command_args="this \"is a\" test"
# to work properly. # to work properly.
eval supervise-daemon --start \ eval supervise-daemon --start \
${retry:+--retry} $retry \
${chroot:+--chroot} $chroot \ ${chroot:+--chroot} $chroot \
${pidfile:+--pidfile} $pidfile \ ${pidfile:+--pidfile} $pidfile \
${respawn_delay:+--respawn-delay} $respawn_delay \ ${respawn_delay:+--respawn-delay} $respawn_delay \

View File

@ -159,7 +159,7 @@ rc-update: rc-update.o _usage.o rc-misc.o
start-stop-daemon: start-stop-daemon.o _usage.o rc-misc.o rc-schedules.o start-stop-daemon: start-stop-daemon.o _usage.o rc-misc.o rc-schedules.o
${CC} ${LOCAL_CFLAGS} ${LOCAL_LDFLAGS} ${CFLAGS} ${LDFLAGS} -o $@ $^ ${LDADD} ${CC} ${LOCAL_CFLAGS} ${LOCAL_LDFLAGS} ${CFLAGS} ${LDFLAGS} -o $@ $^ ${LDADD}
supervise-daemon: supervise-daemon.o _usage.o rc-misc.o supervise-daemon: supervise-daemon.o _usage.o rc-misc.o rc-schedules.o
${CC} ${LOCAL_CFLAGS} ${LOCAL_LDFLAGS} ${CFLAGS} ${LDFLAGS} -o $@ $^ ${LDADD} ${CC} ${LOCAL_CFLAGS} ${LOCAL_LDFLAGS} ${CFLAGS} ${LDFLAGS} -o $@ $^ ${LDADD}
service_get_value service_set_value get_options save_options: do_value.o rc-misc.o service_get_value service_set_value get_options save_options: do_value.o rc-misc.o

View File

@ -61,12 +61,13 @@ static struct pam_conv conv = { NULL, NULL};
#include "queue.h" #include "queue.h"
#include "rc.h" #include "rc.h"
#include "rc-misc.h" #include "rc-misc.h"
#include "rc-schedules.h"
#include "_usage.h" #include "_usage.h"
#include "helpers.h" #include "helpers.h"
const char *applet = NULL; const char *applet = NULL;
const char *extraopts = NULL; const char *extraopts = NULL;
const char *getoptstring = "D:d:e:g:I:Kk:m:N:p:r:Su:1:2:" \ const char *getoptstring = "D:d:e:g:I:Kk:m:N:p:R:r:Su:1:2:" \
getoptstring_COMMON; getoptstring_COMMON;
const struct option longopts[] = { const struct option longopts[] = {
{ "respawn-delay", 1, NULL, 'D'}, { "respawn-delay", 1, NULL, 'D'},
@ -80,6 +81,7 @@ const struct option longopts[] = {
{ "nicelevel", 1, NULL, 'N'}, { "nicelevel", 1, NULL, 'N'},
{ "pidfile", 1, NULL, 'p'}, { "pidfile", 1, NULL, 'p'},
{ "respawn-period", 1, NULL, 'P'}, { "respawn-period", 1, NULL, 'P'},
{ "retry", 1, NULL, 'R'},
{ "chroot", 1, NULL, 'r'}, { "chroot", 1, NULL, 'r'},
{ "start", 0, NULL, 'S'}, { "start", 0, NULL, 'S'},
{ "user", 1, NULL, 'u'}, { "user", 1, NULL, 'u'},
@ -99,6 +101,7 @@ const char * const longopts_help[] = {
"Set a nicelevel when starting", "Set a nicelevel when starting",
"Match pid found in this file", "Match pid found in this file",
"Set respawn time period", "Set respawn time period",
"Retry schedule to use when stopping",
"Chroot to this directory", "Chroot to this directory",
"Start daemon", "Start daemon",
"Change the process user", "Change the process user",
@ -410,6 +413,9 @@ int main(int argc, char **argv)
bool stop = false; bool stop = false;
char *exec = NULL; char *exec = NULL;
char *pidfile = NULL; char *pidfile = NULL;
char *retry = NULL;
int nkilled;
int sig = SIGTERM;
char *home = NULL; char *home = NULL;
int tid = 0; int tid = 0;
pid_t child_pid, pid; pid_t child_pid, pid;
@ -534,6 +540,9 @@ int main(int argc, char **argv)
pidfile = optarg; pidfile = optarg;
break; break;
case 'R': /* --retry <schedule>|timeout */
retry = optarg;
break;
case 'r': /* --chroot /new/root */ case 'r': /* --chroot /new/root */
ch_root = optarg; ch_root = optarg;
break; break;
@ -605,6 +614,10 @@ int main(int argc, char **argv)
"than %d to avoid infinite respawning", applet, "than %d to avoid infinite respawning", applet,
respawn_delay * respawn_max); respawn_delay * respawn_max);
} }
if (retry)
parse_schedule(applet, retry, sig);
else
parse_schedule(applet, NULL, sig);
} }
/* Expand ~ */ /* Expand ~ */
@ -655,9 +668,13 @@ int main(int argc, char **argv)
else else
i = kill(pid, SIGTERM); i = kill(pid, SIGTERM);
if (i != 0) if (i != 0)
/* We failed to stop something */ /* We failed to send the signal */
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
/* wait for the supervisor to go down */
while (kill(pid, 0) == 0)
sleep(1);
/* Even if we have not actually killed anything, we should /* Even if we have not actually killed anything, we should
* remove information about it as it may have unexpectedly * remove information about it as it may have unexpectedly
* crashed out. We should also return success as the end * crashed out. We should also return success as the end
@ -737,7 +754,10 @@ int main(int argc, char **argv)
wait(&i); wait(&i);
if (exiting) { if (exiting) {
syslog(LOG_INFO, "stopping %s, pid %d", exec, child_pid); syslog(LOG_INFO, "stopping %s, pid %d", exec, child_pid);
kill(child_pid, SIGTERM); nkilled = run_stop_schedule(applet, exec, NULL, child_pid,
0, false, false);
if (nkilled > 0)
syslog(LOG_INFO, "killed %d processes", nkilled);
} else { } else {
sleep(respawn_delay); sleep(respawn_delay);
if (respawn_max > 0 && respawn_period > 0) { if (respawn_max > 0 && respawn_period > 0) {