From ee53e6ad29d246c76b636cda72ad4834929ada43 Mon Sep 17 00:00:00 2001 From: Roy Marples Date: Sun, 19 Apr 2009 18:20:49 +0000 Subject: [PATCH] -w, --wait seconds New option to set the number of seconds to wait before checking that the daemon is still running. The old code that periodically polled wasn't that reliable - in this case simpler == better. Fixes #160 --- man/start-stop-daemon.8 | 11 +++- src/rc/start-stop-daemon.c | 111 ++++++++++++++++--------------------- 2 files changed, 56 insertions(+), 66 deletions(-) diff --git a/man/start-stop-daemon.8 b/man/start-stop-daemon.8 index 88f542c7..1b1f664c 100644 --- a/man/start-stop-daemon.8 +++ b/man/start-stop-daemon.8 @@ -22,7 +22,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd January 1, 2009 +.Dd April 19, 2009 .Dt START-STOP-DAEMON 8 SMM .Os OpenRC .Sh NAME @@ -140,6 +140,12 @@ Redirect the standard output of the process to logfile when started with Must be an absolute pathname, but relative to the path optionally given with .Fl r , -chroot . The logfile can also be a named pipe. +.It Fl w , -wait Ar seconds +Wait +.Ar seconds +after starting and check that the daemon is still running. +Useful for daemons that check configuration after forking, or for avoiding race +conditions where the pidfile is written out after forking. .It Fl 2 , -stderr Ar logfile The same thing as .Fl 1 , -stdout @@ -160,7 +166,8 @@ can also set the scheduling priority of the daemon, but the command line option takes precedence. .Pp .Va SSD_STARTWAIT -overrides the number of milliseconds set in +As the +.Fl w , -wait option above. .Pa /etc/rc.conf .Nm waits for to check the daemon is still running. 
diff --git a/src/rc/start-stop-daemon.c b/src/rc/start-stop-daemon.c index 53de7bd2..90e25747 100644 --- a/src/rc/start-stop-daemon.c +++ b/src/rc/start-stop-daemon.c @@ -549,7 +549,7 @@ expand_home(const char *home, const char *path) } #include "_usage.h" -#define getoptstring "KN:R:Sbc:d:e:g:k:mn:op:s:tu:r:x:1:2:" getoptstring_COMMON +#define getoptstring "KN:R:Sbc:d:e:g:k:mn:op:s:tu:r:w:x:1:2:" getoptstring_COMMON static const struct option longopts[] = { { "stop", 0, NULL, 'K'}, { "nicelevel", 1, NULL, 'N'}, @@ -570,6 +570,7 @@ static const struct option longopts[] = { { "test", 0, NULL, 't'}, { "user", 1, NULL, 'u'}, { "chroot", 1, NULL, 'r'}, + { "wait", 1, NULL, 'w'}, { "exec", 1, NULL, 'x'}, { "stdout", 1, NULL, '1'}, { "stderr", 1, NULL, '2'}, @@ -595,6 +596,7 @@ static const char * const longopts_help[] = { "Test actions, don't do them", "Change the process user", "Chroot to this directory", + "Seconds to wait for daemon start", "Binary to start/stop", "Redirect stdout to file", "Redirect stderr to file", @@ -807,6 +809,11 @@ int start_stop_daemon(int argc, char **argv) case 'a': /* --startas */ startas = optarg; break; + case 'w': + if (sscanf(optarg, "%d", &start_wait) != 1) + eerrorx("%s: `%s' not a number", + applet, optarg); + break; case 'x': /* --exec */ exec = optarg; break; @@ -1030,6 +1037,10 @@ int start_stop_daemon(int argc, char **argv) eindent(); } + /* Remove existing pidfile */ + if (pidfile) + unlink(pidfile); + if (background) signal_setup(SIGCHLD, handle_signal); @@ -1216,78 +1227,50 @@ int start_stop_daemon(int argc, char **argv) /* Wait a little bit and check that process is still running We do this as some badly written daemons fork and then barf */ - if ((p = getenv("SSD_STARTWAIT")) || - (p = rc_conf_value("rc_start_wait"))) + if (start_wait == 0 && + ((p = getenv("SSD_STARTWAIT")) || + (p = rc_conf_value("rc_start_wait")))) { - if (sscanf(p, "%u", &start_wait) == 1) - start_wait *= 1000000; - else + if (sscanf(p, "%u", 
&start_wait) != 1) start_wait = 0; } if (start_wait > 0) { struct timespec ts; - int nloops = start_wait / POLL_INTERVAL; - int nloopsp = WAIT_PIDFILE / POLL_INTERVAL; bool alive = false; - - ts.tv_sec = 0; - ts.tv_nsec = POLL_INTERVAL; - - while (nloops) { - if (nanosleep(&ts, NULL) == -1) { - if (errno == EINTR) - eerror("%s: caught an interrupt", applet); - else { - eerror("%s: nanosleep: %s", applet, strerror(errno)); - return 0; - } + + ts.tv_sec = start_wait; + ts.tv_nsec = 0; + if (nanosleep(&ts, NULL) == -1) { + if (errno == EINTR) + eerror("%s: caught an interrupt", applet); + else { + eerror("%s: nanosleep: %s", + applet, strerror(errno)); + return 0; } - - /* We wait for a specific amount of time for a pidfile - * to be created. - * Once everything is in place we then wait some more - * to ensure that the daemon really is running and won't - * abort due to a config error. */ - if (!background && pidfile && nloopsp) - nloopsp--; - else - nloops--; - - /* This is knarly. - * If we backgrounded then we know the exact pid. - * Otherwise if we have a pidfile then it *may* know - * the exact pid. - * Failing that, we'll have to query processes. - * We sleep first as some programs like ntp like to - * fork, and write their pidfile a LONG time later. */ - if (background) { - if (kill (pid, 0) == 0) - alive = true; - } else { - if (pidfile) { - /* The pidfile may not have been - * written yet - give it some time */ - if ((pid = get_pid(pidfile, true)) == -1) { - if (! 
nloopsp) - eerrorx("%s: did not " - "create a valid" - " pid in `%s'", - applet, pidfile); - alive = true; - pid = 0; - } else - nloopsp = 0; - } else - pid = 0; - if (do_stop(exec, (const char *const *)margv, - pid, uid, 0, true, false, true) > 0) - alive = true; - } - - if (!alive) - eerrorx("%s: %s died", applet, exec); } + if (background) { + if (kill(pid, 0) == 0) + alive = true; + } else { + if (pidfile) { + pid = get_pid(pidfile, true); + if (pid == -1) { + eerrorx("%s: did not " + "create a valid" + " pid in `%s'", + applet, pidfile); + } + } else + pid = 0; + if (do_stop(exec, (const char *const *)margv, + pid, uid, 0, true, false, true) > 0) + alive = true; + } + + if (!alive) + eerrorx("%s: %s died", applet, exec); } if (svcname)