Fix #36: retry DNS lookup of remote syslog servers with res_init()

This patch replaces the INET_SUSPEND_TIME for DNS lookup with a 5 sec
back-off to prevent DNS lookup on each message.

Also, reorder WARN() and NOTE() so they are called *after* setting the
f_type, otherwise we unleash endless recursive loops.

To avoid filling up the log with "Failed resolving ..." messages every
time we retry, we set a flag to remember we've already logged warning.

Signed-off-by: Joachim Wiberg <troglobit@gmail.com>
This commit is contained in:
Joachim Wiberg 2021-11-27 20:06:02 +01:00
parent 0a0380cbdd
commit f516ff6023
2 changed files with 28 additions and 16 deletions

View File

@ -170,6 +170,7 @@ void wallmsg(struct filed *f, struct iovec *iov, int iovcnt);
void reapchild(); void reapchild();
const char *cvtaddr(struct sockaddr_storage *f, int len); const char *cvtaddr(struct sockaddr_storage *f, int len);
const char *cvthname(struct sockaddr *f, socklen_t len); const char *cvthname(struct sockaddr *f, socklen_t len);
static void forw_lookup(struct filed *f);
void domark(void *arg); void domark(void *arg);
void doflush(void *arg); void doflush(void *arg);
void debug_switch(); void debug_switch();
@ -700,6 +701,9 @@ static int nslookup(const char *host, const char *service, struct addrinfo **ai)
if (!node || !node[0]) if (!node || !node[0])
node = NULL; node = NULL;
/* Reset resolver cache and retry name lookup */
res_init();
logit("nslookup '%s:%s'\n", node ?: "none", service); logit("nslookup '%s:%s'\n", node ?: "none", service);
memset(&hints, 0, sizeof(hints)); memset(&hints, 0, sizeof(hints));
hints.ai_flags = !node ? AI_PASSIVE : 0; hints.ai_flags = !node ? AI_PASSIVE : 0;
@ -1579,7 +1583,10 @@ void fprintlog_write(struct filed *f, struct iovec *iov, int iovcnt, int flags)
break; break;
case F_FORW_UNKN: case F_FORW_UNKN:
/* nslookup retry handled by domark() timer */ logit("\n");
forw_lookup(f);
if (f->f_type == F_FORW)
goto f_forw;
break; break;
case F_FORW: case F_FORW:
@ -1839,7 +1846,7 @@ static void fprintlog_first(struct filed *f, struct buf_msg *buffer)
/* Messages generated by syslogd itself may not have a timestamp */ /* Messages generated by syslogd itself may not have a timestamp */
check_timestamp(buffer); check_timestamp(buffer);
if (f->f_type != F_FORW_SUSP) { if (f->f_type != F_FORW_SUSP && f->f_type != F_FORW_UNKN) {
f->f_time = timer_now(); f->f_time = timer_now();
f->f_prevcount = 0; f->f_prevcount = 0;
} }
@ -2090,39 +2097,42 @@ void flog(int pri, char *fmt, ...)
static void forw_lookup(struct filed *f) static void forw_lookup(struct filed *f)
{ {
struct addrinfo *ai;
time_t diff;
char *host = f->f_un.f_forw.f_hname; char *host = f->f_un.f_forw.f_hname;
char *serv = f->f_un.f_forw.f_serv; char *serv = f->f_un.f_forw.f_serv;
struct addrinfo *ai;
int err, first; int err, first;
time_t diff;
/* Called from cfline() for initial lookup? */ /* Called from cfline() for initial lookup? */
first = f->f_type == F_UNUSED ? 1 : 0; first = f->f_type == F_UNUSED ? 1 : 0;
/*
* Not INET_SUSPEND_TIME, but back off a few seconds at least
* to prevent syslogd from hammering the resolver for every
* little message that is logged. E.g., at boot when we read
* the kernel ring buffer.
*/
diff = timer_now() - f->f_time; diff = timer_now() - f->f_time;
if (!first && diff < INET_SUSPEND_TIME) { if (!first && diff < 5)
logit("Forwarding suspension not over, time left: %d\n",
(int)(INET_SUSPEND_TIME - diff));
return; return;
}
if (!first)
logit("Forwarding suspension to %s:%s over, retrying\n", host, serv);
err = nslookup(host, serv, &ai); err = nslookup(host, serv, &ai);
if (err) { if (err) {
WARN("Failed resolving '%s:%s': %s", host, serv, gai_strerror(err));
f->f_type = F_FORW_UNKN; f->f_type = F_FORW_UNKN;
f->f_time = timer_now(); f->f_time = timer_now();
if (!first && !(f->f_flags & SUSP_RETR))
WARN("Failed resolving '%s:%s': %s", host, serv, gai_strerror(err));
f->f_flags |= SUSP_RETR; /* Retry silently */
return; return;
} }
if (!first) f->f_flags &= ~SUSP_RETR;
NOTE("Successfully resolved '%s:%s', resuming operation.", host, serv);
f->f_type = F_FORW; f->f_type = F_FORW;
f->f_un.f_forw.f_addr = ai; f->f_un.f_forw.f_addr = ai;
f->f_prevcount = 0; f->f_prevcount = 0;
if (!first)
NOTE("Successfully resolved '%s:%s', initiating forwarding.", host, serv);
} }
void domark(void *arg) void domark(void *arg)
@ -2144,7 +2154,8 @@ void doflush(void *arg)
SIMPLEQ_FOREACH(f, &fhead, f_link) { SIMPLEQ_FOREACH(f, &fhead, f_link) {
if (f->f_type == F_FORW_UNKN) { if (f->f_type == F_FORW_UNKN) {
forw_lookup(f); forw_lookup(f);
continue; if (f->f_type != F_FORW)
continue;
} }
if (f->f_prevcount && timer_now() >= REPEATTIME(f)) { if (f->f_prevcount && timer_now() >= REPEATTIME(f)) {

View File

@ -183,6 +183,7 @@
#define MARK 0x008 /* this message is a mark */ #define MARK 0x008 /* this message is a mark */
#define RFC3164 0x010 /* format log message according to RFC 3164 */ #define RFC3164 0x010 /* format log message according to RFC 3164 */
#define RFC5424 0x020 /* format log message according to RFC 5424 */ #define RFC5424 0x020 /* format log message according to RFC 5424 */
#define SUSP_RETR 0x040 /* suspend/forw_unkn, retrying nslookup */
/* Syslog timestamp formats. */ /* Syslog timestamp formats. */
#define BSDFMT_DATELEN 0 #define BSDFMT_DATELEN 0