ntpd: don't stay at short polling interval
To avoid polling servers frequently, slowly increase the interval up to BIGPOLL when:
- no replies are received from a peer
- no source can be selected
- peer claims to be unsynchronized (e.g. we are polling it too frequently)

When recv() returns with an error, drop code to try to continue on network errors: I'm not convinced those cases happen in real life.

function                     old  new  delta
recv_and_process_peer_pkt    919  838    -81

Signed-off-by: Miroslav Lichvar <mlichvar@redhat.com>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
parent
cf76b5ce12
commit
b434ce7069
@ -136,17 +136,17 @@
|
|||||||
#define BURSTPOLL 0 /* initial poll */
|
#define BURSTPOLL 0 /* initial poll */
|
||||||
#define MINPOLL 5 /* minimum poll interval. std ntpd uses 6 (6: 64 sec) */
|
#define MINPOLL 5 /* minimum poll interval. std ntpd uses 6 (6: 64 sec) */
|
||||||
/*
|
/*
|
||||||
* If offset > discipline_jitter * POLLADJ_GATE, and poll interval is >= 2^BIGPOLL,
|
* If offset > discipline_jitter * POLLADJ_GATE, and poll interval is > 2^BIGPOLL,
|
||||||
* then it is decreased _at once_. (If < 2^BIGPOLL, it will be decreased _eventually_).
|
* then it is decreased _at once_. (If <= 2^BIGPOLL, it will be decreased _eventually_).
|
||||||
*/
|
*/
|
||||||
#define BIGPOLL 10 /* 2^10 sec ~= 17 min */
|
#define BIGPOLL 9 /* 2^9 sec ~= 8.5 min */
|
||||||
#define MAXPOLL 12 /* maximum poll interval (12: 1.1h, 17: 36.4h). std ntpd uses 17 */
|
#define MAXPOLL 12 /* maximum poll interval (12: 1.1h, 17: 36.4h). std ntpd uses 17 */
|
||||||
/*
|
/*
|
||||||
* Actively lower poll when we see such big offsets.
|
* Actively lower poll when we see such big offsets.
|
||||||
* With STEP_THRESHOLD = 0.125, it means we try to sync more aggressively
|
* With STEP_THRESHOLD = 0.125, it means we try to sync more aggressively
|
||||||
* if offset increases over ~0.04 sec
|
* if offset increases over ~0.04 sec
|
||||||
*/
|
*/
|
||||||
#define POLLDOWN_OFFSET (STEP_THRESHOLD / 3)
|
//#define POLLDOWN_OFFSET (STEP_THRESHOLD / 3)
|
||||||
#define MINDISP 0.01 /* minimum dispersion (sec) */
|
#define MINDISP 0.01 /* minimum dispersion (sec) */
|
||||||
#define MAXDISP 16 /* maximum dispersion (sec) */
|
#define MAXDISP 16 /* maximum dispersion (sec) */
|
||||||
#define MAXSTRAT 16 /* maximum stratum (infinity metric) */
|
#define MAXSTRAT 16 /* maximum stratum (infinity metric) */
|
||||||
@ -984,8 +984,8 @@ static void clamp_pollexp_and_set_MAXSTRAT(void)
|
|||||||
{
|
{
|
||||||
if (G.poll_exp < MINPOLL)
|
if (G.poll_exp < MINPOLL)
|
||||||
G.poll_exp = MINPOLL;
|
G.poll_exp = MINPOLL;
|
||||||
if (G.poll_exp >= BIGPOLL)
|
if (G.poll_exp > BIGPOLL)
|
||||||
G.poll_exp = BIGPOLL - 1;
|
G.poll_exp = BIGPOLL;
|
||||||
G.polladj_count = 0;
|
G.polladj_count = 0;
|
||||||
G.stratum = MAXSTRAT;
|
G.stratum = MAXSTRAT;
|
||||||
}
|
}
|
||||||
@ -1682,7 +1682,7 @@ poll_interval(int upper_bound)
|
|||||||
VERB4 bb_error_msg("chose poll interval:%u (poll_exp:%d)", interval, G.poll_exp);
|
VERB4 bb_error_msg("chose poll interval:%u (poll_exp:%d)", interval, G.poll_exp);
|
||||||
return interval;
|
return interval;
|
||||||
}
|
}
|
||||||
static NOINLINE void
|
static void
|
||||||
adjust_poll(int count)
|
adjust_poll(int count)
|
||||||
{
|
{
|
||||||
G.polladj_count += count;
|
G.polladj_count += count;
|
||||||
@ -1693,7 +1693,7 @@ adjust_poll(int count)
|
|||||||
VERB4 bb_error_msg("polladj: discipline_jitter:%f ++poll_exp=%d",
|
VERB4 bb_error_msg("polladj: discipline_jitter:%f ++poll_exp=%d",
|
||||||
G.discipline_jitter, G.poll_exp);
|
G.discipline_jitter, G.poll_exp);
|
||||||
}
|
}
|
||||||
} else if (G.polladj_count < -POLLADJ_LIMIT || (count < 0 && G.poll_exp >= BIGPOLL)) {
|
} else if (G.polladj_count < -POLLADJ_LIMIT || (count < 0 && G.poll_exp > BIGPOLL)) {
|
||||||
G.polladj_count = 0;
|
G.polladj_count = 0;
|
||||||
if (G.poll_exp > MINPOLL) {
|
if (G.poll_exp > MINPOLL) {
|
||||||
llist_t *item;
|
llist_t *item;
|
||||||
@ -1736,19 +1736,23 @@ recv_and_process_peer_pkt(peer_t *p)
|
|||||||
* ntp servers reply from their *other IP*.
|
* ntp servers reply from their *other IP*.
|
||||||
* TODO: maybe we should check at least what we can: from.port == 123?
|
* TODO: maybe we should check at least what we can: from.port == 123?
|
||||||
*/
|
*/
|
||||||
|
recv_again:
|
||||||
size = recv(p->p_fd, &msg, sizeof(msg), MSG_DONTWAIT);
|
size = recv(p->p_fd, &msg, sizeof(msg), MSG_DONTWAIT);
|
||||||
if (size == -1) {
|
if (size < 0) {
|
||||||
bb_perror_msg("recv(%s) error", p->p_dotted);
|
if (errno == EINTR)
|
||||||
if (errno == EHOSTUNREACH || errno == EHOSTDOWN
|
/* Signal caught */
|
||||||
|| errno == ENETUNREACH || errno == ENETDOWN
|
goto recv_again;
|
||||||
|| errno == ECONNREFUSED || errno == EADDRNOTAVAIL
|
if (errno == EAGAIN)
|
||||||
|| errno == EAGAIN
|
/* There was no packet after all
|
||||||
) {
|
* (poll() returning POLLIN for a fd
|
||||||
//TODO: always do this?
|
* is not an ironclad guarantee that data is there)
|
||||||
interval = poll_interval(RETRY_INTERVAL);
|
*/
|
||||||
goto set_next_and_ret;
|
return;
|
||||||
}
|
/*
|
||||||
xfunc_die();
|
* If you need a different handling for a specific
|
||||||
|
* errno, always explain it in comment.
|
||||||
|
*/
|
||||||
|
bb_perror_msg_and_die("recv(%s) error", p->p_dotted);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (size != NTP_MSGSIZE_NOAUTH && size != NTP_MSGSIZE) {
|
if (size != NTP_MSGSIZE_NOAUTH && size != NTP_MSGSIZE) {
|
||||||
@ -1774,10 +1778,15 @@ recv_and_process_peer_pkt(peer_t *p)
|
|||||||
|| msg.m_stratum == 0
|
|| msg.m_stratum == 0
|
||||||
|| msg.m_stratum > NTP_MAXSTRATUM
|
|| msg.m_stratum > NTP_MAXSTRATUM
|
||||||
) {
|
) {
|
||||||
// TODO: stratum 0 responses may have commands in 32-bit m_refid field:
|
|
||||||
// "DENY", "RSTR" - peer does not like us at all
|
|
||||||
// "RATE" - peer is overloaded, reduce polling freq
|
|
||||||
bb_error_msg("reply from %s: peer is unsynced", p->p_dotted);
|
bb_error_msg("reply from %s: peer is unsynced", p->p_dotted);
|
||||||
|
/*
|
||||||
|
* Stratum 0 responses may have commands in 32-bit m_refid field:
|
||||||
|
* "DENY", "RSTR" - peer does not like us at all,
|
||||||
|
* "RATE" - peer is overloaded, reduce polling freq.
|
||||||
|
* If poll interval is small, increase it.
|
||||||
|
*/
|
||||||
|
if (G.poll_exp < BIGPOLL)
|
||||||
|
goto increase_interval;
|
||||||
goto pick_normal_interval;
|
goto pick_normal_interval;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1866,11 +1875,19 @@ recv_and_process_peer_pkt(peer_t *p)
|
|||||||
/* Muck with statistics and update the clock */
|
/* Muck with statistics and update the clock */
|
||||||
filter_datapoints(p);
|
filter_datapoints(p);
|
||||||
q = select_and_cluster();
|
q = select_and_cluster();
|
||||||
rc = -1;
|
rc = 0;
|
||||||
if (q) {
|
if (q) {
|
||||||
rc = 0;
|
|
||||||
if (!(option_mask32 & OPT_w)) {
|
if (!(option_mask32 & OPT_w)) {
|
||||||
rc = update_local_clock(q);
|
rc = update_local_clock(q);
|
||||||
|
#if 0
|
||||||
|
//Disabled this because there is a case where largish offsets
|
||||||
|
//are unavoidable: if network round-trip delay is, say, ~0.6s,
|
||||||
|
//error in offset estimation would be ~delay/2 ~= 0.3s.
|
||||||
|
//Thus, offsets will be usually in -0.3...0.3s range.
|
||||||
|
//In this case, this code would keep poll interval small,
|
||||||
|
//but it won't be helping.
|
||||||
|
//BIGOFF check below deals with a case of seeing multi-second offsets.
|
||||||
|
|
||||||
/* If drift is dangerously large, immediately
|
/* If drift is dangerously large, immediately
|
||||||
* drop poll interval one step down.
|
* drop poll interval one step down.
|
||||||
*/
|
*/
|
||||||
@ -1879,9 +1896,15 @@ recv_and_process_peer_pkt(peer_t *p)
|
|||||||
adjust_poll(-POLLADJ_LIMIT * 3);
|
adjust_poll(-POLLADJ_LIMIT * 3);
|
||||||
rc = 0;
|
rc = 0;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
/* No peer selected.
|
||||||
|
* If poll interval is small, increase it.
|
||||||
|
*/
|
||||||
|
if (G.poll_exp < BIGPOLL)
|
||||||
|
goto increase_interval;
|
||||||
}
|
}
|
||||||
/* else: no peer selected, rc = -1: we want to poll more often */
|
|
||||||
|
|
||||||
if (rc != 0) {
|
if (rc != 0) {
|
||||||
/* Adjust the poll interval by comparing the current offset
|
/* Adjust the poll interval by comparing the current offset
|
||||||
@ -1893,6 +1916,7 @@ recv_and_process_peer_pkt(peer_t *p)
|
|||||||
if (rc > 0 && G.offset_to_jitter_ratio <= POLLADJ_GATE) {
|
if (rc > 0 && G.offset_to_jitter_ratio <= POLLADJ_GATE) {
|
||||||
/* was += G.poll_exp but it is a bit
|
/* was += G.poll_exp but it is a bit
|
||||||
* too optimistic for my taste at high poll_exp's */
|
* too optimistic for my taste at high poll_exp's */
|
||||||
|
increase_interval:
|
||||||
adjust_poll(MINPOLL);
|
adjust_poll(MINPOLL);
|
||||||
} else {
|
} else {
|
||||||
adjust_poll(-G.poll_exp * 2);
|
adjust_poll(-G.poll_exp * 2);
|
||||||
@ -1917,7 +1941,6 @@ recv_and_process_peer_pkt(peer_t *p)
|
|||||||
interval = BIGOFF_INTERVAL;
|
interval = BIGOFF_INTERVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
set_next_and_ret:
|
|
||||||
set_next(p, interval);
|
set_next(p, interval);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2252,6 +2275,9 @@ int ntpd_main(int argc UNUSED_PARAM, char **argv)
|
|||||||
/* Timed out waiting for reply */
|
/* Timed out waiting for reply */
|
||||||
close(p->p_fd);
|
close(p->p_fd);
|
||||||
p->p_fd = -1;
|
p->p_fd = -1;
|
||||||
|
/* If poll interval is small, increase it */
|
||||||
|
if (G.poll_exp < BIGPOLL)
|
||||||
|
adjust_poll(MINPOLL);
|
||||||
timeout = poll_interval(NOREPLY_INTERVAL);
|
timeout = poll_interval(NOREPLY_INTERVAL);
|
||||||
bb_error_msg("timed out waiting for %s, reach 0x%02x, next query in %us",
|
bb_error_msg("timed out waiting for %s, reach 0x%02x, next query in %us",
|
||||||
p->p_dotted, p->reachable_bits, timeout);
|
p->p_dotted, p->reachable_bits, timeout);
|
||||||
|
Loading…
Reference in New Issue
Block a user