diff --git a/docs/ares_init_options.3 b/docs/ares_init_options.3 index 3d983ebe..f2bb8c8d 100644 --- a/docs/ares_init_options.3 +++ b/docs/ares_init_options.3 @@ -136,21 +136,21 @@ servers available. Instead, fail initialization with \fIARES_ENOSERVER\fP. .B int \fItimeout\fP; .br The number of seconds each name server is given to respond to a query on the -first try. (After the first try, the timeout algorithm becomes more -complicated, but scales linearly with the value of \fItimeout\fP.) The -default is two seconds. This option is being deprecated by -\fIARES_OPT_TIMEOUTMS\fP starting in c-ares 1.5.2. +first try. See \fIARES_OPT_TIMEOUTMS\fP which this value is converted into. .TP 18 .B ARES_OPT_TIMEOUTMS .B int \fItimeout\fP; .br The number of milliseconds each name server is given to respond to a query on -the first try. (After the first try, the timeout algorithm becomes more -complicated, but scales linearly with the value of \fItimeout\fP.) The -default is two seconds. Note that this option is specified with the same -struct field as the former \fIARES_OPT_TIMEOUT\fP, it is but the option bits -that tell c-ares how to interpret the number. This option was added in c-ares -1.5.2. +the first try of any given server. The default is two seconds, however any +value below 250ms will automatically be set to 250ms (roughly the RTT half-way +around the world). Note that this option is specified with the same struct field +as the former \fIARES_OPT_TIMEOUT\fP, it is but the option bits that tell c-ares +how to interpret the number. This option was added in c-ares 1.5.2. + +As of c-ares 1.32.0, this option is only honored on the first successful query +to any given server, after that the timeout is automatically calculated based +on prior query history. .TP 18 .B ARES_OPT_TRIES .B int \fItries\fP; diff --git a/src/lib/Makefile.inc b/src/lib/Makefile.inc index 242f31b0..e6bfbdea 100644 --- a/src/lib/Makefile.inc +++ b/src/lib/Makefile.inc @@ -5,7 +5,7 @@ CSOURCES = ares__addrinfo2hostent.c \ ares__addrinfo_localhost.c \ ares__buf.c \ ares__close_sockets.c \ - ares__hosts_file.c \ + ares__hosts_file.c \ ares__htable.c \ ares__htable_asvp.c \ ares__htable_strvp.c \ @@ -28,7 +28,7 @@ CSOURCES = ares__addrinfo2hostent.c \ ares_dns_parse.c \ ares_dns_record.c \ ares_dns_write.c \ - ares_event_configchg.c \ + ares_event_configchg.c \ ares_event_epoll.c \ ares_event_kqueue.c \ ares_event_poll.c \ @@ -50,7 +50,8 @@ CSOURCES = ares__addrinfo2hostent.c \ ares_getsock.c \ ares_init.c \ ares_library_init.c \ - ares_math.c \ + ares_math.c \ + ares_metrics.c \ ares_create_query.c \ ares_options.c \ ares_parse_a_reply.c \ @@ -66,7 +67,7 @@ CSOURCES = ares__addrinfo2hostent.c \ ares_parse_uri_reply.c \ ares_platform.c \ ares_process.c \ - ares_qcache.c \ + ares_qcache.c \ ares_query.c \ ares_rand.c \ ares_search.c \ @@ -75,12 +76,12 @@ CSOURCES = ares__addrinfo2hostent.c \ ares_str.c \ ares_strerror.c \ ares_strsplit.c \ - ares_sysconfig.c \ - ares_sysconfig_files.c \ - ares_sysconfig_mac.c \ - ares_sysconfig_win.c \ + ares_sysconfig.c \ + ares_sysconfig_files.c \ + ares_sysconfig_mac.c \ + ares_sysconfig_win.c \ ares_timeout.c \ - ares_update_servers.c \ + ares_update_servers.c \ ares_version.c \ inet_net_pton.c \ inet_ntop.c \ diff --git a/src/lib/ares_metrics.c b/src/lib/ares_metrics.c new file mode 100644 index 00000000..2250664d --- /dev/null +++ b/src/lib/ares_metrics.c @@ -0,0 +1,260 @@ +/* MIT License + * + * Copyright (c) 2024 Brad House + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * SPDX-License-Identifier: MIT + */ + + +/* IMPLEMENTATION NOTES + * ==================== + * + * With very little effort we should be able to determine fairly proper timeouts + * we can use based on prior query history. We track in order to be able to + * auto-scale when network conditions change (e.g. maybe there is a provider + * failover and timings change due to that). Apple appears to do this within + * their system resolver in MacOS. Obviously we should have a minimum, maximum, + * and initial value to make sure the algorithm doesn't somehow go off the + * rails. + * + * Values: + * - Minimum Timeout: 250ms (approximate RTT half-way around the globe) + * - Maximum Timeout: 5000ms (Recommended timeout in RFC 1123), can be reduced + * by ARES_OPT_MAXTIMEOUTMS, but otherwise the bound specified by the option + * caps the retry timeout. + * - Initial Timeout: User-specified via configuration or ARES_OPT_TIMEOUTMS + * - Average latency multiplier: 5x (a local DNS server returning a cached value + * will be quicker than if it needs to recurse so we need to account for this) + * - Minimum Count for Average: 3. This is the minimum number of queries we + * need to form an average for the bucket. + * + * Per-server buckets for tracking latency over time (these are ephemeral + * meaning they don't persist once a channel is destroyed). We record both the + * current timespan for the bucket and the immediate preceding timespan in case + * of roll-overs we can still maintain recent metrics for calculations: + * - 1 minute + * - 15 minutes + * - 1 hr + * - 1 day + * - since inception + * + * Each bucket would contain: + * - timestamp (divided by interval) + * - minimum latency + * - maximum latency + * - total time + * - count + * NOTE: average latency is (total time / count), we will calculate this + * dynamically when needed + * + * Basic algorithm for calculating timeout to use would be: + * - Scan from most recent bucket to least recent + * - Check timestamp of bucket, if doesn't match current time, continue to next + * bucket + * - Check count of bucket, if its not at least the "Minimum Count for Average", + * check the previous bucket, otherwise continue to next bucket + * - If we reached the end with no bucket match, use "Initial Timeout" + * - If bucket is selected, take ("total time" / count) as Average latency, + * multiply by "Average Latency Multiplier", bound by "Minimum Timeout" and + * "Maximum Timeout" + * NOTE: The timeout calculated may not be the timeout used. If we are retrying + * the query on the same server another time, then it will use a larger value + * + * On each query reply where the response is legitimate (proper response or + * NXDOMAIN) and not something like a server error: + * - Cycle through each bucket in order + * - Check timestamp of bucket against current timestamp, if out of date + * overwrite previous entry with values, clear current values + * - Compare current minimum and maximum recorded latency against query time and + * adjust if necessary + * - Increment "count" by 1 and "total time" by the query time + * + * Other Notes: + * - This is always-on, the only user-configurable value is the initial + * timeout which will simply re-uses the current option. + * - Minimum and Maximum latencies for a bucket are currently unused but are + * there in case we find a need for them in the future. + */ + +#include "ares_setup.h" +#include "ares.h" +#include "ares_private.h" + +/*! Minimum timeout value. Chosen due to it being approximately RTT half-way + * around the world */ +#define MIN_TIMEOUT_MS 250 + +/*! Multiplier to apply to average latency to come up with an initial timeout */ +#define AVG_TIMEOUT_MULTIPLIER 5 + +/*! Upper timeout bounds, only used if channel->maxtimeout not set */ +#define MAX_TIMEOUT_MS 5000 + +/*! Minimum queries required to form an average */ +#define MIN_COUNT_FOR_AVERAGE 3 + +static time_t ares_metric_timestamp(ares_server_bucket_t bucket, + const ares_timeval_t *now, + ares_bool_t is_previous) +{ + time_t divisor = 1; /* Silence bogus MSVC warning by setting default value */ + + switch (bucket) { + case ARES_METRIC_1MINUTE: + divisor = 60; + break; + case ARES_METRIC_15MINUTES: + divisor = 15 * 60; + break; + case ARES_METRIC_1HOUR: + divisor = 60 * 60; + break; + case ARES_METRIC_1DAY: + divisor = 24 * 60 * 60; + break; + case ARES_METRIC_INCEPTION: + return is_previous?0:1; + case ARES_METRIC_COUNT: + return 0; /* Invalid! */ + } + + if (is_previous) { + if (divisor >= now->sec) { + return 0; + } + return (time_t)((now->sec - divisor) / divisor); + } + + return (time_t)(now->sec / divisor); +} + +void ares_metrics_record(const struct query *query, struct server_state *server, + ares_status_t status, const ares_dns_record_t *dnsrec) +{ + ares_timeval_t now = ares__tvnow(); + ares_timeval_t tvdiff; + unsigned int query_ms; + ares_dns_rcode_t rcode; + ares_server_bucket_t i; + + if (status != ARES_SUCCESS) { + return; + } + + if (server == NULL) { + return; + } + + rcode = ares_dns_record_get_rcode(dnsrec); + if (rcode != ARES_RCODE_NOERROR && rcode != ARES_RCODE_NXDOMAIN) { + return; + } + + ares__timeval_diff(&tvdiff, &query->ts, &now); + query_ms = (unsigned int)(tvdiff.sec + (tvdiff.usec / 1000)); + if (query_ms == 0) { + query_ms = 1; + } + + /* Place in each bucket */ + for (i=0; imetrics[i].ts) { + server->metrics[i].prev_ts = server->metrics[i].ts; + server->metrics[i].prev_total_ms = server->metrics[i].total_ms; + server->metrics[i].prev_total_count = server->metrics[i].total_count; + server->metrics[i].ts = ts; + server->metrics[i].latency_min_ms = 0; + server->metrics[i].latency_max_ms = 0; + server->metrics[i].total_ms = 0; + server->metrics[i].total_count = 0; + } + + if (server->metrics[i].latency_min_ms == 0 || + server->metrics[i].latency_min_ms > query_ms) { + server->metrics[i].latency_min_ms = query_ms; + } + + if (query_ms > server->metrics[i].latency_max_ms) { + server->metrics[i].latency_min_ms = query_ms; + } + + server->metrics[i].total_count++; + server->metrics[i].total_ms += (ares_uint64_t)query_ms; + } +} + +size_t ares_metrics_server_timeout(const struct server_state *server, + const ares_timeval_t *now) +{ + const ares_channel_t *channel = server->channel; + ares_server_bucket_t i; + size_t timeout_ms = 0; + + + for (i=0; imetrics[i].ts || + server->metrics[i].total_count < MIN_COUNT_FOR_AVERAGE) { + time_t prev_ts = ares_metric_timestamp(i, now, ARES_TRUE); + if (prev_ts != server->metrics[i].prev_ts || + server->metrics[i].prev_total_count < MIN_COUNT_FOR_AVERAGE) { + /* Move onto next bucket */ + continue; + } + /* Calculate average time for previous bucket */ + timeout_ms = (size_t)(server->metrics[i].prev_total_ms / server->metrics[i].prev_total_count); + } else { + /* Calculate average time for current bucket*/ + timeout_ms = (size_t)(server->metrics[i].total_ms / server->metrics[i].total_count); + } + + /* Multiply average by constant to get timeout value */ + timeout_ms *= AVG_TIMEOUT_MULTIPLIER; + break; + } + + /* If we're here, that means its the first query for the server, so we just + * use the initial default timeout */ + if (timeout_ms == 0) { + timeout_ms = channel->timeout; + } + + /* don't go below lower bounds */ + if (timeout_ms < MIN_TIMEOUT_MS) { + timeout_ms = MIN_TIMEOUT_MS; + } + + /* don't go above upper bounds */ + if (channel->maxtimeout && timeout_ms > channel->maxtimeout) { + timeout_ms = (size_t)channel->maxtimeout; + } else if (timeout_ms > MAX_TIMEOUT_MS) { + timeout_ms = MAX_TIMEOUT_MS; + } + + return timeout_ms; +} diff --git a/src/lib/ares_private.h b/src/lib/ares_private.h index 7f5680d2..1fde90da 100644 --- a/src/lib/ares_private.h +++ b/src/lib/ares_private.h @@ -170,6 +170,29 @@ typedef struct { unsigned int usec; /*!< Microseconds. Can't be negative. */ } ares_timeval_t; +/*! Various buckets for grouping history */ +typedef enum { + ARES_METRIC_1MINUTE = 0, /*!< Bucket for tracking over the last minute */ + ARES_METRIC_15MINUTES, /*!< Bucket for tracking over the last 15 minutes */ + ARES_METRIC_1HOUR, /*!< Bucket for tracking over the last hour */ + ARES_METRIC_1DAY, /*!< Bucket for tracking over the last day */ + ARES_METRIC_INCEPTION, /*!< Bucket for tracking since inception */ + ARES_METRIC_COUNT /*!< Count of buckets, not a real bucket */ +} ares_server_bucket_t; + +/*! Data metrics collected for each bucket */ +typedef struct { + time_t ts; /*!< Timestamp divided by bucket divisor */ + unsigned int latency_min_ms; /*!< Minimum latency for queries */ + unsigned int latency_max_ms; /*!< Maximum latency for queries */ + ares_uint64_t total_ms; /*!< Cumulative query time for bucket */ + ares_uint64_t total_count; /*!< Number of queries for bucket */ + + time_t prev_ts; /*!< Previous period bucket timestamp */ + ares_uint64_t prev_total_ms; /*!< Previous period bucket cumulative query time */ + ares_uint64_t prev_total_count; /*!< Previous period bucket query count */ +} ares_server_metrics_t; + struct server_state { /* Configuration */ size_t idx; /* index for server in system configuration */ @@ -195,6 +218,9 @@ struct server_state { /* TCP output queue */ ares__buf_t *tcp_send; + /*! Buckets for collecting metrics about the server */ + ares_server_metrics_t metrics[ARES_METRIC_COUNT]; + /* Link back to owning channel */ ares_channel_t *channel; }; @@ -203,6 +229,7 @@ struct server_state { struct query { /* Query ID from qbuf, for faster lookup, and current timeout */ unsigned short qid; /* host byte order */ + ares_timeval_t ts; /*!< Timestamp query was sent */ ares_timeval_t timeout; ares_channel_t *channel; @@ -229,8 +256,8 @@ struct query { ares_bool_t using_tcp; ares_status_t error_status; size_t timeouts; /* number of timeouts we saw for this request */ - ares_bool_t no_retries; /* do not perform any additional retries, this is set - * when a query is to be canceled */ + ares_bool_t no_retries; /* do not perform any additional retries, this is set + * when a query is to be canceled */ }; struct apattern { @@ -410,6 +437,9 @@ ares_timeval_t ares__tvnow(void); void ares__timeval_remaining(ares_timeval_t *remaining, const ares_timeval_t *now, const ares_timeval_t *tout); +void ares__timeval_diff(ares_timeval_t *tvdiff, + const ares_timeval_t *tvstart, + const ares_timeval_t *tvstop); ares_status_t ares__expand_name_validated(const unsigned char *encoded, const unsigned char *abuf, size_t alen, char **s, size_t *enclen, @@ -651,6 +681,11 @@ ares_status_t ares_qcache_fetch(ares_channel_t *channel, const ares_dns_record_t *dnsrec, const ares_dns_record_t **dnsrec_resp); +void ares_metrics_record(const struct query *query, struct server_state *server, + ares_status_t status, const ares_dns_record_t *dnsrec); +size_t ares_metrics_server_timeout(const struct server_state *server, + const ares_timeval_t *now); + ares_status_t ares__channel_threading_init(ares_channel_t *channel); void ares__channel_threading_destroy(ares_channel_t *channel); void ares__channel_lock(const ares_channel_t *channel); diff --git a/src/lib/ares_process.c b/src/lib/ares_process.c index 6e50902b..51360d1c 100644 --- a/src/lib/ares_process.c +++ b/src/lib/ares_process.c @@ -69,8 +69,9 @@ static ares_bool_t same_questions(const ares_dns_record_t *qrec, const ares_dns_record_t *arec); static ares_bool_t same_address(const struct sockaddr *sa, const struct ares_addr *aa); -static void end_query(ares_channel_t *channel, struct query *query, - ares_status_t status, const ares_dns_record_t *dnsrec); +static void end_query(ares_channel_t *channel, struct server_state *server, + struct query *query, ares_status_t status, + const ares_dns_record_t *dnsrec); /* Invoke the server state callback after a success or failure */ static void invoke_server_state_cb(const struct server_state *server, @@ -702,7 +703,7 @@ static ares_status_t process_answer(ares_channel_t *channel, /* Parse the question we sent as we use it to compare */ status = ares_dns_parse(query->qbuf, query->qlen, 0, &qdnsrec); if (status != ARES_SUCCESS) { - end_query(channel, query, status, NULL); + end_query(channel, server, query, status, NULL); goto cleanup; } @@ -728,7 +729,7 @@ static ares_status_t process_answer(ares_channel_t *channel, ares_dns_has_opt_rr(qdnsrec) && !ares_dns_has_opt_rr(rdnsrec)) { status = rewrite_without_edns(qdnsrec, query); if (status != ARES_SUCCESS) { - end_query(channel, query, status, NULL); + end_query(channel, server, query, status, NULL); goto cleanup; } @@ -787,7 +788,7 @@ static ares_status_t process_answer(ares_channel_t *channel, } server_set_good(server, query->using_tcp); - end_query(channel, query, ARES_SUCCESS, rdnsrec); + end_query(channel, server, query, ARES_SUCCESS, rdnsrec); status = ARES_SUCCESS; @@ -833,7 +834,7 @@ ares_status_t ares__requeue_query(struct query *query, query->error_status = ARES_ETIMEOUT; } - end_query(channel, query, query->error_status, NULL); + end_query(channel, NULL, query, query->error_status, NULL); return ARES_ETIMEOUT; } @@ -944,10 +945,13 @@ static ares_status_t ares__append_tcpbuf(struct server_state *server, return ares__buf_append(server->tcp_send, query->qbuf, query->qlen); } -static size_t ares__calc_query_timeout(const struct query *query) +static size_t ares__calc_query_timeout(const struct query *query, + const struct server_state *server, + const ares_timeval_t *now) { const ares_channel_t *channel = query->channel; - size_t timeplus = channel->timeout; + size_t timeout = ares_metrics_server_timeout(server, now); + size_t timeplus = timeout; size_t rounds; size_t num_servers = ares__slist_len(channel->servers); @@ -986,8 +990,8 @@ static size_t ares__calc_query_timeout(const struct query *query) /* We want explicitly guarantee that timeplus is greater or equal to timeout * specified in channel options. */ - if (timeplus < channel->timeout) { - timeplus = channel->timeout; + if (timeplus < timeout) { + timeplus = timeout; } return timeplus; @@ -1014,7 +1018,7 @@ ares_status_t ares__send_query(struct query *query, const ares_timeval_t *now) } if (server == NULL) { - end_query(channel, query, ARES_ENOSERVER /* ? */, NULL); + end_query(channel, server, query, ARES_ENOSERVER /* ? */, NULL); return ARES_ENOSERVER; } @@ -1041,7 +1045,7 @@ ares_status_t ares__send_query(struct query *query, const ares_timeval_t *now) /* Anything else is not retryable, likely ENOMEM */ default: - end_query(channel, query, status, NULL); + end_query(channel, server, query, status, NULL); return status; } } @@ -1052,7 +1056,7 @@ ares_status_t ares__send_query(struct query *query, const ares_timeval_t *now) status = ares__append_tcpbuf(server, query); if (status != ARES_SUCCESS) { - end_query(channel, query, status, NULL); + end_query(channel, server, query, status, NULL); /* Only safe to kill connection if it was new, otherwise it should be * cleaned up by another process later */ @@ -1100,7 +1104,7 @@ ares_status_t ares__send_query(struct query *query, const ares_timeval_t *now) /* Anything else is not retryable, likely ENOMEM */ default: - end_query(channel, query, status, NULL); + end_query(channel, server, query, status, NULL); return status; } node = ares__llist_node_first(server->connections); @@ -1122,18 +1126,19 @@ ares_status_t ares__send_query(struct query *query, const ares_timeval_t *now) } } - timeplus = ares__calc_query_timeout(query); + timeplus = ares__calc_query_timeout(query, server, now); /* Keep track of queries bucketed by timeout, so we can process * timeout events quickly. */ ares__slist_node_destroy(query->node_queries_by_timeout); + query->ts = *now; query->timeout = *now; timeadd(&query->timeout, timeplus); query->node_queries_by_timeout = ares__slist_insert(channel->queries_by_timeout, query); if (!query->node_queries_by_timeout) { /* LCOV_EXCL_START: OutOfMemory */ - end_query(channel, query, ARES_ENOMEM, NULL); + end_query(channel, server, query, ARES_ENOMEM, NULL); /* Only safe to kill connection if it was new, otherwise it should be * cleaned up by another process later */ if (new_connection) { @@ -1151,7 +1156,7 @@ ares_status_t ares__send_query(struct query *query, const ares_timeval_t *now) if (query->node_queries_to_conn == NULL) { /* LCOV_EXCL_START: OutOfMemory */ - end_query(channel, query, ARES_ENOMEM, NULL); + end_query(channel, server, query, ARES_ENOMEM, NULL); /* Only safe to kill connection if it was new, otherwise it should be * cleaned up by another process later */ if (new_connection) { @@ -1248,9 +1253,13 @@ static void ares_detach_query(struct query *query) query->node_all_queries = NULL; } -static void end_query(ares_channel_t *channel, struct query *query, - ares_status_t status, const ares_dns_record_t *dnsrec) + +static void end_query(ares_channel_t *channel, struct server_state *server, + struct query *query, ares_status_t status, + const ares_dns_record_t *dnsrec) { + ares_metrics_record(query, server, status, dnsrec); + /* Invoke the callback. */ query->callback(query->arg, status, query->timeouts, dnsrec); ares__free_query(query); diff --git a/src/lib/ares_timeout.c b/src/lib/ares_timeout.c index 63455ee7..ac02b972 100644 --- a/src/lib/ares_timeout.c +++ b/src/lib/ares_timeout.c @@ -55,6 +55,19 @@ void ares__timeval_remaining(ares_timeval_t *remaining, } } +void ares__timeval_diff(ares_timeval_t *tvdiff, + const ares_timeval_t *tvstart, + const ares_timeval_t *tvstop) +{ + tvdiff->sec = tvstop->sec - tvstart->sec; + if (tvstop->usec > tvstart->usec) { + tvdiff->usec = tvstop->usec - tvstart->usec; + } else { + tvdiff->sec -= 1; + tvdiff->usec = tvstop->usec + 1000000 - tvstart->usec; + } +} + static struct timeval ares_timeval_to_struct_timeval(const ares_timeval_t *atv) { struct timeval tv;