From 3ba577c8c4d35267a470acfe54cb4545cfe6660f Mon Sep 17 00:00:00 2001 From: Brad House Date: Fri, 8 Nov 2024 11:17:50 -0500 Subject: [PATCH] ares_getaddrinfo() for AF_UNSPEC should retry if ipv6 received We added an optimization to stop retries on other address classes on failures if one address class was received successfully. In production, however, some odd misconfigured use cases could mean an ipv6 address would be returned but the host was really only capable of connecting to ipv4 machines. We want to modify this optimization now to continue retries on ipv4 even if ipv6 was received, but NOT the other way around. It was always more likely that ipv6 resolution would cause the delays due to system issues, as the world still really only runs on ipv4... Authored-By: Brad House (@bradh352) --- src/lib/ares_getaddrinfo.c | 34 +++++++++++++++++++- test/ares-test-mock-ai.cc | 66 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 99 insertions(+), 1 deletion(-) diff --git a/src/lib/ares_getaddrinfo.c b/src/lib/ares_getaddrinfo.c index 8a195ed0..acb3474f 100644 --- a/src/lib/ares_getaddrinfo.c +++ b/src/lib/ares_getaddrinfo.c @@ -484,6 +484,18 @@ static void terminate_retries(const struct host_query *hquery, query->no_retries = ARES_TRUE; } +static ares_bool_t ai_has_ipv4(struct ares_addrinfo *ai) +{ + struct ares_addrinfo_node *node; + + for (node = ai->nodes; node != NULL; node = node->ai_next) { + if (node->ai_family == AF_INET) { + return ARES_TRUE; + } + } + return ARES_FALSE; +} + static void host_callback(void *arg, ares_status_t status, size_t timeouts, const ares_dns_record_t *dnsrec) { @@ -499,7 +511,27 @@ static void host_callback(void *arg, ares_status_t status, size_t timeouts, addinfostatus = ares__parse_into_addrinfo(dnsrec, ARES_TRUE, hquery->port, hquery->ai); } - if (addinfostatus == ARES_SUCCESS) { + + /* We sent out ipv4 and ipv6 requests simultaneously. If we got a + * successful ipv4 response, we want to go ahead and tell the ipv6 request + * that if it fails or times out to not try again since we have the data + * we need. + * + * Our initial implementation of this would terminate retries if we got any + * successful response (ipv4 _or_ ipv6). But we did get some user-reported + * issues with this that had bad system configs and odd behavior: + * https://github.com/alpinelinux/docker-alpine/issues/366 + * + * Essentially the ipv6 query succeeded but the ipv4 query failed or timed + * out, and so we only returned the ipv6 address, but the host couldn't + * use ipv6. If we continued to allow ipv4 retries it would have found a + * server that worked and returned both address classes (this is clearly + * unexpected behavior). + * + * At some point down the road if ipv6 actually becomes required and + * reliable we can drop this ipv4 check. + */ + if (addinfostatus == ARES_SUCCESS && ai_has_ipv4(hquery->ai)) { terminate_retries(hquery, ares_dns_record_get_id(dnsrec)); } } diff --git a/test/ares-test-mock-ai.cc b/test/ares-test-mock-ai.cc index 1e3e735f..c87b79f7 100644 --- a/test/ares-test-mock-ai.cc +++ b/test/ares-test-mock-ai.cc @@ -724,6 +724,72 @@ class NoRotateMultiMockTestAI : public MockMultiServerChannelTestAI { NoRotateMultiMockTestAI() : MockMultiServerChannelTestAI(nullptr, ARES_OPT_NOROTATE) {} }; +/* We want to terminate retries of other address classes on getaddrinfo if one + * address class is returned already to return replies faster. + * UPDATE: actually we want to do this only if the address class we received + * was ipv4. We've seen issues if ipv6 was returned but the host was + * really only capable of ipv4. + */ +TEST_P(NoRotateMultiMockTestAI, v4Worksv6Timesout) { + std::vector nothing; + + DNSPacket rsp4; + rsp4.set_response().set_aa() + .add_question(new DNSQuestion("www.example.com", T_A)) + .add_answer(new DNSARR("www.example.com", 0x0100, {0x01, 0x02, 0x03, 0x04})); + + EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A)) + .WillOnce(SetReply(servers_[0].get(), &rsp4)); + EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_AAAA)) + .WillOnce(SetReplyData(servers_[0].get(), nothing)); + + AddrInfoResult result; + struct ares_addrinfo_hints hints = {0, 0, 0, 0}; + hints.ai_family = AF_UNSPEC; + hints.ai_flags = ARES_AI_NOSORT; + ares_getaddrinfo(channel_, "www.example.com.", NULL, &hints, AddrInfoCallback, &result); + Process(); + EXPECT_TRUE(result.done_); + EXPECT_EQ(result.status_, ARES_SUCCESS); + EXPECT_THAT(result.ai_, IncludesNumAddresses(1)); + EXPECT_THAT(result.ai_, IncludesV4Address("1.2.3.4")); +} + +TEST_P(NoRotateMultiMockTestAI, v6Worksv4TimesoutFirst) { + std::vector nothing; + + DNSPacket rsp4; + rsp4.set_response().set_aa() + .add_question(new DNSQuestion("www.example.com", T_A)) + .add_answer(new DNSARR("www.example.com", 0x0100, {0x01, 0x02, 0x03, 0x04})); + + DNSPacket rsp6; + rsp6.set_response().set_aa() + .add_question(new DNSQuestion("www.example.com", T_AAAA)) + .add_answer(new DNSAaaaRR("www.example.com", 100, + {0x21, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03})); + + EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A)) + .WillOnce(SetReplyData(servers_[0].get(), nothing)); + EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_AAAA)) + .WillOnce(SetReply(servers_[0].get(), &rsp6)); + EXPECT_CALL(*servers_[1], OnRequest("www.example.com", T_A)) + .WillOnce(SetReply(servers_[1].get(), &rsp4)); + + AddrInfoResult result; + struct ares_addrinfo_hints hints = {0, 0, 0, 0}; + hints.ai_family = AF_UNSPEC; + hints.ai_flags = ARES_AI_NOSORT; + ares_getaddrinfo(channel_, "www.example.com.", NULL, &hints, AddrInfoCallback, &result); + Process(); + EXPECT_TRUE(result.done_); + EXPECT_EQ(result.status_, ARES_SUCCESS); + EXPECT_THAT(result.ai_, IncludesNumAddresses(2)); + EXPECT_THAT(result.ai_, IncludesV4Address("1.2.3.4")); + EXPECT_THAT(result.ai_, IncludesV6Address("2121:0000:0000:0000:0000:0000:0000:0303")); + +} TEST_P(NoRotateMultiMockTestAI, ThirdServer) { struct ares_options opts;