mirror of https://github.com/grpc/grpc.git
[c-ares DNS resolver] Fix file descriptor use-after-close bug when c-ares writes succeed but subsequent read fails (#33871)
Normally, c-ares related fds are destroyed after all DNS resolution is finished in [this code path](pull/33929/headc82d31677a/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.cc (L210)
). There are also some fds that c-ares may fail to open or write to initially; c-ares closes those internally before gRPC ever knows about them. But if 1) c-ares opens a socket and successfully writes a request on it, and 2) a subsequent read then fails, then c-ares will close the fd in [this code path](bad62225b7/src/lib/ares_process.c (L740)
), but gRPC will still hold a reference to the fd and will keep using it afterwards. The fix here is to leverage the c-ares socket-override API to properly track fd ownership between c-ares and gRPC. Related: internal issue b/292203138
parent
98104bbc3c
commit
76203ba589
12 changed files with 490 additions and 220 deletions
@ -0,0 +1,199 @@ |
||||
//
|
||||
//
|
||||
// Copyright 2017 gRPC authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
//
|
||||
|
||||
#include <grpc/support/port_platform.h> |
||||
|
||||
#include "test/core/util/socket_use_after_close_detector.h" |
||||
|
||||
#include <errno.h> |
||||
#include <fcntl.h> |
||||
#include <string.h> |
||||
|
||||
// IWYU pragma: no_include <arpa/inet.h>
|
||||
// IWYU pragma: no_include <unistd.h>
|
||||
|
||||
#include <algorithm> |
||||
#include <memory> |
||||
#include <string> |
||||
#include <thread> |
||||
#include <vector> |
||||
|
||||
#include "gtest/gtest.h" |
||||
|
||||
#include <grpc/support/sync.h> |
||||
|
||||
#include "src/core/lib/iomgr/sockaddr.h" |
||||
#include "test/core/util/port.h" |
||||
|
||||
// TODO(unknown): pull in different headers when enabling this
|
||||
// test on windows. Also set BAD_SOCKET_RETURN_VAL
|
||||
// to INVALID_SOCKET on windows.
|
||||
#ifdef GPR_WINDOWS |
||||
#include "src/core/lib/iomgr/socket_windows.h" |
||||
#include "src/core/lib/iomgr/tcp_windows.h" |
||||
|
||||
#define BAD_SOCKET_RETURN_VAL INVALID_SOCKET |
||||
#else |
||||
#define BAD_SOCKET_RETURN_VAL (-1) |
||||
#endif |
||||
|
||||
namespace { |
||||
|
||||
#ifdef GPR_WINDOWS |
||||
void OpenAndCloseSocketsStressLoop(int port, gpr_event* done_ev) { |
||||
sockaddr_in6 addr; |
||||
memset(&addr, 0, sizeof(addr)); |
||||
addr.sin6_family = AF_INET6; |
||||
addr.sin6_port = htons(port); |
||||
((char*)&addr.sin6_addr)[15] = 1; |
||||
for (;;) { |
||||
if (gpr_event_get(done_ev)) { |
||||
return; |
||||
} |
||||
std::vector<int> sockets; |
||||
for (size_t i = 0; i < 50; i++) { |
||||
SOCKET s = WSASocket(AF_INET6, SOCK_STREAM, IPPROTO_TCP, nullptr, 0, |
||||
WSA_FLAG_OVERLAPPED); |
||||
ASSERT_TRUE(s != BAD_SOCKET_RETURN_VAL) |
||||
<< "Failed to create TCP ipv6 socket"; |
||||
char val = 1; |
||||
ASSERT_TRUE(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)) != |
||||
SOCKET_ERROR) |
||||
<< "Failed to set socketopt reuseaddr. WSA error: " + |
||||
std::to_string(WSAGetLastError()); |
||||
ASSERT_TRUE(grpc_tcp_set_non_block(s) == absl::OkStatus()) |
||||
<< "Failed to set socket non-blocking"; |
||||
ASSERT_TRUE(bind(s, (const sockaddr*)&addr, sizeof(addr)) != SOCKET_ERROR) |
||||
<< "Failed to bind socket " + std::to_string(s) + |
||||
" to [::1]:" + std::to_string(port) + |
||||
". WSA error: " + std::to_string(WSAGetLastError()); |
||||
ASSERT_TRUE(listen(s, 1) != SOCKET_ERROR) |
||||
<< "Failed to listen on socket " + std::to_string(s) + |
||||
". WSA error: " + std::to_string(WSAGetLastError()); |
||||
sockets.push_back(s); |
||||
} |
||||
// Do a non-blocking accept followed by a close on all of those sockets.
|
||||
// Do this in a separate loop to try to induce a time window to hit races.
|
||||
for (size_t i = 0; i < sockets.size(); i++) { |
||||
ASSERT_TRUE(accept(sockets[i], nullptr, nullptr) == INVALID_SOCKET) |
||||
<< "Accept on phony socket unexpectedly accepted actual connection."; |
||||
ASSERT_TRUE(WSAGetLastError() == WSAEWOULDBLOCK) |
||||
<< "OpenAndCloseSocketsStressLoop accept on socket " + |
||||
std::to_string(sockets[i]) + |
||||
" failed in " |
||||
"an unexpected way. " |
||||
"WSA error: " + |
||||
std::to_string(WSAGetLastError()) + |
||||
". Socket use-after-close bugs are likely."; |
||||
ASSERT_TRUE(closesocket(sockets[i]) != SOCKET_ERROR) |
||||
<< "Failed to close socket: " + std::to_string(sockets[i]) + |
||||
". WSA error: " + std::to_string(WSAGetLastError()); |
||||
} |
||||
} |
||||
return; |
||||
} |
||||
#else |
||||
void OpenAndCloseSocketsStressLoop(int port, gpr_event* done_ev) { |
||||
// The goal of this loop is to catch socket
|
||||
// "use after close" bugs within the c-ares resolver by acting
|
||||
// like some separate thread doing I/O.
|
||||
// It's goal is to try to hit race conditions whereby:
|
||||
// 1) The c-ares resolver closes a socket.
|
||||
// 2) This loop opens a socket with (coincidentally) the same handle.
|
||||
// 3) the c-ares resolver mistakenly uses that same socket without
|
||||
// realizing that its closed.
|
||||
// 4) This loop performs an operation on that socket that should
|
||||
// succeed but instead fails because of what the c-ares
|
||||
// resolver did in the meantime.
|
||||
sockaddr_in6 addr; |
||||
memset(&addr, 0, sizeof(addr)); |
||||
addr.sin6_family = AF_INET6; |
||||
addr.sin6_port = htons(port); |
||||
(reinterpret_cast<char*>(&addr.sin6_addr))[15] = 1; |
||||
for (;;) { |
||||
if (gpr_event_get(done_ev)) { |
||||
return; |
||||
} |
||||
std::vector<int> sockets; |
||||
// First open a bunch of sockets, bind and listen
|
||||
// '50' is an arbitrary number that, experimentally,
|
||||
// has a good chance of catching bugs.
|
||||
for (size_t i = 0; i < 50; i++) { |
||||
int s = socket(AF_INET6, SOCK_STREAM, 0); |
||||
int val = 1; |
||||
ASSERT_TRUE(setsockopt(s, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val)) == |
||||
0) |
||||
<< "Failed to set socketopt reuseport"; |
||||
ASSERT_TRUE(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)) == |
||||
0) |
||||
<< "Failed to set socket reuseaddr"; |
||||
ASSERT_TRUE(fcntl(s, F_SETFL, O_NONBLOCK) == 0) |
||||
<< "Failed to set socket non-blocking"; |
||||
ASSERT_TRUE(s != BAD_SOCKET_RETURN_VAL) |
||||
<< "Failed to create TCP ipv6 socket"; |
||||
ASSERT_TRUE(bind(s, (const sockaddr*)&addr, sizeof(addr)) == 0) |
||||
<< "Failed to bind socket " + std::to_string(s) + |
||||
" to [::1]:" + std::to_string(port) + |
||||
". errno: " + std::to_string(errno); |
||||
ASSERT_TRUE(listen(s, 1) == 0) << "Failed to listen on socket " + |
||||
std::to_string(s) + |
||||
". errno: " + std::to_string(errno); |
||||
sockets.push_back(s); |
||||
} |
||||
// Do a non-blocking accept followed by a close on all of those sockets.
|
||||
// Do this in a separate loop to try to induce a time window to hit races.
|
||||
for (size_t i = 0; i < sockets.size(); i++) { |
||||
if (accept(sockets[i], nullptr, nullptr)) { |
||||
// If e.g. a "shutdown" was called on this fd from another thread,
|
||||
// then this accept call should fail with an unexpected error.
|
||||
ASSERT_TRUE(errno == EAGAIN || errno == EWOULDBLOCK) |
||||
<< "OpenAndCloseSocketsStressLoop accept on socket " + |
||||
std::to_string(sockets[i]) + |
||||
" failed in " |
||||
"an unexpected way. " |
||||
"errno: " + |
||||
std::to_string(errno) + |
||||
". Socket use-after-close bugs are likely."; |
||||
} |
||||
ASSERT_TRUE(close(sockets[i]) == 0) |
||||
<< "Failed to close socket: " + std::to_string(sockets[i]) + |
||||
". errno: " + std::to_string(errno); |
||||
} |
||||
} |
||||
} |
||||
#endif |
||||
|
||||
} // namespace
|
||||
|
||||
namespace grpc_core { |
||||
namespace testing { |
||||
|
||||
// Picks an unused port, initializes the "done" event and launches the
// background thread that runs OpenAndCloseSocketsStressLoop on that port.
SocketUseAfterCloseDetector::SocketUseAfterCloseDetector() {
  const int stress_port = grpc_pick_unused_port_or_die();
  // The event must be initialized before the thread starts polling it.
  gpr_event_init(&done_ev_);
  gpr_event* const stop_signal = &done_ev_;
  thread_ = std::make_unique<std::thread>(OpenAndCloseSocketsStressLoop,
                                          stress_port, stop_signal);
}
||||
|
||||
// Sets the "done" event so the background loop exits at its next check, then
// blocks until the background thread has finished.
SocketUseAfterCloseDetector::~SocketUseAfterCloseDetector() {
  // Any non-null value marks the event as set.
  void* const done_marker = reinterpret_cast<void*>(1);
  gpr_event_set(&done_ev_, done_marker);
  thread_->join();
}
||||
|
||||
} // namespace testing
|
||||
} // namespace grpc_core
|
@ -0,0 +1,56 @@ |
||||
//
|
||||
//
|
||||
// Copyright 2017 gRPC authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
//
|
||||
|
||||
#ifndef GRPC_TEST_CORE_UTIL_SOCKET_USE_AFTER_CLOSE_DETECTOR_H |
||||
#define GRPC_TEST_CORE_UTIL_SOCKET_USE_AFTER_CLOSE_DETECTOR_H |
||||
|
||||
#include <grpc/support/port_platform.h> |
||||
|
||||
#include <memory> |
||||
#include <thread> |
||||
|
||||
#include <grpc/support/sync_generic.h> |
||||
|
||||
namespace grpc_core { |
||||
namespace testing { |
||||
|
||||
// This class is meant to detect file descriptor use-after-close
|
||||
// bugs occurring somewhere in the program while the object is alive.
|
||||
// The implementation currently uses a background thread to open
|
||||
// and close sockets in a loop, catching socket use-after-close bugs
|
||||
// by watching them manifest as unexpected socket operation failures.
|
||||
//
|
||||
// Note: this will not give false positives but may give false negatives.
|
||||
// That said this seems to be fairly reliable at finding use-after-close
|
||||
// bugs, at least on linux, because of fd handles being quickly reused.
|
||||
// For example this was able to catch the use-after-close bug from
|
||||
// https://github.com/grpc/grpc/pull/33871 "almost every time".
|
||||
class SocketUseAfterCloseDetector { |
||||
public: |
||||
SocketUseAfterCloseDetector(); |
||||
~SocketUseAfterCloseDetector(); |
||||
|
||||
private: |
||||
std::unique_ptr<std::thread> thread_; |
||||
gpr_event done_ev_; |
||||
}; |
||||
|
||||
} // namespace testing
|
||||
} // namespace grpc_core
|
||||
|
||||
#endif // GRPC_TEST_CORE_UTIL_SOCKET_USE_AFTER_CLOSE_DETECTOR_H
|
Loading…
Reference in new issue