[xds] Fix XdsClient race between ResourceDoesNotExist timer and receiving resources (#37678)

Issue noticed on xds_end2end_test and is made worse worse when reducing `xds_resource_does_not_exist_timeout_ms` to 500 and running it on tsan.

Closes #37678

COPYBARA_INTEGRATE_REVIEW=https://github.com/grpc/grpc/pull/37678 from yashykt:XdsClientOnTimerDebugging 1d31e28d2c
PiperOrigin-RevId: 673479242
pull/37680/head
Yash Tibrewal 2 months ago committed by Copybara-Service
parent 0aca67e7d6
commit 04f05a3031
  1. 29
      src/core/xds/xds_client/xds_client.cc
  2. 8
      test/cpp/end2end/xds/xds_end2end_test.cc

@ -210,6 +210,7 @@ class XdsClient::XdsChannel::AdsCall final
if (timer_handle_.has_value() &&
ads_call_->xds_client()->engine()->Cancel(*timer_handle_)) {
timer_handle_.reset();
ads_call_.reset();
}
}
@ -250,24 +251,28 @@ class XdsClient::XdsChannel::AdsCall final
}
void OnTimer() {
GRPC_TRACE_LOG(xds_client, INFO)
<< "[xds_client " << ads_call_->xds_client() << "] xds server "
<< ads_call_->xds_channel()->server_.server_uri()
<< ": timeout obtaining resource {type=" << type_->type_url()
<< " name="
<< XdsClient::ConstructFullXdsResourceName(
name_.authority, type_->type_url(), name_.key)
<< "} from xds server";
{
MutexLock lock(&ads_call_->xds_client()->mu_);
timer_handle_.reset();
resource_seen_ = true;
auto& authority_state =
ads_call_->xds_client()->authority_state_map_[name_.authority];
ResourceState& state = authority_state.resource_map[type_][name_.key];
state.meta.client_status = XdsApi::ResourceMetadata::DOES_NOT_EXIST;
ads_call_->xds_client()->NotifyWatchersOnResourceDoesNotExist(
state.watchers, ReadDelayHandle::NoWait());
// We might have received the resource after the timer fired but before
// the callback ran.
if (state.resource == nullptr) {
GRPC_TRACE_LOG(xds_client, INFO)
<< "[xds_client " << ads_call_->xds_client() << "] xds server "
<< ads_call_->xds_channel()->server_.server_uri()
<< ": timeout obtaining resource {type=" << type_->type_url()
<< " name="
<< XdsClient::ConstructFullXdsResourceName(
name_.authority, type_->type_url(), name_.key)
<< "} from xds server";
resource_seen_ = true;
state.meta.client_status = XdsApi::ResourceMetadata::DOES_NOT_EXIST;
ads_call_->xds_client()->NotifyWatchersOnResourceDoesNotExist(
state.watchers, ReadDelayHandle::NoWait());
}
}
ads_call_->xds_client()->work_serializer_.DrainQueue();
ads_call_.reset();

@ -949,10 +949,10 @@ class XdsServerSecurityTest : public XdsEnd2endTest {
absl::StrJoin(fields, ",\n"));
InitClient(builder, /*lb_expected_authority=*/"",
/*xds_resource_does_not_exist_timeout_ms=*/
5000, // using a low timeout to quickly end negative tests.
// Prefer using WaitOnServingStatusChange() or a similar
// loop on the client side to wait on status changes
// instead of increasing this timeout.
500, // using a low timeout to quickly end negative tests.
// Prefer using WaitOnServingStatusChange() or a similar
// loop on the client side to wait on status changes
// instead of increasing this timeout.
/*balancer_authority_override=*/"", /*args=*/nullptr,
CreateXdsChannelCredentials());
CreateBackends(1, /*xds_enabled=*/true,

Loading…
Cancel
Save