[HealthCheck] Make status message better (#37724)

Closes #37724

COPYBARA_INTEGRATE_REVIEW=https://github.com/grpc/grpc/pull/37724 from yashykt:PickFirstHealthWatchErrorMsg fc09a026bb
PiperOrigin-RevId: 676497230
pull/37769/head
Yash Tibrewal 2 months ago committed by Copybara-Service
parent 4b53c2bac3
commit 656f325b53
  1. 5
      src/core/BUILD
  2. 2
      src/core/client_channel/client_channel.cc
  3. 3
      src/core/client_channel/client_channel_filter.cc
  4. 6
      src/core/client_channel/subchannel.h
  5. 6
      src/core/load_balancing/health_check_client.cc
  6. 29
      src/core/load_balancing/outlier_detection/outlier_detection.cc
  7. 12
      src/core/load_balancing/pick_first/pick_first.cc
  8. 8
      src/core/load_balancing/subchannel_interface.h
  9. 2
      test/core/client_channel/bm_load_balanced_call_destination.cc
  10. 2
      test/core/client_channel/load_balanced_call_destination_test.cc
  11. 2
      test/core/load_balancing/bm_picker.cc
  12. 2
      test/core/load_balancing/lb_policy_test_lib.h

@ -3606,7 +3606,10 @@ grpc_cc_library(
grpc_cc_library(
name = "subchannel_interface",
hdrs = ["load_balancing/subchannel_interface.h"],
external_deps = ["absl/status"],
external_deps = [
"absl/status",
"absl/strings",
],
deps = [
"dual_ref_counted",
"iomgr_fwd",

@ -57,6 +57,7 @@
#include "src/core/client_channel/subchannel.h"
#include "src/core/client_channel/subchannel_interface_internal.h"
#include "src/core/ext/filters/channel_idle/legacy_channel_idle_filter.h"
#include "src/core/lib/address_utils/sockaddr_utils.h"
#include "src/core/lib/channel/channel_args.h"
#include "src/core/lib/channel/status_util.h"
#include "src/core/lib/config/core_configuration.h"
@ -167,6 +168,7 @@ class ClientChannel::SubchannelWrapper
void CancelDataWatcher(DataWatcherInterface* watcher) override
ABSL_EXCLUSIVE_LOCKS_REQUIRED(*client_channel_->work_serializer_);
void ThrottleKeepaliveTime(int new_keepalive_time);
std::string address() const override { return subchannel_->address(); }
private:
class WatcherWrapper;

@ -63,6 +63,7 @@
#include "src/core/client_channel/subchannel.h"
#include "src/core/client_channel/subchannel_interface_internal.h"
#include "src/core/handshaker/proxy_mapper_registry.h"
#include "src/core/lib/address_utils/sockaddr_utils.h"
#include "src/core/lib/channel/channel_args.h"
#include "src/core/lib/channel/channel_stack.h"
#include "src/core/lib/channel/status_util.h"
@ -616,6 +617,8 @@ class ClientChannelFilter::SubchannelWrapper final
subchannel_->ThrottleKeepaliveTime(new_keepalive_time);
}
std::string address() const override { return subchannel_->address(); }
private:
// This wrapper provides a bridge between the internal Subchannel API
// and the SubchannelInterface API that we expose to LB policies.

@ -33,6 +33,7 @@
#include "src/core/client_channel/connector.h"
#include "src/core/client_channel/subchannel_pool_interface.h"
#include "src/core/lib/address_utils/sockaddr_utils.h"
#include "src/core/lib/backoff/backoff.h"
#include "src/core/lib/channel/channel_args.h"
#include "src/core/lib/channel/channel_fwd.h"
@ -215,7 +216,10 @@ class Subchannel final : public DualRefCounted<Subchannel> {
channelz::SubchannelNode* channelz_node();
const grpc_resolved_address& address() const { return key_.address(); }
std::string address() const {
return grpc_sockaddr_to_uri(&key_.address())
.value_or("<unknown address type>");
}
// Starts watching the subchannel's connectivity state.
// The first callback to the watcher will be delivered ~immediately.

@ -166,11 +166,9 @@ void HealthProducer::HealthChecker::OnHealthWatchStatusChange(
// Prepend the subchannel's address to the status if needed.
absl::Status use_status;
if (!status.ok()) {
std::string address_str =
grpc_sockaddr_to_uri(&producer_->subchannel_->address())
.value_or("<unknown address type>");
use_status = absl::Status(
status.code(), absl::StrCat(address_str, ": ", status.message()));
status.code(), absl::StrCat(producer_->subchannel_->address(), ": ",
status.message()));
}
work_serializer_->Schedule(
[self = Ref(), state, status = std::move(use_status)]() mutable {

@ -159,11 +159,14 @@ class OutlierDetectionLb final : public LoadBalancingPolicy {
class WatcherWrapper final
: public SubchannelInterface::ConnectivityStateWatcherInterface {
public:
WatcherWrapper(std::shared_ptr<
WatcherWrapper(WeakRefCountedPtr<SubchannelWrapper> subchannel_wrapper,
std::shared_ptr<
SubchannelInterface::ConnectivityStateWatcherInterface>
health_watcher,
bool ejected)
: watcher_(std::move(health_watcher)), ejected_(ejected) {}
: subchannel_wrapper_(std::move(subchannel_wrapper)),
watcher_(std::move(health_watcher)),
ejected_(ejected) {}
void Eject() {
ejected_ = true;
@ -171,7 +174,8 @@ class OutlierDetectionLb final : public LoadBalancingPolicy {
watcher_->OnConnectivityStateChange(
GRPC_CHANNEL_TRANSIENT_FAILURE,
absl::UnavailableError(
"subchannel ejected by outlier detection"));
absl::StrCat(subchannel_wrapper_->address(),
": subchannel ejected by outlier detection")));
}
}
@ -192,7 +196,8 @@ class OutlierDetectionLb final : public LoadBalancingPolicy {
if (ejected_) {
new_state = GRPC_CHANNEL_TRANSIENT_FAILURE;
status = absl::UnavailableError(
"subchannel ejected by outlier detection");
absl::StrCat(subchannel_wrapper_->address(),
": subchannel ejected by outlier detection"));
}
watcher_->OnConnectivityStateChange(new_state, status);
}
@ -203,6 +208,7 @@ class OutlierDetectionLb final : public LoadBalancingPolicy {
}
private:
WeakRefCountedPtr<SubchannelWrapper> subchannel_wrapper_;
std::shared_ptr<SubchannelInterface::ConnectivityStateWatcherInterface>
watcher_;
absl::optional<grpc_connectivity_state> last_seen_state_;
@ -463,7 +469,8 @@ void OutlierDetectionLb::SubchannelWrapper::AddDataWatcher(
if (w->type() == HealthProducer::Type()) {
auto* health_watcher = static_cast<HealthWatcher*>(watcher.get());
auto watcher_wrapper = std::make_shared<WatcherWrapper>(
health_watcher->TakeWatcher(), ejected_);
WeakRefAsSubclass<SubchannelWrapper>(), health_watcher->TakeWatcher(),
ejected_);
watcher_wrapper_ = watcher_wrapper.get();
health_watcher->SetWatcher(std::move(watcher_wrapper));
}
@ -534,8 +541,8 @@ OutlierDetectionLb::Picker::Picker(OutlierDetectionLb* outlier_detection_lb,
: picker_(std::move(picker)), counting_enabled_(counting_enabled) {
GRPC_TRACE_LOG(outlier_detection_lb, INFO)
<< "[outlier_detection_lb " << outlier_detection_lb
<< "] constructed new picker " << this << " and counting "
<< "is " << (counting_enabled ? "enabled" : "disabled");
<< "] constructed new picker " << this << " and counting " << "is "
<< (counting_enabled ? "enabled" : "disabled");
}
LoadBalancingPolicy::PickResult OutlierDetectionLb::Picker::Pick(
@ -904,8 +911,8 @@ void OutlierDetectionLb::EjectionTimer::OnTimerLocked() {
config.success_rate_ejection->minimum_hosts) {
GRPC_TRACE_LOG(outlier_detection_lb, INFO)
<< "[outlier_detection_lb " << parent_.get()
<< "] running success rate algorithm: "
<< "stdev_factor=" << config.success_rate_ejection->stdev_factor
<< "] running success rate algorithm: " << "stdev_factor="
<< config.success_rate_ejection->stdev_factor
<< ", enforcement_percentage="
<< config.success_rate_ejection->enforcement_percentage;
// calculate ejection threshold: (mean - stdev *
@ -957,8 +964,8 @@ void OutlierDetectionLb::EjectionTimer::OnTimerLocked() {
config.failure_percentage_ejection->minimum_hosts) {
GRPC_TRACE_LOG(outlier_detection_lb, INFO)
<< "[outlier_detection_lb " << parent_.get()
<< "] running failure percentage algorithm: "
<< "threshold=" << config.failure_percentage_ejection->threshold
<< "] running failure percentage algorithm: " << "threshold="
<< config.failure_percentage_ejection->threshold
<< ", enforcement_percentage="
<< config.failure_percentage_ejection->enforcement_percentage;
for (auto& candidate : failure_percentage_ejection_candidates) {

@ -648,7 +648,8 @@ void PickFirst::HealthWatcher::OnConnectivityStateChange(
case GRPC_CHANNEL_TRANSIENT_FAILURE:
policy_->channel_control_helper()->UpdateState(
GRPC_CHANNEL_TRANSIENT_FAILURE, status,
MakeRefCounted<TransientFailurePicker>(status));
MakeRefCounted<TransientFailurePicker>(absl::UnavailableError(
absl::StrCat("health watch: ", status.message()))));
break;
case GRPC_CHANNEL_SHUTDOWN:
Crash("health watcher reported state SHUTDOWN");
@ -1552,7 +1553,8 @@ void OldPickFirst::HealthWatcher::OnConnectivityStateChange(
case GRPC_CHANNEL_TRANSIENT_FAILURE:
policy_->channel_control_helper()->UpdateState(
GRPC_CHANNEL_TRANSIENT_FAILURE, status,
MakeRefCounted<TransientFailurePicker>(status));
MakeRefCounted<TransientFailurePicker>(absl::UnavailableError(
absl::StrCat("health watch: ", status.message()))));
break;
case GRPC_CHANNEL_SHUTDOWN:
Crash("health watcher reported state SHUTDOWN");
@ -1644,9 +1646,9 @@ void OldPickFirst::SubchannelList::SubchannelData::OnConnectivityStateChange(
// If there is a pending update, switch to the pending update.
if (p->latest_pending_subchannel_list_ != nullptr) {
GRPC_TRACE_LOG(pick_first, INFO)
<< "Pick First " << p << " promoting pending subchannel "
<< "list " << p->latest_pending_subchannel_list_.get()
<< " to replace " << p->subchannel_list_.get();
<< "Pick First " << p << " promoting pending subchannel list "
<< p->latest_pending_subchannel_list_.get() << " to replace "
<< p->subchannel_list_.get();
p->UnsetSelectedSubchannel();
p->subchannel_list_ = std::move(p->latest_pending_subchannel_list_);
// Set our state to that of the pending subchannel list.

@ -21,6 +21,7 @@
#include <utility>
#include "absl/status/status.h"
#include "absl/strings/string_view.h"
#include <grpc/impl/connectivity_state.h>
#include <grpc/support/port_platform.h>
@ -102,6 +103,9 @@ class SubchannelInterface : public DualRefCounted<SubchannelInterface> {
// make this API public.
virtual void CancelDataWatcher(DataWatcherInterface* watcher) = 0;
// Return the address in URI format.
virtual std::string address() const = 0;
protected:
void Orphaned() override {}
};
@ -136,6 +140,10 @@ class DelegatingSubchannel : public SubchannelInterface {
wrapped_subchannel_->CancelDataWatcher(watcher);
}
std::string address() const override {
return wrapped_subchannel_->address();
}
private:
RefCountedPtr<SubchannelInterface> wrapped_subchannel_;
};

@ -80,6 +80,8 @@ class LoadBalancedCallDestinationTraits {
return call_destination_;
}
std::string address() const override { return "test"; }
private:
const RefCountedPtr<UnstartedCallDestination> call_destination_;
};

@ -118,6 +118,8 @@ class LoadBalancedCallDestinationTest : public YodelTest {
return call_destination_;
}
std::string address() const override { return "test"; }
private:
const RefCountedPtr<UnstartedCallDestination> call_destination_;
};

@ -124,6 +124,8 @@ class BenchmarkHelper : public std::enable_shared_from_this<BenchmarkHelper> {
void CancelDataWatcher(DataWatcherInterface* watcher) override {}
std::string address() const override { return "test"; }
private:
void AddConnectivityWatcherInternal(
std::shared_ptr<ConnectivityStateWatcherInterface> watcher) {

@ -121,6 +121,8 @@ class LoadBalancingPolicyTest : public ::testing::Test {
SubchannelState* state() const { return state_; }
std::string address() const override { return state_->address_; }
private:
// Converts between
// SubchannelInterface::ConnectivityStateWatcherInterface and

Loading…
Cancel
Save