[call-v3][client-channel] Add benchmarks for lb picks (#37052)
This change adds two new benchmarks.
Benchmark 1: `bm_picker`
------
Measures the pick performance of various load balancing policies. For now we cover `pick_first` and `weighted_round_robin` at 1, 10, 100, 1000, 10000, and 100000 backends.
Today's output:
```
------------------------------------------------------------------------------
Benchmark Time CPU Iterations
------------------------------------------------------------------------------
BM_Pick/pick_first/1 20.4 ns 20.4 ns 68285
BM_Pick/pick_first/10 20.6 ns 20.6 ns 68274
BM_Pick/pick_first/100 20.5 ns 20.5 ns 67817
BM_Pick/pick_first/1000 20.6 ns 20.6 ns 67347
BM_Pick/pick_first/10000 20.7 ns 20.7 ns 67317
BM_Pick/pick_first/100000 20.9 ns 20.9 ns 67385
BM_Pick/weighted_round_robin/1 54.7 ns 54.7 ns 26641
BM_Pick/weighted_round_robin/10 54.2 ns 54.2 ns 25828
BM_Pick/weighted_round_robin/100 55.2 ns 55.2 ns 26210
BM_Pick/weighted_round_robin/1000 54.1 ns 54.1 ns 25678
BM_Pick/weighted_round_robin/10000 77.3 ns 76.6 ns 15776
BM_Pick/weighted_round_robin/100000 148 ns 148 ns 9882
```
Benchmark 2: `bm_load_balanced_call_destination`
-----
This benchmark measures call performance when a call spine passes through a `LoadBalancedCallDestination`; the additional `BM_LoadBalancedCallDestination` benchmark also measures the construction/destruction cost of this object.
We do not consider picker performance in this benchmark as it's separately covered by `bm_picker` above.
Today's output:
```
-----------------------------------------------------------------------------------------------------------------------------------------
Benchmark Time CPU Iterations
-----------------------------------------------------------------------------------------------------------------------------------------
BM_UnaryWithSpawnPerEnd<UnstartedCallDestinationFixture<LoadBalancedCallDestinationTraits>> 1255 ns 1255 ns 1076
BM_UnaryWithSpawnPerOp<UnstartedCallDestinationFixture<LoadBalancedCallDestinationTraits>> 1459 ns 1459 ns 939
BM_ClientToServerStreaming<UnstartedCallDestinationFixture<LoadBalancedCallDestinationTraits>> 209 ns 209 ns 6775
BM_LoadBalancedCallDestination 92.8 ns 92.8 ns 15063
```
Notes
------
There's some duplicated code between the benchmarks and the tests. This is OK: as the tests evolve we'll likely want to add more checks to the fixtures, whereas as the benchmarks evolve we may well want to optimize the fixtures so that the performance of the systems under test dominates the measurement. That is, the duplicated code is expected to follow different evolutionary tracks.
Closes #37052
COPYBARA_INTEGRATE_REVIEW=https://github.com/grpc/grpc/pull/37052 from ctiller:moar-benchy 30c7072d87
PiperOrigin-RevId: 658181731
pull/37347/head
parent
b40d919cf2
commit
5638afba6e
10 changed files with 598 additions and 1 deletions
@ -0,0 +1,138 @@ |
||||
// Copyright 2024 gRPC authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <benchmark/benchmark.h> |
||||
|
||||
#include "absl/memory/memory.h" |
||||
#include "absl/strings/string_view.h" |
||||
|
||||
#include <grpc/grpc.h> |
||||
|
||||
#include "src/core/client_channel/load_balanced_call_destination.h" |
||||
#include "src/core/lib/address_utils/parse_address.h" |
||||
#include "test/core/transport/call_spine_benchmarks.h" |
||||
|
||||
namespace grpc_core { |
||||
namespace { |
||||
|
||||
// Request path installed into the client initial metadata of every
// benchmarked call.
const Slice kTestPath(Slice::FromExternalString("/foo/bar"));
||||
|
||||
class LoadBalancedCallDestinationTraits { |
||||
public: |
||||
RefCountedPtr<UnstartedCallDestination> CreateCallDestination( |
||||
RefCountedPtr<UnstartedCallDestination> final_destination) { |
||||
picker_observable_.Set(MakeRefCounted<TestPicker>( |
||||
MakeRefCounted<TestSubchannel>(std::move(final_destination)))); |
||||
return MakeRefCounted<LoadBalancedCallDestination>(picker_observable_); |
||||
} |
||||
|
||||
ClientMetadataHandle MakeClientInitialMetadata() { |
||||
auto md = Arena::MakePooled<ClientMetadata>(); |
||||
md->Set(HttpPathMetadata(), kTestPath.Copy()); |
||||
return md; |
||||
} |
||||
|
||||
ServerMetadataHandle MakeServerInitialMetadata() { |
||||
return Arena::MakePooled<ServerMetadata>(); |
||||
} |
||||
|
||||
MessageHandle MakePayload() { return Arena::MakePooled<Message>(); } |
||||
|
||||
ServerMetadataHandle MakeServerTrailingMetadata() { |
||||
return Arena::MakePooled<ServerMetadata>(); |
||||
} |
||||
|
||||
private: |
||||
class TestSubchannel : public SubchannelInterfaceWithCallDestination { |
||||
public: |
||||
explicit TestSubchannel( |
||||
RefCountedPtr<UnstartedCallDestination> call_destination) |
||||
: call_destination_(std::move(call_destination)) {} |
||||
|
||||
void WatchConnectivityState( |
||||
std::unique_ptr<ConnectivityStateWatcherInterface>) override { |
||||
Crash("not implemented"); |
||||
} |
||||
void CancelConnectivityStateWatch( |
||||
ConnectivityStateWatcherInterface*) override { |
||||
Crash("not implemented"); |
||||
} |
||||
void RequestConnection() override { Crash("not implemented"); } |
||||
void ResetBackoff() override { Crash("not implemented"); } |
||||
void AddDataWatcher(std::unique_ptr<DataWatcherInterface>) override { |
||||
Crash("not implemented"); |
||||
} |
||||
void CancelDataWatcher(DataWatcherInterface*) override { |
||||
Crash("not implemented"); |
||||
} |
||||
RefCountedPtr<UnstartedCallDestination> call_destination() override { |
||||
return call_destination_; |
||||
} |
||||
|
||||
private: |
||||
const RefCountedPtr<UnstartedCallDestination> call_destination_; |
||||
}; |
||||
|
||||
class TestPicker final : public LoadBalancingPolicy::SubchannelPicker { |
||||
public: |
||||
explicit TestPicker(RefCountedPtr<TestSubchannel> subchannel) |
||||
: subchannel_{subchannel} {} |
||||
|
||||
LoadBalancingPolicy::PickResult Pick( |
||||
LoadBalancingPolicy::PickArgs) override { |
||||
return LoadBalancingPolicy::PickResult::Complete(subchannel_); |
||||
} |
||||
|
||||
private: |
||||
RefCountedPtr<TestSubchannel> subchannel_; |
||||
}; |
||||
|
||||
ClientChannel::PickerObservable picker_observable_{nullptr}; |
||||
}; |
||||
// Instantiate the shared call spine benchmarks over a fixture driven by
// LoadBalancedCallDestinationTraits.
GRPC_CALL_SPINE_BENCHMARK(
    UnstartedCallDestinationFixture<LoadBalancedCallDestinationTraits>);
||||
|
||||
void BM_LoadBalancedCallDestination(benchmark::State& state) { |
||||
class FinalDestination : public UnstartedCallDestination { |
||||
public: |
||||
void StartCall(UnstartedCallHandler) override {} |
||||
void Orphaned() override {} |
||||
}; |
||||
LoadBalancedCallDestinationTraits traits; |
||||
auto final_destination = MakeRefCounted<FinalDestination>(); |
||||
for (auto _ : state) { |
||||
traits.CreateCallDestination(final_destination); |
||||
} |
||||
} |
||||
BENCHMARK(BM_LoadBalancedCallDestination); |
||||
|
||||
} // namespace
|
||||
} // namespace grpc_core
|
||||
|
||||
// Some distros have RunSpecifiedBenchmarks under the benchmark namespace,
|
||||
// and others do not. This allows us to support both modes.
|
||||
namespace benchmark { |
||||
void RunTheBenchmarksNamespaced() { RunSpecifiedBenchmarks(); } |
||||
} // namespace benchmark
|
||||
|
||||
int main(int argc, char** argv) { |
||||
::benchmark::Initialize(&argc, argv); |
||||
grpc_init(); |
||||
{ |
||||
auto ee = grpc_event_engine::experimental::GetDefaultEventEngine(); |
||||
benchmark::RunTheBenchmarksNamespaced(); |
||||
} |
||||
grpc_shutdown(); |
||||
return 0; |
||||
} |
@ -0,0 +1,261 @@ |
||||
// Copyright 2024 gRPC authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <memory> |
||||
|
||||
#include <benchmark/benchmark.h> |
||||
|
||||
#include "absl/strings/string_view.h" |
||||
|
||||
#include <grpc/grpc.h> |
||||
|
||||
#include "src/core/client_channel/subchannel_interface_internal.h" |
||||
#include "src/core/lib/address_utils/parse_address.h" |
||||
#include "src/core/lib/config/core_configuration.h" |
||||
#include "src/core/lib/event_engine/channel_args_endpoint_config.h" |
||||
#include "src/core/lib/event_engine/default_event_engine.h" |
||||
#include "src/core/lib/transport/connectivity_state.h" |
||||
#include "src/core/load_balancing/health_check_client_internal.h" |
||||
#include "src/core/load_balancing/lb_policy.h" |
||||
#include "src/core/util/json/json_reader.h" |
||||
|
||||
namespace grpc_core { |
||||
namespace { |
||||
|
||||
class BenchmarkHelper : public std::enable_shared_from_this<BenchmarkHelper> { |
||||
public: |
||||
BenchmarkHelper(absl::string_view name, absl::string_view config) |
||||
: name_(name), config_json_(config) { |
||||
CHECK(lb_policy_ != nullptr) << "Failed to create LB policy: " << name; |
||||
auto parsed_json = JsonParse(std::string(config_json_)); |
||||
CHECK_OK(parsed_json); |
||||
auto config_parsed = |
||||
CoreConfiguration::Get().lb_policy_registry().ParseLoadBalancingConfig( |
||||
*parsed_json); |
||||
CHECK_OK(config_parsed); |
||||
config_ = std::move(*config_parsed); |
||||
} |
||||
|
||||
RefCountedPtr<LoadBalancingPolicy::SubchannelPicker> GetPicker() { |
||||
MutexLock lock(&mu_); |
||||
while (picker_ == nullptr) { |
||||
cv_.Wait(&mu_); |
||||
} |
||||
return picker_; |
||||
} |
||||
|
||||
void UpdateLbPolicy(size_t num_endpoints) { |
||||
{ |
||||
MutexLock lock(&mu_); |
||||
picker_ = nullptr; |
||||
work_serializer_->Schedule( |
||||
[this, num_endpoints]() { |
||||
EndpointAddressesList addresses; |
||||
for (size_t i = 0; i < num_endpoints; i++) { |
||||
grpc_resolved_address addr; |
||||
int port = i % 65536; |
||||
int ip = i / 65536; |
||||
CHECK_LT(ip, 256); |
||||
CHECK(grpc_parse_uri( |
||||
URI::Parse(absl::StrCat("ipv4:127.0.0.", ip, ":", port)) |
||||
.value(), |
||||
&addr)); |
||||
addresses.emplace_back(addr, ChannelArgs()); |
||||
} |
||||
CHECK_OK(lb_policy_->UpdateLocked(LoadBalancingPolicy::UpdateArgs{ |
||||
std::make_shared<EndpointAddressesListIterator>( |
||||
std::move(addresses)), |
||||
config_, "", ChannelArgs()})); |
||||
}, |
||||
DEBUG_LOCATION); |
||||
} |
||||
work_serializer_->DrainQueue(); |
||||
} |
||||
|
||||
private: |
||||
class SubchannelFake final : public SubchannelInterface { |
||||
public: |
||||
explicit SubchannelFake(BenchmarkHelper* helper) : helper_(helper) {} |
||||
|
||||
void WatchConnectivityState( |
||||
std::unique_ptr<ConnectivityStateWatcherInterface> unique_watcher) |
||||
override { |
||||
AddConnectivityWatcherInternal( |
||||
std::shared_ptr<ConnectivityStateWatcherInterface>( |
||||
std::move(unique_watcher))); |
||||
} |
||||
|
||||
void CancelConnectivityStateWatch( |
||||
ConnectivityStateWatcherInterface* watcher) override { |
||||
MutexLock lock(&helper_->mu_); |
||||
helper_->connectivity_watchers_.erase(watcher); |
||||
} |
||||
|
||||
void RequestConnection() override { LOG(FATAL) << "unimplemented"; } |
||||
|
||||
void ResetBackoff() override { LOG(FATAL) << "unimplemented"; } |
||||
|
||||
void AddDataWatcher( |
||||
std::unique_ptr<DataWatcherInterface> watcher) override { |
||||
auto* watcher_internal = |
||||
DownCast<InternalSubchannelDataWatcherInterface*>(watcher.get()); |
||||
if (watcher_internal->type() == HealthProducer::Type()) { |
||||
AddConnectivityWatcherInternal( |
||||
DownCast<HealthWatcher*>(watcher_internal)->TakeWatcher()); |
||||
} else { |
||||
LOG(FATAL) << "unimplemented watcher type: " |
||||
<< watcher_internal->type(); |
||||
} |
||||
} |
||||
|
||||
void CancelDataWatcher(DataWatcherInterface* watcher) override {} |
||||
|
||||
private: |
||||
void AddConnectivityWatcherInternal( |
||||
std::shared_ptr<ConnectivityStateWatcherInterface> watcher) { |
||||
{ |
||||
MutexLock lock(&helper_->mu_); |
||||
helper_->work_serializer_->Schedule( |
||||
[watcher]() { |
||||
watcher->OnConnectivityStateChange(GRPC_CHANNEL_READY, |
||||
absl::OkStatus()); |
||||
}, |
||||
DEBUG_LOCATION); |
||||
helper_->connectivity_watchers_.insert(std::move(watcher)); |
||||
} |
||||
helper_->work_serializer_->DrainQueue(); |
||||
} |
||||
|
||||
BenchmarkHelper* helper_; |
||||
}; |
||||
|
||||
class LbHelper final : public LoadBalancingPolicy::ChannelControlHelper { |
||||
public: |
||||
explicit LbHelper(BenchmarkHelper* helper) : helper_(helper) {} |
||||
|
||||
RefCountedPtr<SubchannelInterface> CreateSubchannel( |
||||
const grpc_resolved_address& address, |
||||
const ChannelArgs& per_address_args, const ChannelArgs& args) override { |
||||
return MakeRefCounted<SubchannelFake>(helper_); |
||||
} |
||||
|
||||
void UpdateState( |
||||
grpc_connectivity_state state, const absl::Status& status, |
||||
RefCountedPtr<LoadBalancingPolicy::SubchannelPicker> picker) override { |
||||
MutexLock lock(&helper_->mu_); |
||||
helper_->picker_ = std::move(picker); |
||||
helper_->cv_.SignalAll(); |
||||
} |
||||
|
||||
void RequestReresolution() override { LOG(FATAL) << "unimplemented"; } |
||||
|
||||
absl::string_view GetTarget() override { return "foo"; } |
||||
|
||||
absl::string_view GetAuthority() override { return "foo"; } |
||||
|
||||
RefCountedPtr<grpc_channel_credentials> GetChannelCredentials() override { |
||||
LOG(FATAL) << "unimplemented"; |
||||
} |
||||
|
||||
RefCountedPtr<grpc_channel_credentials> GetUnsafeChannelCredentials() |
||||
override { |
||||
LOG(FATAL) << "unimplemented"; |
||||
} |
||||
|
||||
grpc_event_engine::experimental::EventEngine* GetEventEngine() override { |
||||
return helper_->event_engine_.get(); |
||||
} |
||||
|
||||
GlobalStatsPluginRegistry::StatsPluginGroup& GetStatsPluginGroup() |
||||
override { |
||||
return helper_->stats_plugin_group_; |
||||
} |
||||
|
||||
void AddTraceEvent(TraceSeverity severity, |
||||
absl::string_view message) override { |
||||
LOG(FATAL) << "unimplemented"; |
||||
} |
||||
|
||||
BenchmarkHelper* helper_; |
||||
}; |
||||
|
||||
const absl::string_view name_; |
||||
const absl::string_view config_json_; |
||||
std::shared_ptr<grpc_event_engine::experimental::EventEngine> event_engine_ = |
||||
grpc_event_engine::experimental::GetDefaultEventEngine(); |
||||
std::shared_ptr<WorkSerializer> work_serializer_ = |
||||
std::make_shared<WorkSerializer>(event_engine_); |
||||
OrphanablePtr<LoadBalancingPolicy> lb_policy_ = |
||||
CoreConfiguration::Get().lb_policy_registry().CreateLoadBalancingPolicy( |
||||
name_, LoadBalancingPolicy::Args{work_serializer_, |
||||
std::make_unique<LbHelper>(this), |
||||
ChannelArgs()}); |
||||
RefCountedPtr<LoadBalancingPolicy::Config> config_; |
||||
Mutex mu_; |
||||
CondVar cv_; |
||||
RefCountedPtr<LoadBalancingPolicy::SubchannelPicker> picker_ |
||||
ABSL_GUARDED_BY(mu_); |
||||
absl::flat_hash_set< |
||||
std::shared_ptr<SubchannelInterface::ConnectivityStateWatcherInterface>> |
||||
connectivity_watchers_ ABSL_GUARDED_BY(mu_); |
||||
GlobalStatsPluginRegistry::StatsPluginGroup stats_plugin_group_ = |
||||
GlobalStatsPluginRegistry::GetStatsPluginsForChannel( |
||||
experimental::StatsPluginChannelScope( |
||||
"foo", "foo", |
||||
grpc_event_engine::experimental::ChannelArgsEndpointConfig{ |
||||
ChannelArgs{}})); |
||||
}; |
||||
|
||||
// Measures a single Pick() on the picker that `helper`'s LB policy produces
// once it has been updated with state.range(0) endpoints. The policy update
// and picker retrieval happen once, outside the timed loop.
void BM_Pick(benchmark::State& state, BenchmarkHelper& helper) {
  const auto num_backends = state.range(0);
  helper.UpdateLbPolicy(num_backends);
  auto picker = helper.GetPicker();
  for (auto _ : state) {
    // Path "/foo/bar"; the remaining two PickArgs fields are left null.
    picker->Pick(LoadBalancingPolicy::PickArgs{"/foo/bar", nullptr, nullptr});
  }
}
||||
#define PICKER_BENCHMARK(policy, config) \ |
||||
BENCHMARK_CAPTURE(BM_Pick, policy, \
|
||||
[]() -> BenchmarkHelper& { \
|
||||
static auto* helper = \
|
||||
new BenchmarkHelper(#policy, config); \
|
||||
return *helper; \
|
||||
}()) \
|
||||
->RangeMultiplier(10) \
|
||||
->Range(1, 100000) |
||||
|
||||
PICKER_BENCHMARK(pick_first, "[{\"pick_first\":{}}]"); |
||||
PICKER_BENCHMARK( |
||||
weighted_round_robin, |
||||
"[{\"weighted_round_robin\":{\"enableOobLoadReport\":false}}]"); |
||||
|
||||
} // namespace
|
||||
} // namespace grpc_core
|
||||
|
||||
// Some distros have RunSpecifiedBenchmarks under the benchmark namespace,
|
||||
// and others do not. This allows us to support both modes.
|
||||
namespace benchmark { |
||||
void RunTheBenchmarksNamespaced() { RunSpecifiedBenchmarks(); } |
||||
} // namespace benchmark
|
||||
|
||||
// Entry point: lets the benchmark library consume its command-line flags,
// then runs the registered benchmarks between grpc_init() and
// grpc_shutdown().
int main(int argc, char** argv) {
  ::benchmark::Initialize(&argc, argv);

  grpc_init();
  benchmark::RunTheBenchmarksNamespaced();
  grpc_shutdown();

  return 0;
}
Loading…
Reference in new issue