From b71b3b410cb1adfbd2a0495b4f325825e86df364 Mon Sep 17 00:00:00 2001 From: Craig Tiller Date: Mon, 22 May 2023 09:14:34 -0700 Subject: [PATCH] [memory] Trim down 'per-cpu' sharding a little (#33203) Most of these data structures need to scale a bit like per-cpu, but not entirely. We can have more than one cpu hit the same instance in most cases, and probably want to cap out before the hundreds of shards some platforms have. --------- Co-authored-by: ctiller --- CMakeLists.txt | 4 +++ Makefile | 2 ++ build_autogenerated.yaml | 4 +++ config.m4 | 1 + config.w32 | 1 + gRPC-Core.podspec | 1 + grpc.gemspec | 1 + grpc.gyp | 3 ++ package.xml | 1 + src/core/BUILD | 4 +++ src/core/ext/xds/xds_client_stats.h | 2 +- src/core/lib/channel/channelz.h | 2 +- src/core/lib/debug/event_log.h | 2 +- src/core/lib/debug/stats_data.h | 2 +- src/core/lib/gprpp/per_cpu.cc | 33 +++++++++++++++++++ src/core/lib/gprpp/per_cpu.h | 35 +++++++++++++++++---- src/python/grpcio/grpc_core_dependencies.py | 1 + tools/codegen/core/gen_stats_data.py | 4 ++- tools/doxygen/Doxyfile.c++.internal | 1 + tools/doxygen/Doxyfile.core.internal | 1 + 20 files changed, 94 insertions(+), 11 deletions(-) create mode 100644 src/core/lib/gprpp/per_cpu.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index 593e69c6a92..af50afbce59 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2098,6 +2098,7 @@ add_library(grpc src/core/lib/experiments/config.cc src/core/lib/experiments/experiments.cc src/core/lib/gprpp/load_file.cc + src/core/lib/gprpp/per_cpu.cc src/core/lib/gprpp/status_helper.cc src/core/lib/gprpp/time.cc src/core/lib/gprpp/time_averaged_stats.cc @@ -2799,6 +2800,7 @@ add_library(grpc_unsecure src/core/lib/experiments/config.cc src/core/lib/experiments/experiments.cc src/core/lib/gprpp/load_file.cc + src/core/lib/gprpp/per_cpu.cc src/core/lib/gprpp/status_helper.cc src/core/lib/gprpp/time.cc src/core/lib/gprpp/time_averaged_stats.cc @@ -4332,6 +4334,7 @@ add_library(grpc_authorization_provider src/core/lib/experiments/config.cc src/core/lib/experiments/experiments.cc src/core/lib/gprpp/load_file.cc + src/core/lib/gprpp/per_cpu.cc src/core/lib/gprpp/status_helper.cc src/core/lib/gprpp/time.cc src/core/lib/gprpp/time_averaged_stats.cc @@ -11566,6 +11569,7 @@ add_executable(frame_test src/core/lib/experiments/config.cc src/core/lib/experiments/experiments.cc src/core/lib/gprpp/load_file.cc + src/core/lib/gprpp/per_cpu.cc src/core/lib/gprpp/status_helper.cc src/core/lib/gprpp/time.cc src/core/lib/gprpp/time_averaged_stats.cc diff --git a/Makefile b/Makefile index c950d86e080..e27291e1928 100644 --- a/Makefile +++ b/Makefile @@ -1478,6 +1478,7 @@ LIBGRPC_SRC = \ src/core/lib/experiments/config.cc \ src/core/lib/experiments/experiments.cc \ src/core/lib/gprpp/load_file.cc \ + src/core/lib/gprpp/per_cpu.cc \ src/core/lib/gprpp/status_helper.cc \ src/core/lib/gprpp/time.cc \ src/core/lib/gprpp/time_averaged_stats.cc \ @@ -2033,6 +2034,7 @@ LIBGRPC_UNSECURE_SRC = \ src/core/lib/experiments/config.cc \ src/core/lib/experiments/experiments.cc \ src/core/lib/gprpp/load_file.cc \ + src/core/lib/gprpp/per_cpu.cc \ src/core/lib/gprpp/status_helper.cc \ src/core/lib/gprpp/time.cc \ src/core/lib/gprpp/time_averaged_stats.cc \ diff --git a/build_autogenerated.yaml b/build_autogenerated.yaml index 12fa8f8905d..e315244a8b1 100644 --- a/build_autogenerated.yaml +++ b/build_autogenerated.yaml @@ -1527,6 +1527,7 @@ libs: - src/core/lib/experiments/config.cc - src/core/lib/experiments/experiments.cc - src/core/lib/gprpp/load_file.cc + - src/core/lib/gprpp/per_cpu.cc - src/core/lib/gprpp/status_helper.cc - src/core/lib/gprpp/time.cc - src/core/lib/gprpp/time_averaged_stats.cc @@ -2514,6 +2515,7 @@ libs: - src/core/lib/experiments/config.cc - src/core/lib/experiments/experiments.cc - src/core/lib/gprpp/load_file.cc + - src/core/lib/gprpp/per_cpu.cc - src/core/lib/gprpp/status_helper.cc - src/core/lib/gprpp/time.cc - src/core/lib/gprpp/time_averaged_stats.cc @@ -3891,6 +3893,7 @@ libs: - src/core/lib/experiments/config.cc - src/core/lib/experiments/experiments.cc - src/core/lib/gprpp/load_file.cc + - src/core/lib/gprpp/per_cpu.cc - src/core/lib/gprpp/status_helper.cc - src/core/lib/gprpp/time.cc - src/core/lib/gprpp/time_averaged_stats.cc @@ -7812,6 +7815,7 @@ targets: - src/core/lib/experiments/config.cc - src/core/lib/experiments/experiments.cc - src/core/lib/gprpp/load_file.cc + - src/core/lib/gprpp/per_cpu.cc - src/core/lib/gprpp/status_helper.cc - src/core/lib/gprpp/time.cc - src/core/lib/gprpp/time_averaged_stats.cc diff --git a/config.m4 b/config.m4 index 557bbb23dce..2575a5b4aca 100644 --- a/config.m4 +++ b/config.m4 @@ -594,6 +594,7 @@ if test "$PHP_GRPC" != "no"; then src/core/lib/gprpp/linux/env.cc \ src/core/lib/gprpp/load_file.cc \ src/core/lib/gprpp/mpscq.cc \ + src/core/lib/gprpp/per_cpu.cc \ src/core/lib/gprpp/posix/env.cc \ src/core/lib/gprpp/posix/stat.cc \ src/core/lib/gprpp/posix/thd.cc \ diff --git a/config.w32 b/config.w32 index 4049ab7309b..c7d6232e7dc 100644 --- a/config.w32 +++ b/config.w32 @@ -559,6 +559,7 @@ if (PHP_GRPC != "no") { "src\\core\\lib\\gprpp\\linux\\env.cc " + "src\\core\\lib\\gprpp\\load_file.cc " + "src\\core\\lib\\gprpp\\mpscq.cc " + + "src\\core\\lib\\gprpp\\per_cpu.cc " + "src\\core\\lib\\gprpp\\posix\\env.cc " + "src\\core\\lib\\gprpp\\posix\\stat.cc " + "src\\core\\lib\\gprpp\\posix\\thd.cc " + diff --git a/gRPC-Core.podspec b/gRPC-Core.podspec index 54c40fc247a..d05939de6a2 100644 --- a/gRPC-Core.podspec +++ b/gRPC-Core.podspec @@ -1311,6 +1311,7 @@ Pod::Spec.new do |s| 'src/core/lib/gprpp/orphanable.h', 'src/core/lib/gprpp/overload.h', 'src/core/lib/gprpp/packed_table.h', + 'src/core/lib/gprpp/per_cpu.cc', 'src/core/lib/gprpp/per_cpu.h', 'src/core/lib/gprpp/posix/env.cc', 'src/core/lib/gprpp/posix/stat.cc', diff --git a/grpc.gemspec b/grpc.gemspec index cc5629fd796..7e5b9a3da5c 100644 --- a/grpc.gemspec +++ b/grpc.gemspec @@ -1217,6 +1217,7 @@ Gem::Specification.new do |s| s.files += %w( src/core/lib/gprpp/orphanable.h ) s.files += %w( src/core/lib/gprpp/overload.h ) s.files += %w( src/core/lib/gprpp/packed_table.h ) + s.files += %w( src/core/lib/gprpp/per_cpu.cc ) s.files += %w( src/core/lib/gprpp/per_cpu.h ) s.files += %w( src/core/lib/gprpp/posix/env.cc ) s.files += %w( src/core/lib/gprpp/posix/stat.cc ) diff --git a/grpc.gyp b/grpc.gyp index e9722e6f3f2..b047a97fd11 100644 --- a/grpc.gyp +++ b/grpc.gyp @@ -781,6 +781,7 @@ 'src/core/lib/experiments/config.cc', 'src/core/lib/experiments/experiments.cc', 'src/core/lib/gprpp/load_file.cc', + 'src/core/lib/gprpp/per_cpu.cc', 'src/core/lib/gprpp/status_helper.cc', 'src/core/lib/gprpp/time.cc', 'src/core/lib/gprpp/time_averaged_stats.cc', @@ -1275,6 +1276,7 @@ 'src/core/lib/experiments/config.cc', 'src/core/lib/experiments/experiments.cc', 'src/core/lib/gprpp/load_file.cc', + 'src/core/lib/gprpp/per_cpu.cc', 'src/core/lib/gprpp/status_helper.cc', 'src/core/lib/gprpp/time.cc', 'src/core/lib/gprpp/time_averaged_stats.cc', @@ -1793,6 +1795,7 @@ 'src/core/lib/experiments/config.cc', 'src/core/lib/experiments/experiments.cc', 'src/core/lib/gprpp/load_file.cc', + 'src/core/lib/gprpp/per_cpu.cc', 'src/core/lib/gprpp/status_helper.cc', 'src/core/lib/gprpp/time.cc', 'src/core/lib/gprpp/time_averaged_stats.cc', diff --git a/package.xml b/package.xml index ff04fd18946..72ee09f1e2a 100644 --- a/package.xml +++ b/package.xml @@ -1199,6 +1199,7 @@ + diff --git a/src/core/BUILD b/src/core/BUILD index 240e6c362ca..81fb36f5afd 100644 --- a/src/core/BUILD +++ b/src/core/BUILD @@ -2395,10 +2395,14 @@ grpc_cc_library( grpc_cc_library( name = "per_cpu", + srcs = [ + "lib/gprpp/per_cpu.cc", + ], hdrs = [ "lib/gprpp/per_cpu.h", ], deps = [ + "useful", "//:exec_ctx", "//:gpr", ], diff --git a/src/core/ext/xds/xds_client_stats.h b/src/core/ext/xds/xds_client_stats.h index d2f592e41e6..3f3a776f613 100644 --- a/src/core/ext/xds/xds_client_stats.h +++ b/src/core/ext/xds/xds_client_stats.h @@ -239,7 +239,7 @@ class XdsClusterLocalityStats : public RefCounted { absl::string_view cluster_name_; absl::string_view eds_service_name_; RefCountedPtr name_; - PerCpu stats_{32}; + PerCpu stats_{PerCpuOptions().SetMaxShards(32).SetCpusPerShard(4)}; }; } // namespace grpc_core diff --git a/src/core/lib/channel/channelz.h b/src/core/lib/channel/channelz.h index f226188c999..772aa0fcde0 100644 --- a/src/core/lib/channel/channelz.h +++ b/src/core/lib/channel/channelz.h @@ -158,7 +158,7 @@ class PerCpuCallCountingHelper { std::atomic calls_failed{0}; std::atomic last_call_started_cycle{0}; }; - PerCpu per_cpu_data_; + PerCpu per_cpu_data_{PerCpuOptions().SetCpusPerShard(4)}; }; // Handles channelz bookkeeping for channels diff --git a/src/core/lib/debug/event_log.h b/src/core/lib/debug/event_log.h index e6669f03c4b..3458d95ea7d 100644 --- a/src/core/lib/debug/event_log.h +++ b/src/core/lib/debug/event_log.h @@ -71,7 +71,7 @@ class EventLog { std::vector EndCollection( absl::Span wanted_events); - PerCpu fragments_; + PerCpu fragments_{PerCpuOptions().SetCpusPerShard(2)}; gpr_cycle_counter collection_begin_; static std::atomic g_instance_; }; diff --git a/src/core/lib/debug/stats_data.h b/src/core/lib/debug/stats_data.h index ba4943eb0c1..c9e4d349eb5 100644 --- a/src/core/lib/debug/stats_data.h +++ b/src/core/lib/debug/stats_data.h @@ -294,7 +294,7 @@ class GlobalStatsCollector { HistogramCollector_16777216_20 http2_send_message_size; HistogramCollector_65536_26 http2_metadata_size; }; - PerCpu data_; + PerCpu data_{PerCpuOptions().SetCpusPerShard(4).SetMaxShards(32)}; }; } // namespace grpc_core diff --git a/src/core/lib/gprpp/per_cpu.cc b/src/core/lib/gprpp/per_cpu.cc new file mode 100644 index 00000000000..ee2edae764b --- /dev/null +++ b/src/core/lib/gprpp/per_cpu.cc @@ -0,0 +1,33 @@ +// Copyright 2023 gRPC authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "src/core/lib/gprpp/per_cpu.h" + +#include + +#include "src/core/lib/gpr/useful.h" + +namespace grpc_core { + +size_t PerCpuOptions::Shards() { + return ShardsForCpuCount(gpr_cpu_num_cores()); +} + +size_t PerCpuOptions::ShardsForCpuCount(size_t cpu_count) { + return Clamp(cpu_count / cpus_per_shard_, 1, max_shards_); +} + +} // namespace grpc_core diff --git a/src/core/lib/gprpp/per_cpu.h b/src/core/lib/gprpp/per_cpu.h index 2476b6f370f..5d287f8c7f0 100644 --- a/src/core/lib/gprpp/per_cpu.h +++ b/src/core/lib/gprpp/per_cpu.h @@ -22,18 +22,41 @@ #include #include -#include - #include "src/core/lib/iomgr/exec_ctx.h" namespace grpc_core { +class PerCpuOptions { + public: + // Set the number of cpus that colocate on the same shard + PerCpuOptions SetCpusPerShard(size_t cpus_per_shard) { + cpus_per_shard_ = std::max(1, cpus_per_shard); + return *this; + } + + // Set the maximum number of allowable shards + PerCpuOptions SetMaxShards(size_t max_shards) { + max_shards_ = std::max(1, max_shards); + return *this; + } + + size_t cpus_per_shard() const { return cpus_per_shard_; } + size_t max_shards() const { return max_shards_; } + + size_t Shards(); + size_t ShardsForCpuCount(size_t cpu_count); + + private: + size_t cpus_per_shard_ = 1; + size_t max_shards_ = std::numeric_limits::max(); +}; + template class PerCpu { public: - explicit PerCpu(size_t max = std::numeric_limits::max()) - : cpus_(std::min(max, gpr_cpu_num_cores())), - data_{new T[cpus_]} {} + // Options are not defaulted to try and force consideration of what the + // options specify. + explicit PerCpu(PerCpuOptions options) : cpus_(options.Shards()) {} T& this_cpu() { return data_[ExecCtx::Get()->starting_cpu() % cpus_]; } @@ -44,7 +67,7 @@ class PerCpu { private: const size_t cpus_; - std::unique_ptr data_; + std::unique_ptr data_{new T[cpus_]}; }; } // namespace grpc_core diff --git a/src/python/grpcio/grpc_core_dependencies.py b/src/python/grpcio/grpc_core_dependencies.py index 6f3165a3152..c61a17daaa4 100644 --- a/src/python/grpcio/grpc_core_dependencies.py +++ b/src/python/grpcio/grpc_core_dependencies.py @@ -568,6 +568,7 @@ CORE_SOURCE_FILES = [ 'src/core/lib/gprpp/linux/env.cc', 'src/core/lib/gprpp/load_file.cc', 'src/core/lib/gprpp/mpscq.cc', + 'src/core/lib/gprpp/per_cpu.cc', 'src/core/lib/gprpp/posix/env.cc', 'src/core/lib/gprpp/posix/stat.cc', 'src/core/lib/gprpp/posix/thd.cc', diff --git a/tools/codegen/core/gen_stats_data.py b/tools/codegen/core/gen_stats_data.py index 85e22146f76..f1e979760d0 100755 --- a/tools/codegen/core/gen_stats_data.py +++ b/tools/codegen/core/gen_stats_data.py @@ -369,7 +369,9 @@ with open('src/core/lib/debug/stats_data.h', 'w') as H: (ctr.max, ctr.buckets, ctr.name), file=H) print(" };", file=H) - print(" PerCpu data_;", file=H) + print( + " PerCpu data_{PerCpuOptions().SetCpusPerShard(4).SetMaxShards(32)};", + file=H) print("};", file=H) print("}", file=H) diff --git a/tools/doxygen/Doxyfile.c++.internal b/tools/doxygen/Doxyfile.c++.internal index 7eed490f54c..85fa376ed3c 100644 --- a/tools/doxygen/Doxyfile.c++.internal +++ b/tools/doxygen/Doxyfile.c++.internal @@ -2213,6 +2213,7 @@ src/core/lib/gprpp/notification.h \ src/core/lib/gprpp/orphanable.h \ src/core/lib/gprpp/overload.h \ src/core/lib/gprpp/packed_table.h \ +src/core/lib/gprpp/per_cpu.cc \ src/core/lib/gprpp/per_cpu.h \ src/core/lib/gprpp/posix/env.cc \ src/core/lib/gprpp/posix/stat.cc \ diff --git a/tools/doxygen/Doxyfile.core.internal b/tools/doxygen/Doxyfile.core.internal index a8c4e9d7131..afc79ccc943 100644 --- a/tools/doxygen/Doxyfile.core.internal +++ b/tools/doxygen/Doxyfile.core.internal @@ -1993,6 +1993,7 @@ src/core/lib/gprpp/notification.h \ src/core/lib/gprpp/orphanable.h \ src/core/lib/gprpp/overload.h \ src/core/lib/gprpp/packed_table.h \ +src/core/lib/gprpp/per_cpu.cc \ src/core/lib/gprpp/per_cpu.h \ src/core/lib/gprpp/posix/env.cc \ src/core/lib/gprpp/posix/stat.cc \