mirror of https://github.com/grpc/grpc.git
[EventEngine] Improve lock contention in WorkStealingThreadPool (alternative) (#34065)
Proposed alternative to https://github.com/grpc/grpc/pull/34024. This version has a simpler, faster busy-count implementation based on a sharded set of atomic counts: fast increment/decrement operations, relatively slower summation of total counts (which need to happen much less frequently).pull/33175/head
parent
b85b57fdc7
commit
108af0a94f
21 changed files with 489 additions and 153 deletions
@ -0,0 +1,58 @@ |
|||||||
|
// Copyright 2023 The gRPC Authors
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
#include <grpc/support/port_platform.h> |
||||||
|
|
||||||
|
#include "src/core/lib/event_engine/thread_pool/thread_count.h" |
||||||
|
|
||||||
|
#include <inttypes.h> |
||||||
|
|
||||||
|
#include "absl/time/clock.h" |
||||||
|
#include "absl/time/time.h" |
||||||
|
|
||||||
|
#include <grpc/support/log.h> |
||||||
|
|
||||||
|
namespace grpc_event_engine { |
||||||
|
namespace experimental { |
||||||
|
|
||||||
|
// -------- LivingThreadCount --------
|
||||||
|
|
||||||
|
void LivingThreadCount::BlockUntilThreadCount(size_t desired_threads, |
||||||
|
const char* why) { |
||||||
|
constexpr grpc_core::Duration log_rate = grpc_core::Duration::Seconds(3); |
||||||
|
while (true) { |
||||||
|
auto curr_threads = WaitForCountChange(desired_threads, log_rate); |
||||||
|
if (curr_threads == desired_threads) break; |
||||||
|
GRPC_LOG_EVERY_N_SEC_DELAYED( |
||||||
|
log_rate.seconds(), GPR_DEBUG, |
||||||
|
"Waiting for thread pool to idle before %s. (%" PRIdPTR " to %" PRIdPTR |
||||||
|
")", |
||||||
|
why, curr_threads, desired_threads); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
size_t LivingThreadCount::WaitForCountChange(size_t desired_threads, |
||||||
|
grpc_core::Duration timeout) { |
||||||
|
size_t count; |
||||||
|
auto deadline = absl::Now() + absl::Milliseconds(timeout.millis()); |
||||||
|
do { |
||||||
|
grpc_core::MutexLock lock(&mu_); |
||||||
|
count = CountLocked(); |
||||||
|
if (count == desired_threads) break; |
||||||
|
cv_.WaitWithDeadline(&mu_, deadline); |
||||||
|
} while (absl::Now() < deadline); |
||||||
|
return count; |
||||||
|
} |
||||||
|
|
||||||
|
} // namespace experimental
|
||||||
|
} // namespace grpc_event_engine
|
@ -0,0 +1,161 @@ |
|||||||
|
// Copyright 2023 The gRPC Authors
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
#ifndef GRPC_SRC_CORE_LIB_EVENT_ENGINE_THREAD_POOL_THREAD_COUNT_H |
||||||
|
#define GRPC_SRC_CORE_LIB_EVENT_ENGINE_THREAD_POOL_THREAD_COUNT_H |
||||||
|
|
||||||
|
#include <grpc/support/port_platform.h> |
||||||
|
|
||||||
|
#include <atomic> |
||||||
|
#include <cstddef> |
||||||
|
#include <numeric> |
||||||
|
#include <utility> |
||||||
|
#include <vector> |
||||||
|
|
||||||
|
#include "absl/base/thread_annotations.h" |
||||||
|
|
||||||
|
#include <grpc/support/cpu.h> |
||||||
|
|
||||||
|
#include "src/core/lib/gpr/useful.h" |
||||||
|
#include "src/core/lib/gprpp/sync.h" |
||||||
|
#include "src/core/lib/gprpp/time.h" |
||||||
|
|
||||||
|
namespace grpc_event_engine { |
||||||
|
namespace experimental { |
||||||
|
|
||||||
|
// Tracks counts across some fixed number of shards.
|
||||||
|
// It is intended for fast increment/decrement operations, but a slower overall
|
||||||
|
// count operation.
|
||||||
|
class BusyThreadCount { |
||||||
|
public: |
||||||
|
// Increments a per-shard counter on construction, decrements on destruction.
|
||||||
|
class AutoThreadCounter { |
||||||
|
public: |
||||||
|
AutoThreadCounter(BusyThreadCount* counter, size_t idx) |
||||||
|
: counter_(counter), idx_(idx) { |
||||||
|
counter_->Increment(idx_); |
||||||
|
} |
||||||
|
~AutoThreadCounter() { |
||||||
|
if (counter_ != nullptr) counter_->Decrement(idx_); |
||||||
|
} |
||||||
|
// not copyable
|
||||||
|
AutoThreadCounter(const AutoThreadCounter&) = delete; |
||||||
|
AutoThreadCounter& operator=(const AutoThreadCounter&) = delete; |
||||||
|
// moveable
|
||||||
|
AutoThreadCounter(AutoThreadCounter&& other) noexcept { |
||||||
|
counter_ = std::exchange(other.counter_, nullptr); |
||||||
|
idx_ = other.idx_; |
||||||
|
} |
||||||
|
AutoThreadCounter& operator=(AutoThreadCounter&& other) noexcept { |
||||||
|
counter_ = std::exchange(other.counter_, nullptr); |
||||||
|
idx_ = other.idx_; |
||||||
|
return *this; |
||||||
|
} |
||||||
|
|
||||||
|
private: |
||||||
|
BusyThreadCount* counter_; |
||||||
|
size_t idx_; |
||||||
|
}; |
||||||
|
|
||||||
|
BusyThreadCount() : shards_(grpc_core::Clamp(gpr_cpu_num_cores(), 2u, 64u)) {} |
||||||
|
AutoThreadCounter MakeAutoThreadCounter(size_t idx) { |
||||||
|
return AutoThreadCounter(this, idx); |
||||||
|
}; |
||||||
|
void Increment(size_t idx) { |
||||||
|
shards_[idx].busy_count.fetch_add(1, std::memory_order_relaxed); |
||||||
|
} |
||||||
|
void Decrement(size_t idx) { |
||||||
|
shards_[idx].busy_count.fetch_sub(1, std::memory_order_relaxed); |
||||||
|
} |
||||||
|
size_t count() { |
||||||
|
return std::accumulate( |
||||||
|
shards_.begin(), shards_.end(), 0, [](size_t running, ShardedData& d) { |
||||||
|
return running + d.busy_count.load(std::memory_order_relaxed); |
||||||
|
}); |
||||||
|
} |
||||||
|
// Returns some valid index into the per-shard data, which is rotated on every
|
||||||
|
// call to distribute load and reduce contention.
|
||||||
|
size_t NextIndex() { return next_idx_.fetch_add(1) % shards_.size(); } |
||||||
|
|
||||||
|
private: |
||||||
|
struct ShardedData { |
||||||
|
std::atomic<size_t> busy_count{0}; |
||||||
|
} GPR_ALIGN_STRUCT(GPR_CACHELINE_SIZE); |
||||||
|
|
||||||
|
std::vector<ShardedData> shards_; |
||||||
|
std::atomic<size_t> next_idx_{0}; |
||||||
|
}; |
||||||
|
|
||||||
|
// Tracks the number of living threads. It is intended for a fast count
|
||||||
|
// operation, with relatively slower increment/decrement operations.
|
||||||
|
class LivingThreadCount { |
||||||
|
public: |
||||||
|
// Increments the global counter on construction, decrements on destruction.
|
||||||
|
class AutoThreadCounter { |
||||||
|
public: |
||||||
|
explicit AutoThreadCounter(LivingThreadCount* counter) : counter_(counter) { |
||||||
|
counter_->Increment(); |
||||||
|
} |
||||||
|
~AutoThreadCounter() { |
||||||
|
if (counter_ != nullptr) counter_->Decrement(); |
||||||
|
} |
||||||
|
// not copyable
|
||||||
|
AutoThreadCounter(const AutoThreadCounter&) = delete; |
||||||
|
AutoThreadCounter& operator=(const AutoThreadCounter&) = delete; |
||||||
|
// moveable
|
||||||
|
AutoThreadCounter(AutoThreadCounter&& other) noexcept { |
||||||
|
counter_ = std::exchange(other.counter_, nullptr); |
||||||
|
} |
||||||
|
AutoThreadCounter& operator=(AutoThreadCounter&& other) noexcept { |
||||||
|
counter_ = std::exchange(other.counter_, nullptr); |
||||||
|
return *this; |
||||||
|
} |
||||||
|
|
||||||
|
private: |
||||||
|
LivingThreadCount* counter_; |
||||||
|
}; |
||||||
|
|
||||||
|
AutoThreadCounter MakeAutoThreadCounter() { return AutoThreadCounter(this); }; |
||||||
|
void Increment() ABSL_LOCKS_EXCLUDED(mu_) { |
||||||
|
grpc_core::MutexLock lock(&mu_); |
||||||
|
++living_count_; |
||||||
|
cv_.SignalAll(); |
||||||
|
} |
||||||
|
void Decrement() ABSL_LOCKS_EXCLUDED(mu_) { |
||||||
|
grpc_core::MutexLock lock(&mu_); |
||||||
|
--living_count_; |
||||||
|
cv_.SignalAll(); |
||||||
|
} |
||||||
|
void BlockUntilThreadCount(size_t desired_threads, const char* why) |
||||||
|
ABSL_LOCKS_EXCLUDED(mu_); |
||||||
|
size_t count() ABSL_LOCKS_EXCLUDED(mu_) { |
||||||
|
grpc_core::MutexLock lock(&mu_); |
||||||
|
return CountLocked(); |
||||||
|
} |
||||||
|
|
||||||
|
private: |
||||||
|
size_t CountLocked() ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_) { |
||||||
|
return living_count_; |
||||||
|
} |
||||||
|
size_t WaitForCountChange(size_t desired_threads, |
||||||
|
grpc_core::Duration timeout); |
||||||
|
|
||||||
|
grpc_core::Mutex mu_; |
||||||
|
grpc_core::CondVar cv_ ABSL_GUARDED_BY(mu_); |
||||||
|
size_t living_count_ ABSL_GUARDED_BY(mu_) = 0; |
||||||
|
}; |
||||||
|
|
||||||
|
} // namespace experimental
|
||||||
|
} // namespace grpc_event_engine
|
||||||
|
|
||||||
|
#endif // GRPC_SRC_CORE_LIB_EVENT_ENGINE_THREAD_POOL_THREAD_COUNT_H
|
Loading…
Reference in new issue