From f3dec9c995e5c2b85460107a9ad9d937facd9a49 Mon Sep 17 00:00:00 2001 From: Craig Tiller Date: Tue, 21 Feb 2017 10:02:31 -0800 Subject: [PATCH 1/3] Add counters for rmw atomic operations to microbenchmarks --- Makefile | 2 +- build.yaml | 2 +- include/grpc/impl/codegen/atm_gcc_atomic.h | 30 +++++++++---- src/core/lib/support/sync_posix.c | 9 ++-- test/cpp/microbenchmarks/bm_closure.cc | 51 ++++++++++++++++++++++ test/cpp/microbenchmarks/bm_fullstack.cc | 20 ++++++--- 6 files changed, 92 insertions(+), 22 deletions(-) diff --git a/Makefile b/Makefile index 8f92816672b..d47d967e477 100644 --- a/Makefile +++ b/Makefile @@ -217,7 +217,7 @@ CC_counters = $(DEFAULT_CC) CXX_counters = $(DEFAULT_CXX) LD_counters = $(DEFAULT_CC) LDXX_counters = $(DEFAULT_CXX) -CPPFLAGS_counters = -O2 -DGPR_MU_COUNTERS +CPPFLAGS_counters = -O2 -DGPR_LOW_LEVEL_COUNTERS DEFINES_counters = NDEBUG diff --git a/build.yaml b/build.yaml index 141526cb594..120c7a85b1e 100644 --- a/build.yaml +++ b/build.yaml @@ -3919,7 +3919,7 @@ configs: CPPFLAGS: -O2 -DGRPC_BASIC_PROFILER -DGRPC_TIMERS_RDTSC DEFINES: NDEBUG counters: - CPPFLAGS: -O2 -DGPR_MU_COUNTERS + CPPFLAGS: -O2 -DGPR_LOW_LEVEL_COUNTERS DEFINES: NDEBUG dbg: CPPFLAGS: -O0 diff --git a/include/grpc/impl/codegen/atm_gcc_atomic.h b/include/grpc/impl/codegen/atm_gcc_atomic.h index 7d4ae98cf78..e5a623f723c 100644 --- a/include/grpc/impl/codegen/atm_gcc_atomic.h +++ b/include/grpc/impl/codegen/atm_gcc_atomic.h @@ -40,6 +40,15 @@ typedef intptr_t gpr_atm; +#ifdef GPR_LOW_LEVEL_COUNTERS +extern gpr_atm gpr_counter_rmw; +#define GPR_ATM_INC_COUNTER(counter) \ + __atomic_fetch_add(&counter, 1, __ATOMIC_RELAXED) +#define GPR_ATM_INC_RMW_THEN(blah) (GPR_ATM_INC_COUNTER(gpr_counter_rmw), blah) +#else +#define GPR_ATM_INC_RMW_THEN(blah) blah +#endif + #define gpr_atm_full_barrier() (__atomic_thread_fence(__ATOMIC_SEQ_CST)) #define gpr_atm_acq_load(p) (__atomic_load_n((p), __ATOMIC_ACQUIRE)) @@ -50,25 +59,28 @@ typedef intptr_t gpr_atm; (__atomic_store_n((p), (intptr_t)(value), __ATOMIC_RELAXED)) #define gpr_atm_no_barrier_fetch_add(p, delta) \ - (__atomic_fetch_add((p), (intptr_t)(delta), __ATOMIC_RELAXED)) + GPR_ATM_INC_RMW_THEN( \ + __atomic_fetch_add((p), (intptr_t)(delta), __ATOMIC_RELAXED)) #define gpr_atm_full_fetch_add(p, delta) \ - (__atomic_fetch_add((p), (intptr_t)(delta), __ATOMIC_ACQ_REL)) + GPR_ATM_INC_RMW_THEN( \ + __atomic_fetch_add((p), (intptr_t)(delta), __ATOMIC_ACQ_REL)) static __inline int gpr_atm_no_barrier_cas(gpr_atm *p, gpr_atm o, gpr_atm n) { - return __atomic_compare_exchange_n(p, &o, n, 0, __ATOMIC_RELAXED, - __ATOMIC_RELAXED); + return GPR_ATM_INC_RMW_THEN(__atomic_compare_exchange_n( + p, &o, n, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED)); } static __inline int gpr_atm_acq_cas(gpr_atm *p, gpr_atm o, gpr_atm n) { - return __atomic_compare_exchange_n(p, &o, n, 0, __ATOMIC_ACQUIRE, - __ATOMIC_RELAXED); + return GPR_ATM_INC_RMW_THEN(__atomic_compare_exchange_n( + p, &o, n, 0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)); } static __inline int gpr_atm_rel_cas(gpr_atm *p, gpr_atm o, gpr_atm n) { - return __atomic_compare_exchange_n(p, &o, n, 0, __ATOMIC_RELEASE, - __ATOMIC_RELAXED); + return GPR_ATM_INC_RMW_THEN(__atomic_compare_exchange_n( + p, &o, n, 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED)); } -#define gpr_atm_full_xchg(p, n) __atomic_exchange_n((p), (n), __ATOMIC_ACQ_REL) +#define gpr_atm_full_xchg(p, n) \ + GPR_ATM_INC_RMW_THEN(__atomic_exchange_n((p), (n), __ATOMIC_ACQ_REL)) #endif /* GRPC_IMPL_CODEGEN_ATM_GCC_ATOMIC_H */ diff --git a/src/core/lib/support/sync_posix.c b/src/core/lib/support/sync_posix.c index de0f0484b57..3b7d7806082 100644 --- a/src/core/lib/support/sync_posix.c +++ b/src/core/lib/support/sync_posix.c @@ -42,8 +42,9 @@ #include #include "src/core/lib/profiling/timers.h" -#ifdef GPR_MU_COUNTERS -gpr_atm grpc_mu_locks = 0; +#ifdef GPR_LOW_LEVEL_COUNTERS +gpr_atm gpr_mu_locks = 0; +gpr_atm gpr_counter_rmw = 0; #endif void gpr_mu_init(gpr_mu* mu) { GPR_ASSERT(pthread_mutex_init(mu, NULL) == 0); } @@ -51,8 +52,8 @@ void gpr_mu_init(gpr_mu* mu) { GPR_ASSERT(pthread_mutex_init(mu, NULL) == 0); } void gpr_mu_destroy(gpr_mu* mu) { GPR_ASSERT(pthread_mutex_destroy(mu) == 0); } void gpr_mu_lock(gpr_mu* mu) { -#ifdef GPR_MU_COUNTERS - gpr_atm_no_barrier_fetch_add(&grpc_mu_locks, 1); +#ifdef GPR_LOW_LEVEL_COUNTERS + GPR_ATM_INC_COUNTER(gpr_mu_locks); #endif GPR_TIMER_BEGIN("gpr_mu_lock", 0); GPR_ASSERT(pthread_mutex_lock(mu) == 0); diff --git a/test/cpp/microbenchmarks/bm_closure.cc b/test/cpp/microbenchmarks/bm_closure.cc index 80d6610e13b..16d05781bb4 100644 --- a/test/cpp/microbenchmarks/bm_closure.cc +++ b/test/cpp/microbenchmarks/bm_closure.cc @@ -43,13 +43,46 @@ extern "C" { #include "third_party/benchmark/include/benchmark/benchmark.h" +#include + +#ifdef GPR_LOW_LEVEL_COUNTERS +extern "C" gpr_atm gpr_mu_locks; +#endif + static class InitializeStuff { public: InitializeStuff() { grpc_init(); } ~InitializeStuff() { grpc_shutdown(); } } initialize_stuff; +class TrackCounters { + public: + TrackCounters(benchmark::State& state) : state_(state) {} + + ~TrackCounters() { + std::ostringstream out; +#ifdef GPR_LOW_LEVEL_COUNTERS + out << " locks/iter:" << ((double)(gpr_atm_no_barrier_load(&gpr_mu_locks) - + mu_locks_at_start_) / + (double)state_.iterations()) + << " atm_rmw/iter:" + << ((double)(gpr_atm_no_barrier_load(&gpr_counter_rmw) - + rmw_at_start_) / + (double)state_.iterations()); +#endif + state_.SetLabel(out.str()); + } + + private: + benchmark::State& state_; +#ifdef GPR_LOW_LEVEL_COUNTERS + const size_t mu_locks_at_start_ = gpr_atm_no_barrier_load(&gpr_mu_locks); + const size_t rmw_at_start_ = gpr_atm_no_barrier_load(&gpr_counter_rmw); +#endif +}; + static void BM_NoOpExecCtx(benchmark::State& state) { + TrackCounters track_counters(state); while (state.KeepRunning()) { grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; grpc_exec_ctx_finish(&exec_ctx); @@ -58,6 +91,7 @@ static void BM_NoOpExecCtx(benchmark::State& state) { BENCHMARK(BM_NoOpExecCtx); static void BM_WellFlushed(benchmark::State& state) { + TrackCounters track_counters(state); grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; while (state.KeepRunning()) { grpc_exec_ctx_flush(&exec_ctx); @@ -69,6 +103,7 @@ BENCHMARK(BM_WellFlushed); static void DoNothing(grpc_exec_ctx* exec_ctx, void* arg, grpc_error* error) {} static void BM_ClosureInitAgainstExecCtx(benchmark::State& state) { + TrackCounters track_counters(state); grpc_closure c; while (state.KeepRunning()) { benchmark::DoNotOptimize( @@ -78,6 +113,7 @@ static void BM_ClosureInitAgainstExecCtx(benchmark::State& state) { BENCHMARK(BM_ClosureInitAgainstExecCtx); static void BM_ClosureInitAgainstCombiner(benchmark::State& state) { + TrackCounters track_counters(state); grpc_combiner* combiner = grpc_combiner_create(NULL); grpc_closure c; grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; @@ -91,6 +127,7 @@ static void BM_ClosureInitAgainstCombiner(benchmark::State& state) { BENCHMARK(BM_ClosureInitAgainstCombiner); static void BM_ClosureRunOnExecCtx(benchmark::State& state) { + TrackCounters track_counters(state); grpc_closure c; grpc_closure_init(&c, DoNothing, NULL, grpc_schedule_on_exec_ctx); grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; @@ -103,6 +140,7 @@ static void BM_ClosureRunOnExecCtx(benchmark::State& state) { BENCHMARK(BM_ClosureRunOnExecCtx); static void BM_ClosureCreateAndRun(benchmark::State& state) { + TrackCounters track_counters(state); grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; while (state.KeepRunning()) { grpc_closure_run(&exec_ctx, grpc_closure_create(DoNothing, NULL, @@ -114,6 +152,7 @@ static void BM_ClosureCreateAndRun(benchmark::State& state) { BENCHMARK(BM_ClosureCreateAndRun); static void BM_ClosureInitAndRun(benchmark::State& state) { + TrackCounters track_counters(state); grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; grpc_closure c; while (state.KeepRunning()) { @@ -126,6 +165,7 @@ static void BM_ClosureInitAndRun(benchmark::State& state) { BENCHMARK(BM_ClosureInitAndRun); static void BM_ClosureSchedOnExecCtx(benchmark::State& state) { + TrackCounters track_counters(state); grpc_closure c; grpc_closure_init(&c, DoNothing, NULL, grpc_schedule_on_exec_ctx); grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; @@ -138,6 +178,7 @@ static void BM_ClosureSchedOnExecCtx(benchmark::State& state) { BENCHMARK(BM_ClosureSchedOnExecCtx); static void BM_ClosureSched2OnExecCtx(benchmark::State& state) { + TrackCounters track_counters(state); grpc_closure c1; grpc_closure c2; grpc_closure_init(&c1, DoNothing, NULL, grpc_schedule_on_exec_ctx); @@ -153,6 +194,7 @@ static void BM_ClosureSched2OnExecCtx(benchmark::State& state) { BENCHMARK(BM_ClosureSched2OnExecCtx); static void BM_ClosureSched3OnExecCtx(benchmark::State& state) { + TrackCounters track_counters(state); grpc_closure c1; grpc_closure c2; grpc_closure c3; @@ -171,6 +213,7 @@ static void BM_ClosureSched3OnExecCtx(benchmark::State& state) { BENCHMARK(BM_ClosureSched3OnExecCtx); static void BM_AcquireMutex(benchmark::State& state) { + TrackCounters track_counters(state); // for comparison with the combiner stuff below gpr_mu mu; gpr_mu_init(&mu); @@ -185,6 +228,7 @@ static void BM_AcquireMutex(benchmark::State& state) { BENCHMARK(BM_AcquireMutex); static void BM_ClosureSchedOnCombiner(benchmark::State& state) { + TrackCounters track_counters(state); grpc_combiner* combiner = grpc_combiner_create(NULL); grpc_closure c; grpc_closure_init(&c, DoNothing, NULL, @@ -200,6 +244,7 @@ static void BM_ClosureSchedOnCombiner(benchmark::State& state) { BENCHMARK(BM_ClosureSchedOnCombiner); static void BM_ClosureSched2OnCombiner(benchmark::State& state) { + TrackCounters track_counters(state); grpc_combiner* combiner = grpc_combiner_create(NULL); grpc_closure c1; grpc_closure c2; @@ -219,6 +264,7 @@ static void BM_ClosureSched2OnCombiner(benchmark::State& state) { BENCHMARK(BM_ClosureSched2OnCombiner); static void BM_ClosureSched3OnCombiner(benchmark::State& state) { + TrackCounters track_counters(state); grpc_combiner* combiner = grpc_combiner_create(NULL); grpc_closure c1; grpc_closure c2; @@ -242,6 +288,7 @@ static void BM_ClosureSched3OnCombiner(benchmark::State& state) { BENCHMARK(BM_ClosureSched3OnCombiner); static void BM_ClosureSched2OnTwoCombiners(benchmark::State& state) { + TrackCounters track_counters(state); grpc_combiner* combiner1 = grpc_combiner_create(NULL); grpc_combiner* combiner2 = grpc_combiner_create(NULL); grpc_closure c1; @@ -263,6 +310,7 @@ static void BM_ClosureSched2OnTwoCombiners(benchmark::State& state) { BENCHMARK(BM_ClosureSched2OnTwoCombiners); static void BM_ClosureSched4OnTwoCombiners(benchmark::State& state) { + TrackCounters track_counters(state); grpc_combiner* combiner1 = grpc_combiner_create(NULL); grpc_combiner* combiner2 = grpc_combiner_create(NULL); grpc_closure c1; @@ -323,6 +371,7 @@ class Rescheduler { }; static void BM_ClosureReschedOnExecCtx(benchmark::State& state) { + TrackCounters track_counters(state); grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; Rescheduler r(state, grpc_schedule_on_exec_ctx); r.ScheduleFirst(&exec_ctx); @@ -331,6 +380,7 @@ static void BM_ClosureReschedOnExecCtx(benchmark::State& state) { BENCHMARK(BM_ClosureReschedOnExecCtx); static void BM_ClosureReschedOnCombiner(benchmark::State& state) { + TrackCounters track_counters(state); grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; grpc_combiner* combiner = grpc_combiner_create(NULL); Rescheduler r(state, grpc_combiner_scheduler(combiner, false)); @@ -342,6 +392,7 @@ static void BM_ClosureReschedOnCombiner(benchmark::State& state) { BENCHMARK(BM_ClosureReschedOnCombiner); static void BM_ClosureReschedOnCombinerFinally(benchmark::State& state) { + TrackCounters track_counters(state); grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; grpc_combiner* combiner = grpc_combiner_create(NULL); Rescheduler r(state, grpc_combiner_finally_scheduler(combiner, false)); diff --git a/test/cpp/microbenchmarks/bm_fullstack.cc b/test/cpp/microbenchmarks/bm_fullstack.cc index c63de0ce0ab..5bb456ab468 100644 --- a/test/cpp/microbenchmarks/bm_fullstack.cc +++ b/test/cpp/microbenchmarks/bm_fullstack.cc @@ -99,8 +99,9 @@ static void ApplyCommonChannelArguments(ChannelArguments* c) { c->SetInt(GRPC_ARG_MAX_SEND_MESSAGE_LENGTH, INT_MAX); } -#ifdef GPR_MU_COUNTERS -extern "C" gpr_atm grpc_mu_locks; +#ifdef GPR_LOW_LEVEL_COUNTERS +extern "C" gpr_atm gpr_mu_locks; +extern "C" gpr_atm gpr_counter_rmw; #endif class BaseFixture { @@ -108,10 +109,14 @@ class BaseFixture { void Finish(benchmark::State& s) { std::ostringstream out; this->AddToLabel(out, s); -#ifdef GPR_MU_COUNTERS - out << " locks/iter:" << ((double)(gpr_atm_no_barrier_load(&grpc_mu_locks) - +#ifdef GPR_LOW_LEVEL_COUNTERS + out << " locks/iter:" << ((double)(gpr_atm_no_barrier_load(&gpr_mu_locks) - mu_locks_at_start_) / - (double)s.iterations()); + (double)s.iterations()) + << " atm_rmw/iter:" + << ((double)(gpr_atm_no_barrier_load(&gpr_counter_rmw) - + rmw_at_start_) / + (double)s.iterations()); #endif grpc_memory_counters counters_at_end = grpc_memory_counters_snapshot(); out << " allocs/iter:" @@ -128,8 +133,9 @@ class BaseFixture { virtual void AddToLabel(std::ostream& out, benchmark::State& s) = 0; private: -#ifdef GPR_MU_COUNTERS - const size_t mu_locks_at_start_ = gpr_atm_no_barrier_load(&grpc_mu_locks); +#ifdef GPR_LOW_LEVEL_COUNTERS + const size_t mu_locks_at_start_ = gpr_atm_no_barrier_load(&gpr_mu_locks); + const size_t rmw_at_start_ = gpr_atm_no_barrier_load(&gpr_counter_rmw); #endif grpc_memory_counters counters_at_start_ = grpc_memory_counters_snapshot(); }; From 3d826b9e0acf79466726c5ef188975cb3cec45e0 Mon Sep 17 00:00:00 2001 From: Craig Tiller Date: Tue, 21 Feb 2017 10:05:59 -0800 Subject: [PATCH 2/3] Fix script to track atomic rmws --- tools/profiling/microbenchmarks/bm2bq.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/profiling/microbenchmarks/bm2bq.py b/tools/profiling/microbenchmarks/bm2bq.py index a7d82269f55..62a2f699c7a 100755 --- a/tools/profiling/microbenchmarks/bm2bq.py +++ b/tools/profiling/microbenchmarks/bm2bq.py @@ -66,6 +66,7 @@ columns = [ ('cli_stream_stalls_per_iteration', 'float'), ('svr_transport_stalls_per_iteration', 'float'), ('svr_stream_stalls_per_iteration', 'float'), + ('atm_rmw_per_iteration', 'float') ] if sys.argv[1] == '--schema': @@ -158,7 +159,7 @@ def parse_name(name): for bm in js['benchmarks']: context = js['context'] if 'label' in bm: - labels_list = [s.split(':') for s in bm['label'].split(' ')] + labels_list = [s.split(':') for s in bm['label'].strip().split(' ')] for el in labels_list: el[0] = el[0].replace('/iter', '_per_iteration') labels = dict(labels_list) From 7f4d30a0321a042d4f8512815c1029cbaad6fac8 Mon Sep 17 00:00:00 2001 From: Craig Tiller Date: Tue, 21 Feb 2017 10:24:00 -0800 Subject: [PATCH 3/3] Separate CAS/ADD RMWs --- include/grpc/impl/codegen/atm_gcc_atomic.h | 23 +++++++++++++--------- src/core/lib/support/sync_posix.c | 3 ++- test/cpp/microbenchmarks/bm_closure.cc | 15 ++++++++++---- test/cpp/microbenchmarks/bm_fullstack.cc | 18 ++++++++++++----- tools/profiling/microbenchmarks/bm2bq.py | 3 ++- 5 files changed, 42 insertions(+), 20 deletions(-) diff --git a/include/grpc/impl/codegen/atm_gcc_atomic.h b/include/grpc/impl/codegen/atm_gcc_atomic.h index e5a623f723c..4bd3b257413 100644 --- a/include/grpc/impl/codegen/atm_gcc_atomic.h +++ b/include/grpc/impl/codegen/atm_gcc_atomic.h @@ -41,12 +41,17 @@ typedef intptr_t gpr_atm; #ifdef GPR_LOW_LEVEL_COUNTERS -extern gpr_atm gpr_counter_rmw; +extern gpr_atm gpr_counter_atm_cas; +extern gpr_atm gpr_counter_atm_add; #define GPR_ATM_INC_COUNTER(counter) \ __atomic_fetch_add(&counter, 1, __ATOMIC_RELAXED) -#define GPR_ATM_INC_RMW_THEN(blah) (GPR_ATM_INC_COUNTER(gpr_counter_rmw), blah) +#define GPR_ATM_INC_CAS_THEN(blah) \ + (GPR_ATM_INC_COUNTER(gpr_counter_atm_cas), blah) +#define GPR_ATM_INC_ADD_THEN(blah) \ + (GPR_ATM_INC_COUNTER(gpr_counter_atm_add), blah) #else -#define GPR_ATM_INC_RMW_THEN(blah) blah +#define GPR_ATM_INC_CAS_THEN(blah) blah +#define GPR_ATM_INC_ADD_THEN(blah) blah #endif #define gpr_atm_full_barrier() (__atomic_thread_fence(__ATOMIC_SEQ_CST)) @@ -59,28 +64,28 @@ extern gpr_atm gpr_counter_rmw; (__atomic_store_n((p), (intptr_t)(value), __ATOMIC_RELAXED)) #define gpr_atm_no_barrier_fetch_add(p, delta) \ - GPR_ATM_INC_RMW_THEN( \ + GPR_ATM_INC_ADD_THEN( \ __atomic_fetch_add((p), (intptr_t)(delta), __ATOMIC_RELAXED)) #define gpr_atm_full_fetch_add(p, delta) \ - GPR_ATM_INC_RMW_THEN( \ + GPR_ATM_INC_ADD_THEN( \ __atomic_fetch_add((p), (intptr_t)(delta), __ATOMIC_ACQ_REL)) static __inline int gpr_atm_no_barrier_cas(gpr_atm *p, gpr_atm o, gpr_atm n) { - return GPR_ATM_INC_RMW_THEN(__atomic_compare_exchange_n( + return GPR_ATM_INC_CAS_THEN(__atomic_compare_exchange_n( p, &o, n, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED)); } static __inline int gpr_atm_acq_cas(gpr_atm *p, gpr_atm o, gpr_atm n) { - return GPR_ATM_INC_RMW_THEN(__atomic_compare_exchange_n( + return GPR_ATM_INC_CAS_THEN(__atomic_compare_exchange_n( p, &o, n, 0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)); } static __inline int gpr_atm_rel_cas(gpr_atm *p, gpr_atm o, gpr_atm n) { - return GPR_ATM_INC_RMW_THEN(__atomic_compare_exchange_n( + return GPR_ATM_INC_CAS_THEN(__atomic_compare_exchange_n( p, &o, n, 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED)); } #define gpr_atm_full_xchg(p, n) \ - GPR_ATM_INC_RMW_THEN(__atomic_exchange_n((p), (n), __ATOMIC_ACQ_REL)) + GPR_ATM_INC_CAS_THEN(__atomic_exchange_n((p), (n), __ATOMIC_ACQ_REL)) #endif /* GRPC_IMPL_CODEGEN_ATM_GCC_ATOMIC_H */ diff --git a/src/core/lib/support/sync_posix.c b/src/core/lib/support/sync_posix.c index 3b7d7806082..16e7d6e12a5 100644 --- a/src/core/lib/support/sync_posix.c +++ b/src/core/lib/support/sync_posix.c @@ -44,7 +44,8 @@ #ifdef GPR_LOW_LEVEL_COUNTERS gpr_atm gpr_mu_locks = 0; -gpr_atm gpr_counter_rmw = 0; +gpr_atm gpr_counter_atm_cas = 0; +gpr_atm gpr_counter_atm_add = 0; #endif void gpr_mu_init(gpr_mu* mu) { GPR_ASSERT(pthread_mutex_init(mu, NULL) == 0); } diff --git a/test/cpp/microbenchmarks/bm_closure.cc b/test/cpp/microbenchmarks/bm_closure.cc index 16d05781bb4..03aede35b27 100644 --- a/test/cpp/microbenchmarks/bm_closure.cc +++ b/test/cpp/microbenchmarks/bm_closure.cc @@ -65,9 +65,13 @@ class TrackCounters { out << " locks/iter:" << ((double)(gpr_atm_no_barrier_load(&gpr_mu_locks) - mu_locks_at_start_) / (double)state_.iterations()) - << " atm_rmw/iter:" - << ((double)(gpr_atm_no_barrier_load(&gpr_counter_rmw) - - rmw_at_start_) / + << " atm_cas/iter:" + << ((double)(gpr_atm_no_barrier_load(&gpr_counter_atm_cas) - + atm_cas_at_start_) / + (double)state_.iterations()) + << " atm_add/iter:" + << ((double)(gpr_atm_no_barrier_load(&gpr_counter_atm_add) - + atm_add_at_start_) / (double)state_.iterations()); #endif state_.SetLabel(out.str()); @@ -77,7 +81,10 @@ class TrackCounters { benchmark::State& state_; #ifdef GPR_LOW_LEVEL_COUNTERS const size_t mu_locks_at_start_ = gpr_atm_no_barrier_load(&gpr_mu_locks); - const size_t rmw_at_start_ = gpr_atm_no_barrier_load(&gpr_counter_rmw); + const size_t atm_cas_at_start_ = + gpr_atm_no_barrier_load(&gpr_counter_atm_cas); + const size_t atm_add_at_start_ = + gpr_atm_no_barrier_load(&gpr_counter_atm_add); #endif }; diff --git a/test/cpp/microbenchmarks/bm_fullstack.cc b/test/cpp/microbenchmarks/bm_fullstack.cc index 5bb456ab468..48e131f1be0 100644 --- a/test/cpp/microbenchmarks/bm_fullstack.cc +++ b/test/cpp/microbenchmarks/bm_fullstack.cc @@ -101,7 +101,8 @@ static void ApplyCommonChannelArguments(ChannelArguments* c) { #ifdef GPR_LOW_LEVEL_COUNTERS extern "C" gpr_atm gpr_mu_locks; -extern "C" gpr_atm gpr_counter_rmw; +extern "C" gpr_atm gpr_counter_atm_cas; +extern "C" gpr_atm gpr_counter_atm_add; #endif class BaseFixture { @@ -113,9 +114,13 @@ class BaseFixture { out << " locks/iter:" << ((double)(gpr_atm_no_barrier_load(&gpr_mu_locks) - mu_locks_at_start_) / (double)s.iterations()) - << " atm_rmw/iter:" - << ((double)(gpr_atm_no_barrier_load(&gpr_counter_rmw) - - rmw_at_start_) / + << " atm_cas/iter:" + << ((double)(gpr_atm_no_barrier_load(&gpr_counter_atm_cas) - + atm_cas_at_start_) / + (double)s.iterations()) + << " atm_add/iter:" + << ((double)(gpr_atm_no_barrier_load(&gpr_counter_atm_add) - + atm_add_at_start_) / (double)s.iterations()); #endif grpc_memory_counters counters_at_end = grpc_memory_counters_snapshot(); @@ -135,7 +140,10 @@ class BaseFixture { private: #ifdef GPR_LOW_LEVEL_COUNTERS const size_t mu_locks_at_start_ = gpr_atm_no_barrier_load(&gpr_mu_locks); - const size_t rmw_at_start_ = gpr_atm_no_barrier_load(&gpr_counter_rmw); + const size_t atm_cas_at_start_ = + gpr_atm_no_barrier_load(&gpr_counter_atm_cas); + const size_t atm_add_at_start_ = + gpr_atm_no_barrier_load(&gpr_counter_atm_add); #endif grpc_memory_counters counters_at_start_ = grpc_memory_counters_snapshot(); }; diff --git a/tools/profiling/microbenchmarks/bm2bq.py b/tools/profiling/microbenchmarks/bm2bq.py index 62a2f699c7a..8ead4b44559 100755 --- a/tools/profiling/microbenchmarks/bm2bq.py +++ b/tools/profiling/microbenchmarks/bm2bq.py @@ -66,7 +66,8 @@ columns = [ ('cli_stream_stalls_per_iteration', 'float'), ('svr_transport_stalls_per_iteration', 'float'), ('svr_stream_stalls_per_iteration', 'float'), - ('atm_rmw_per_iteration', 'float') + ('atm_cas_per_iteration', 'float') + ('atm_add_per_iteration', 'float') ] if sys.argv[1] == '--schema':