From 0b13caed53e31bf5a6cd66670fac4ccdba2c8fec Mon Sep 17 00:00:00 2001 From: AJ Heller Date: Wed, 16 Oct 2024 14:21:54 -0700 Subject: [PATCH] [benchmark] Measure all threads' CPU usage in callback benchmarks (#37936) google/benchmark only measures the main thread's CPU by default, and benchmark iteration depends on CPU utilization. These benchmarks were taking a long time since the callback API uses a lot of internal threads, and the benchmarks weren't producing useful metrics. See the documentation here https://google.github.io/benchmark/user_guide.html#cpu-timers Test command for master: `bazel test --config=opt --test_output=streamed //test/cpp/microbenchmarks:bm_callback_streaming_ping_pong --test_arg='--benchmark_min_time=0.05s' --test_arg='--benchmark_min_warmup_time=0.01' --test_arg='--benchmark_filter=.*/0/1 38932 ns 1052 ns 10000 bytes_per_second=0/s BM_CallbackBidiStreaming/1/1 39002 ns 0.000 ns 10000 bytes_per_second=inf/s BM_CallbackBidiStreaming/8/1 39457 ns 0.000 ns 10000 bytes_per_second=inf/s BM_CallbackBidiStreaming/64/1 51693 ns 0.000 ns 10000 bytes_per_second=inf/s BM_CallbackBidiStreaming/512/1 41174 ns 0.000 ns 10000 bytes_per_second=inf/s BM_CallbackBidiStreaming/4096/1 45006 ns 0.000 ns 10000 bytes_per_second=inf/s BM_CallbackBidiStreaming/32768/1 70990 ns 0.000 ns 10000 bytes_per_second=inf/s BM_CallbackBidiStreaming/262144/1 302831 ns 0.000 ns 1000 bytes_per_second=inf/s BM_CallbackBidiStreaming/2097152/1 2202385 ns 0.000 ns 1000 bytes_per_second=inf/s BM_CallbackBidiStreaming/16777216/1 30137348 ns 0.000 ns 10 bytes_per_second=inf/s BM_CallbackBidiStreaming/134217728/1 437821865 ns 0.000 ns 1 bytes_per_second=inf/s ... //test/cpp/microbenchmarks:bm_callback_streaming_ping_pong PASSED in 25.7s ``` After this change: ``` -------------------------------------------------------------------------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations UserCounters... -------------------------------------------------------------------------------------------------------------------------------------------------------------- BM_CallbackBidiStreaming/0/1/process_time/real_time 38233 ns 75471 ns 1768 bytes_per_second=0/s BM_CallbackBidiStreaming/1/1/process_time/real_time 38847 ns 76616 ns 1781 bytes_per_second=50.2778Ki/s BM_CallbackBidiStreaming/8/1/process_time/real_time 38848 ns 76929 ns 1758 bytes_per_second=402.21Ki/s BM_CallbackBidiStreaming/64/1/process_time/real_time 49873 ns 95929 ns 1685 bytes_per_second=2.44761Mi/s BM_CallbackBidiStreaming/512/1/process_time/real_time 41606 ns 80703 ns 1431 bytes_per_second=23.4718Mi/s BM_CallbackBidiStreaming/4096/1/process_time/real_time 45495 ns 86097 ns 1335 bytes_per_second=171.722Mi/s BM_CallbackBidiStreaming/32768/1/process_time/real_time 71506 ns 117093 ns 806 bytes_per_second=874.048Mi/s BM_CallbackBidiStreaming/262144/1/process_time/real_time 353012 ns 433389 ns 183 bytes_per_second=1.38319Gi/s BM_CallbackBidiStreaming/2097152/1/process_time/real_time 3059811 ns 3215404 ns 18 bytes_per_second=1.27663Gi/s BM_CallbackBidiStreaming/16777216/1/process_time/real_time 34951210 ns 35279138 ns 2 bytes_per_second=915.562Mi/s BM_CallbackBidiStreaming/134217728/1/process_time/real_time 443034887 ns 444528037 ns 1 bytes_per_second=577.833Mi/s ... //test/cpp/microbenchmarks:bm_callback_streaming_ping_pong PASSED in 5.1s ``` Closes #37936 COPYBARA_INTEGRATE_REVIEW=https://github.com/grpc/grpc/pull/37936 from drfloob:bm-callback-measures-all-thread-cpu d5ba8420bd00e390209c49dc91ef08be9f31840e PiperOrigin-RevId: 686640919 --- test/cpp/microbenchmarks/bm_callback_streaming_ping_pong.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/test/cpp/microbenchmarks/bm_callback_streaming_ping_pong.cc b/test/cpp/microbenchmarks/bm_callback_streaming_ping_pong.cc index 2188ae49333..e7f7fef1964 100644 --- a/test/cpp/microbenchmarks/bm_callback_streaming_ping_pong.cc +++ b/test/cpp/microbenchmarks/bm_callback_streaming_ping_pong.cc @@ -47,6 +47,7 @@ static void StreamingPingPongArgs(benchmark::internal::Benchmark* b) { msg_size == 0 ? msg_size++ : msg_size *= 8) { b->Args({msg_size, 1}); b->Args({msg_size, 2}); + b->MeasureProcessCPUTime()->UseRealTime(); } }