From 911fa984bceed97525362f2b1ccea4363f4be1ab Mon Sep 17 00:00:00 2001 From: Derek Benson Date: Wed, 22 May 2024 09:16:02 -0700 Subject: [PATCH] Add benchmarks for Protobuf Rust that demonstrate the overhead of the Rust implementations. PiperOrigin-RevId: 636190227 --- rust/test/benchmarks/BUILD | 99 +++++++ rust/test/benchmarks/bench_data.proto | 12 + rust/test/benchmarks/benchmark_thunks.cc | 13 + rust/test/benchmarks/benchmarks.rs | 39 +++ rust/test/benchmarks/proto_benchmarks.rs | 136 +++++++++ .../benchmarks/rust_protobuf_benchmarks.cc | 259 ++++++++++++++++++ 6 files changed, 558 insertions(+) create mode 100644 rust/test/benchmarks/BUILD create mode 100644 rust/test/benchmarks/bench_data.proto create mode 100644 rust/test/benchmarks/benchmark_thunks.cc create mode 100644 rust/test/benchmarks/benchmarks.rs create mode 100644 rust/test/benchmarks/proto_benchmarks.rs create mode 100644 rust/test/benchmarks/rust_protobuf_benchmarks.cc diff --git a/rust/test/benchmarks/BUILD b/rust/test/benchmarks/BUILD new file mode 100644 index 0000000000..5fc92f97da --- /dev/null +++ b/rust/test/benchmarks/BUILD @@ -0,0 +1,99 @@ +# Benchmarks for Rust Protobuf. 
+load(
+    "@rules_rust//rust:defs.bzl",
+    "rust_library",
+)
+load("//bazel:cc_proto_library.bzl", "cc_proto_library")
+load(
+    "//protos/bazel:upb_cc_proto_library.bzl",
+    "upb_cc_proto_library",
+)
+load(
+    "//rust:defs.bzl",
+    "rust_cc_proto_library",
+    "rust_upb_proto_library",
+)
+
+proto_library(
+    name = "bench_data_proto",
+    srcs = ["bench_data.proto"],
+)
+
+cc_proto_library(
+    name = "bench_data_cc_proto",
+    deps = [":bench_data_proto"],
+)
+
+rust_cc_proto_library(
+    name = "bench_data_cpp_rust_proto",
+    deps = [":bench_data_proto"],
+)
+
+rust_upb_proto_library(
+    name = "bench_data_upb_rust_proto",
+    deps = [":bench_data_proto"],
+)
+
+upb_cc_proto_library(
+    name = "bench_data_upb_cc_proto",
+    deps = [":bench_data_proto"],
+)
+
+cc_test(
+    name = "rust_protobuf_benchmarks",
+    testonly = True,
+    srcs = ["rust_protobuf_benchmarks.cc"],
+    deps = [
+        ":bench_data_cc_proto",
+        ":bench_data_upb_cc_proto",
+        ":benchmarks",
+        ":proto_benchmarks_cpp",  # build_cleaner: keep
+        ":proto_benchmarks_upb",  # build_cleaner: keep
+        "//protos",
+        "//protos:repeated_field",
+        "//src/google/protobuf:protobuf_lite",
+        "//testing/base/public:gunit",
+        "//third_party/benchmark",
+        "@com_google_absl//absl/log:absl_check",
+        "@com_google_googletest//:gtest",
+        "@com_google_googletest//:gtest_main",
+    ],
+)
+
+rust_library(
+    name = "proto_benchmarks_cpp",
+    srcs = ["proto_benchmarks.rs"],
+    proc_macro_deps = [
+        "@crate_index//:paste",
+    ],
+    rustc_flags = [
+        "--cfg=bench_cpp",
+    ],
+    deps = [":bench_data_cpp_rust_proto"],
+)
+
+rust_library(
+    name = "proto_benchmarks_upb",
+    srcs = ["proto_benchmarks.rs"],
+    proc_macro_deps = [
+        "@crate_index//:paste",
+    ],
+    rustc_flags = [
+        "--cfg=bench_upb",
+    ],
+    deps = [":bench_data_upb_rust_proto"],
+)
+
+rust_library(
+    name = "benchmarks",
+    srcs = ["benchmarks.rs"],
+    deps = [
+        ":benchmark_thunks",  # build_cleaner: keep
+    ],
+)
+
+cc_library(
+    name = "benchmark_thunks",
+    srcs = ["benchmark_thunks.cc"],
+    deps = [":bench_data_cc_proto"],
+)
diff --git a/rust/test/benchmarks/bench_data.proto b/rust/test/benchmarks/bench_data.proto
new file mode 100644
index 0000000000..6ab04d200d
--- /dev/null
+++ b/rust/test/benchmarks/bench_data.proto
@@ -0,0 +1,12 @@
+syntax = "proto3";
+
+package benchmarks;
+
+message BenchData {
+  string name = 1;
+  float num1 = 2;
+  int32 num2 = 3;
+  repeated BenchData subs = 4;
+  repeated int32 nums = 5;
+  BenchData recursive = 6;
+}
diff --git a/rust/test/benchmarks/benchmark_thunks.cc b/rust/test/benchmarks/benchmark_thunks.cc
new file mode 100644
index 0000000000..e4815a22c3
--- /dev/null
+++ b/rust/test/benchmarks/benchmark_thunks.cc
@@ -0,0 +1,13 @@
+#include <cstdint>
+
+#include "google/protobuf/rust/test/benchmarks/bench_data.pb.h"
+
+using benchmarks::BenchData;
+
+extern "C" void benchmark_thunk_set_num2(void* proto, int32_t num) {
+  static_cast<BenchData*>(proto)->set_num2(num);
+}
+
+extern "C" void benchmark_thunk_add_num(void* proto, int32_t num) {
+  static_cast<BenchData*>(proto)->add_nums(num);
+}
diff --git a/rust/test/benchmarks/benchmarks.rs b/rust/test/benchmarks/benchmarks.rs
new file mode 100644
index 0000000000..7e7112bad9
--- /dev/null
+++ b/rust/test/benchmarks/benchmarks.rs
@@ -0,0 +1,39 @@
+// All benchmarks that don't need to be duplicated for cpp and upb should be
+// defined here.
+
+use std::sync::OnceLock;
+
+#[no_mangle]
+pub extern "C" fn extend_100_ints_vec_rs_bench() {
+    let data: Vec<i32> = std::hint::black_box((0..100).collect());
+    assert_eq!(data.len(), 100);
+}
+
+static ONE_THOUSAND: OnceLock<Vec<i32>> = OnceLock::new();
+#[no_mangle]
+pub extern "C" fn sum_1000_ints_vec_rs_bench() {
+    let source = ONE_THOUSAND.get_or_init(|| (0..1000).collect());
+    let data: i32 = source.iter().sum();
+    assert_eq!(data, 499500);
+}
+
+extern "C" {
+    fn benchmark_thunk_set_num2(proto: *mut std::ffi::c_void, num: i32);
+    fn benchmark_thunk_add_num(proto: *mut std::ffi::c_void, num: i32);
+}
+
+/// # Safety
+/// `proto` must be a valid, non-null pointer to a live C++ `BenchData`; it is
+/// passed unchecked to `benchmark_thunk_set_num2`, which writes through it.
+#[no_mangle]
+pub unsafe extern "C" fn benchmark_thunk_set_num2_rs(proto: *mut std::ffi::c_void, num: i32) {
+    benchmark_thunk_set_num2(proto, num);
+}
+
+/// # Safety
+/// `proto` must be a valid, non-null pointer to a live C++ `BenchData`; it is
+/// passed unchecked to `benchmark_thunk_add_num`, which mutates that message.
+#[no_mangle]
+pub unsafe extern "C" fn benchmark_thunk_add_num_rs(proto: *mut std::ffi::c_void, num: i32) {
+    benchmark_thunk_add_num(proto, num);
+}
diff --git a/rust/test/benchmarks/proto_benchmarks.rs b/rust/test/benchmarks/proto_benchmarks.rs
new file mode 100644
index 0000000000..bd2bcf4603
--- /dev/null
+++ b/rust/test/benchmarks/proto_benchmarks.rs
@@ -0,0 +1,136 @@
+// All benchmarks are exported as C functions so that they can be called from
+// C++. Each export is named `<benchmark>_rs_<kernel>_bench`, where <kernel>
+// is `cpp` or `upb` depending on the `bench_cpp`/`bench_upb` cfg flag.
+
+use bench_data_rust_proto::BenchData;
+use paste::paste;
+use std::sync::OnceLock;
+
+macro_rules! export_proto_bench {
+    ($name:ident, $protokernel:ident) => {
+        paste! {
+            #[no_mangle]
+            pub extern "C" fn [<$name _rs_ $protokernel _bench>] () {
+                $name();
+            }
+        }
+    };
+}
+
+macro_rules! export_benches {
+    ($name:ident, $($rest:tt)*) => {
+        #[cfg(bench_upb)]
+        export_proto_bench!($name, upb);
+        #[cfg(bench_cpp)]
+        export_proto_bench!($name, cpp);
+
+        export_benches!($($rest)*);
+    };
+    ($name:ident) => {
+        #[cfg(bench_upb)]
+        export_proto_bench!($name, upb);
+        #[cfg(bench_cpp)]
+        export_proto_bench!($name, cpp);
+    };
+}
+
+export_benches!(
+    set_string,
+    set_int,
+    add_10_repeated_msg,
+    copy_from_10_repeated_msg,
+    extend_10_repeated_msg,
+    add_100_ints,
+    copy_from_100_ints,
+    extend_100_ints,
+    sum_1000_ints
+);
+
+pub fn set_string() {
+    let mut data = BenchData::new();
+    data.set_name("a relatively long string that will avoid any short string optimizations.");
+}
+
+pub fn set_int() {
+    let mut data = BenchData::new();
+    data.set_num2(123456789);
+    assert_eq!(data.num2(), 123456789);
+}
+
+pub fn add_10_repeated_msg() {
+    let mut data = BenchData::new();
+    let mut repeated = data.subs_mut();
+    for i in 0..10 {
+        let mut new = BenchData::new();
+        new.set_num2(i);
+        repeated.push(new.as_view());
+    }
+}
+
+static COPY_FROM_10: OnceLock<BenchData> = OnceLock::new();
+
+fn create_10_repeated() -> BenchData {
+    let mut data = BenchData::new();
+    let mut repeated = data.subs_mut();
+    for i in 0..10 {
+        let mut new = BenchData::new();
+        new.set_num2(i);
+        repeated.push(new.as_view());
+    }
+    data
+}
+
+pub fn copy_from_10_repeated_msg() {
+    let source = COPY_FROM_10.get_or_init(create_10_repeated);
+
+    let mut data = BenchData::new();
+    data.subs_mut().copy_from(source.subs());
+}
+
+pub fn extend_10_repeated_msg() {
+    let source = COPY_FROM_10.get_or_init(create_10_repeated);
+    let mut data = BenchData::new();
+    data.subs_mut().extend(source.subs());
+}
+
+pub fn add_100_ints() {
+    let mut data = BenchData::new();
+    let mut repeated = data.nums_mut();
+    for i in 0..100 {
+        repeated.push(i);
+    }
+}
+
+fn create_100_ints() -> BenchData {
+    let mut data = BenchData::new();
+    data.nums_mut().extend(0..100);
+    data
+}
+
+static COPY_FROM_100: OnceLock<BenchData> = OnceLock::new();
+
+pub fn copy_from_100_ints() {
+    let source = COPY_FROM_100.get_or_init(create_100_ints);
+
+    let mut data = BenchData::new();
+    data.nums_mut().copy_from(source.nums());
+}
+
+pub fn extend_100_ints() {
+    let mut data = BenchData::new();
+    let mut repeated = data.nums_mut();
+    repeated.extend(0..100);
+    assert_eq!(repeated.len(), 100);
+}
+
+static ONE_THOUSAND: OnceLock<BenchData> = OnceLock::new();
+pub fn sum_1000_ints() {
+    let source = ONE_THOUSAND.get_or_init(|| {
+        let mut data = BenchData::new();
+        data.nums_mut().extend(0..1000);
+        data
+    });
+
+    let sum: i32 = source.nums().iter().sum();
+    assert_eq!(sum, 499500);
+}
diff --git a/rust/test/benchmarks/rust_protobuf_benchmarks.cc b/rust/test/benchmarks/rust_protobuf_benchmarks.cc
new file mode 100644
index 0000000000..e8c882419f
--- /dev/null
+++ b/rust/test/benchmarks/rust_protobuf_benchmarks.cc
@@ -0,0 +1,259 @@
+#include <algorithm>
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+#include <benchmark/benchmark.h>
+#include "absl/log/absl_check.h"
+#include "google/protobuf/repeated_ptr_field.h"
+#include "google/protobuf/rust/test/benchmarks/bench_data.pb.h"
+#include "google/protobuf/rust/test/benchmarks/bench_data.upb.proto.h"
+#include "protos/protos.h"
+
+using benchmarks::BenchData;
+
+#define EXTERN_BENCHMARK(NAME)              \
+  extern "C" {                              \
+  void NAME##_bench();                      \
+  }                                         \
+  void BM_##NAME(benchmark::State& state) { \
+    for (auto s : state) {                  \
+      NAME##_bench();                       \
+    }                                       \
+  }                                         \
+  BENCHMARK(BM_##NAME);
+
+#define PROTO_BENCHMARK(NAME)   \
+  EXTERN_BENCHMARK(NAME##_cpp); \
+  EXTERN_BENCHMARK(NAME##_upb);
+
+void BM_set_string_cpp(benchmark::State& state) {
+  for (auto s : state) {
+    auto data = std::make_unique<BenchData>();
+    data->set_name(
+        "a relatively long string that will avoid any short string "
+        "optimizations.");
+  }
+}
+
+BENCHMARK(BM_set_string_cpp);
+
+PROTO_BENCHMARK(set_string_rs);
+
+void BM_set_int_cpp(benchmark::State& state) {
+  for (auto s : state) {
+    auto data = std::make_unique<BenchData>();
+    data->set_num2(123456789);
+    ABSL_CHECK_EQ(data->num2(), 123456789);
+  }
+}
+
+BENCHMARK(BM_set_int_cpp);
+
+PROTO_BENCHMARK(set_int_rs);
+
+extern "C" {
+void benchmark_thunk_set_num2_rs(void* data, int32_t num2);
+}
+
+void BM_set_int_cpp_roundtrip(benchmark::State& state) {
+  for (auto s : state) {
+    auto data = std::make_unique<BenchData>();
+    benchmark_thunk_set_num2_rs(data.get(), 123456789);
+    ABSL_CHECK_EQ(data->num2(), 123456789);
+  }
+}
+
+BENCHMARK(BM_set_int_cpp_roundtrip);
+
+void BM_add_10_repeated_msg_copy_cpp(benchmark::State& state) {
+  for (auto s : state) {
+    auto data = std::make_unique<BenchData>();
+    for (int i = 0; i < 10; ++i) {
+      auto sub_data = std::make_unique<BenchData>();
+      sub_data->set_num2(i);
+      *data->add_subs() = *sub_data;
+    }
+  }
+}
+
+BENCHMARK(BM_add_10_repeated_msg_copy_cpp);
+
+PROTO_BENCHMARK(add_10_repeated_msg_rs);
+
+void BM_add_10_repeated_msg_direct_cpp(benchmark::State& state) {
+  for (auto s : state) {
+    auto data = std::make_unique<BenchData>();
+    for (int i = 0; i < 10; ++i) {
+      auto sub_data = data->add_subs();
+      sub_data->set_num2(i);
+    }
+  }
+}
+
+BENCHMARK(BM_add_10_repeated_msg_direct_cpp);
+
+void BM_copy_from_10_repeated_msg_cpp(benchmark::State& state) {
+  auto source = std::make_unique<BenchData>();
+  for (int i = 0; i < 10; ++i) {
+    auto sub_data = source->add_subs();
+    sub_data->set_num2(i);
+  }
+  for (auto s : state) {
+    benchmarks::BenchData data;
+    *data.mutable_subs() = source->subs();
+  }
+}
+
+BENCHMARK(BM_copy_from_10_repeated_msg_cpp);
+
+void BM_back_inserter_from_10_repeated_msg_cpp(benchmark::State& state) {
+  auto source = std::make_unique<BenchData>();
+  for (int i = 0; i < 10; ++i) {
+    auto sub_data = source->add_subs();
+    sub_data->set_num2(i);
+  }
+  for (auto s : state) {
+    benchmarks::BenchData data;
+    std::copy(source->subs().begin(), source->subs().end(),
+              google::protobuf::RepeatedFieldBackInserter(data.mutable_subs()));
+  }
+}
+
+BENCHMARK(BM_back_inserter_from_10_repeated_msg_cpp);
+
+PROTO_BENCHMARK(copy_from_10_repeated_msg_rs);
+
+PROTO_BENCHMARK(extend_10_repeated_msg_rs);
+
+void BM_add_100_ints_cpp(benchmark::State& state) {
+  for (auto s : state) {
+    auto data = std::make_unique<BenchData>();
+    for (int i = 0; i < 100; ++i) {
+      data->mutable_nums()->Add(i);
+    }
+  }
+}
+
+BENCHMARK(BM_add_100_ints_cpp);
+
+void BM_add_100_ints_upb(benchmark::State& state) {
+  for (auto s : state) {
+    ::protos::Arena arena;
+    auto data = ::protos::CreateMessage<benchmarks::protos::BenchData>(arena);
+    for (int i = 0; i < 100; ++i) {
+      data.add_nums(i);
+    }
+  }
+}
+
+BENCHMARK(BM_add_100_ints_upb);
+
+PROTO_BENCHMARK(add_100_ints_rs);
+
+extern "C" {
+void benchmark_thunk_add_num_rs(void* data, int32_t num);
+}
+
+void BM_add_100_ints_rs_roundtrip(benchmark::State& state) {
+  for (auto s : state) {
+    auto data = std::make_unique<BenchData>();
+    for (int i = 0; i < 100; ++i) {
+      benchmark_thunk_add_num_rs(data.get(), i);
+    }
+  }
+}
+
+BENCHMARK(BM_add_100_ints_rs_roundtrip);
+
+void BM_copy_from_100_ints_cpp(benchmark::State& state) {
+  auto source = std::make_unique<BenchData>();
+  for (int i = 0; i < 100; ++i) {
+    source->add_nums(i);
+  }
+  for (auto s : state) {
+    auto data = std::make_unique<BenchData>();
+    *data->mutable_nums() = source->nums();
+    ABSL_CHECK_EQ(data->nums()[99], 99);
+  }
+}
+
+BENCHMARK(BM_copy_from_100_ints_cpp);
+
+void BM_copy_from_100_ints_upb(benchmark::State& state) {
+  ::protos::Arena arena;
+  auto source = ::protos::CreateMessage<benchmarks::protos::BenchData>(arena);
+  for (int i = 0; i < 100; ++i) {
+    source.add_nums(i);
+  }
+  for (auto s : state) {
+    auto data = ::protos::CreateMessage<benchmarks::protos::BenchData>(arena);
+    data.resize_nums(source.nums_size());
+    std::copy(source.nums().begin(), source.nums().end(),
+              data.mutable_nums()->begin());
+    ABSL_CHECK_EQ(data.nums()[99], 99);
+  }
+}
+
+BENCHMARK(BM_copy_from_100_ints_upb);
+
+PROTO_BENCHMARK(copy_from_100_ints_rs);
+
+PROTO_BENCHMARK(extend_100_ints_rs);
+
+EXTERN_BENCHMARK(extend_100_ints_vec_rs);
+
+PROTO_BENCHMARK(sum_1000_ints_rs);
+
+EXTERN_BENCHMARK(sum_1000_ints_vec_rs);
+
+void BM_sum_1000_ints_cpp(benchmark::State& state) {
+  auto source = std::make_unique<BenchData>();
+  for (int i = 0; i < 1000; ++i) {
+    source->add_nums(i);
+  }
+
+  for (auto s : state) {
+    int sum = 0;
+    for (auto x : source->nums()) {
+      sum += x;
+    }
+    ABSL_CHECK_EQ(sum, 499500);
+  }
+}
+
+BENCHMARK(BM_sum_1000_ints_cpp);
+
+void BM_sum_1000_ints_upb(benchmark::State& state) {
+  ::protos::Arena arena;
+  auto data = ::protos::CreateMessage<benchmarks::protos::BenchData>(arena);
+  for (int i = 0; i < 1000; ++i) {
+    data.add_nums(i);
+  }
+  for (auto s : state) {
+    int sum = 0;
+    for (auto x : data.nums()) {
+      sum += x;
+    }
+    ABSL_CHECK_EQ(sum, 499500);
+  }
+}
+
+BENCHMARK(BM_sum_1000_ints_upb);
+
+void BM_sum_1000_ints_vector(benchmark::State& state) {
+  std::vector<int> nums;
+  nums.reserve(1000);
+  for (int i = 0; i < 1000; ++i) {
+    nums.push_back(i);
+  }
+  for (auto s : state) {
+    int sum = 0;
+    for (auto x : nums) {
+      sum += x;
+    }
+    ABSL_CHECK_EQ(sum, 499500);
+  }
+}
+
+BENCHMARK(BM_sum_1000_ints_vector);