From 03687b798b463dad0a6cdb3bc4cb840b968d00ed Mon Sep 17 00:00:00 2001 From: Jakob Buchgraber Date: Wed, 6 Mar 2024 06:19:44 -0800 Subject: [PATCH] #rust #protobuf Implement custom Debug for C++ kernel This change implements a custom Debug for messages, views and muts in the C++ kernel. Debug defers to proto2::Utf8Format. It implements this only for the C++ kernel. We will need to pull in additional dependencies beyond minitables to implement it for UPB as well. This will be done at a later point. PiperOrigin-RevId: 613191236 --- rust/cpp.rs | 47 ++++++++++++++++++++ rust/cpp_kernel/cpp_api.cc | 32 ++++++++++++- rust/cpp_kernel/cpp_api.h | 20 +++++++++ rust/test/cpp/BUILD | 28 ++++++++++++ rust/test/cpp/debug.proto | 8 ++++ rust/test/cpp/debug_test.rs | 12 +++++ src/google/protobuf/compiler/rust/message.cc | 46 +++++++++++++++++-- 7 files changed, 188 insertions(+), 5 deletions(-) create mode 100644 rust/test/cpp/debug.proto create mode 100644 rust/test/cpp/debug_test.rs diff --git a/rust/cpp.rs b/rust/cpp.rs index 7ca34bf577..0712925411 100644 --- a/rust/cpp.rs +++ b/rust/cpp.rs @@ -151,6 +151,47 @@ impl SettableValue<[u8]> for SerializedData { } } +/// A type to transfer an owned Rust string across the FFI boundary: +/// * This struct is ABI-compatible with the equivalent C struct. +/// * It owns its data but does not drop it. Immediately turn it into a +/// `String` by calling `.into()` on it. +/// * `.data` points to a valid UTF-8 string that has been allocated with the +/// Rust allocator and is 1-byte aligned. +/// * `.data` contains exactly `.len` bytes. +/// * The empty string is represented as `.data.is_null() == true`. +#[repr(C)] +pub struct RustStringRawParts { + data: *const u8, + len: usize, +} + +impl From for String { + fn from(value: RustStringRawParts) -> Self { + if value.data.is_null() { + // Handle the case where the string is empty. + return String::new(); + } + // SAFETY: + // - `value.data` contains valid UTF-8 bytes as promised by + // `RustStringRawParts`. + // - `value.data` has been allocated with the Rust allocator and is 1-byte + // aligned as promised by `RustStringRawParts`. + // - `value.data` contains and is allocated for exactly `value.len` bytes. + unsafe { String::from_raw_parts(value.data as *mut u8, value.len, value.len) } + } +} + +extern "C" { + fn utf8_debug_string(msg: RawMessage) -> RustStringRawParts; +} + +pub fn debug_string(_private: Private, msg: RawMessage, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // SAFETY: + // - `msg` is a valid protobuf message. + let dbg_str: String = unsafe { utf8_debug_string(msg) }.into(); + write!(f, "{dbg_str}") +} + pub type MessagePresentMutData<'msg, T> = crate::vtable::RawVTableOptionalMutatorData<'msg, T>; pub type MessageAbsentMutData<'msg, T> = crate::vtable::RawVTableOptionalMutatorData<'msg, T>; pub type BytesPresentMutData<'msg> = crate::vtable::RawVTableOptionalMutatorData<'msg, [u8]>; @@ -676,4 +717,10 @@ mod tests { let serialized_data = SerializedData { data: NonNull::new(ptr).unwrap(), len }; assert_that!(&*serialized_data, eq(b"Hello world")); } + + #[test] + fn test_empty_string() { + let empty_str: String = RustStringRawParts { data: std::ptr::null(), len: 0 }.into(); + assert_that!(empty_str, eq("")); + } } diff --git a/rust/cpp_kernel/cpp_api.cc b/rust/cpp_kernel/cpp_api.cc index c41e04f565..f37a1c7e5a 100644 --- a/rust/cpp_kernel/cpp_api.cc +++ b/rust/cpp_kernel/cpp_api.cc @@ -2,14 +2,15 @@ #include #include +#include #include #include "google/protobuf/map.h" +#include "google/protobuf/message.h" #include "google/protobuf/repeated_field.h" #include "google/protobuf/repeated_ptr_field.h" extern "C" { - #define expose_repeated_field_methods(ty, rust_ty) \ google::protobuf::RepeatedField* __pb_rust_RepeatedField_##rust_ty##_new() { \ return new google::protobuf::RepeatedField(); \ @@ -123,4 +124,33 @@ __PB_RUST_EXPOSE_SCALAR_MAP_METHODS_FOR_VALUE_TYPE( std::string, ProtoStr, google::protobuf::rust_internal::PtrAndLen, std::string(value.ptr, value.len), google::protobuf::rust_internal::PtrAndLen(cpp_value.data(), cpp_value.size())); + +#undef expose_scalar_map_methods +#undef expose_map_methods + +google::protobuf::rust_internal::RustStringRawParts utf8_debug_string( + const google::protobuf::Message* msg) { + std::string text = google::protobuf::Utf8Format(*msg); + return google::protobuf::rust_internal::RustStringRawParts(text); } +} + +namespace google { +namespace protobuf { +namespace rust_internal { + +RustStringRawParts::RustStringRawParts(std::string src) { + if (src.empty()) { + data = nullptr; + len = 0; + } else { + void* data_ = google::protobuf::rust_internal::__pb_rust_alloc(src.length(), 1); + std::memcpy(data_, src.data(), src.length()); + data = static_cast(data_); + len = src.length(); + } +} + +} // namespace rust_internal +} // namespace protobuf +} // namespace google diff --git a/rust/cpp_kernel/cpp_api.h b/rust/cpp_kernel/cpp_api.h index 5ef9317f98..409d169b31 100644 --- a/rust/cpp_kernel/cpp_api.h +++ b/rust/cpp_kernel/cpp_api.h @@ -11,6 +11,8 @@ #define GOOGLE_PROTOBUF_RUST_CPP_KERNEL_CPP_H__ #include +#include +#include #include "google/protobuf/message.h" @@ -144,6 +146,24 @@ struct PtrAndLen { google::protobuf::rust_internal::PtrAndLen(cpp_key.data(), cpp_key.size()), \ value_ty, rust_value_ty, ffi_value_ty, to_cpp_value, to_ffi_value); +// Represents an owned string for FFI purposes. +// +// This must only be used to transfer a string from C++ to Rust. The +// below invariants must hold: +// * Rust and C++ versions of this struct are ABI compatible. +// * The data were allocated using the Rust allocator and are 1 byte aligned. +// * The data is valid UTF-8. +struct RustStringRawParts { + // Owns the memory. + const char* data; + size_t len; + + RustStringRawParts() = delete; + // Copies src. + explicit RustStringRawParts(std::string src); +}; + +extern "C" RustStringRawParts utf8_debug_string(const google::protobuf::Message* msg); } // namespace rust_internal } // namespace protobuf } // namespace google diff --git a/rust/test/cpp/BUILD b/rust/test/cpp/BUILD index 91782bf48b..feaed8d728 100644 --- a/rust/test/cpp/BUILD +++ b/rust/test/cpp/BUILD @@ -9,3 +9,31 @@ # To do that use: # * `rust_cc_proto_library` instead of `rust_proto_library`. # * `//rust:protobuf_cpp` instead of `//rust:protobuf``. + +load("@rules_rust//rust:defs.bzl", "rust_test") +load( + "//rust:defs.bzl", + "rust_cc_proto_library", +) + +proto_library( + name = "debug_proto", + testonly = True, + srcs = ["debug.proto"], +) + +rust_cc_proto_library( + name = "debug_rust_proto", + testonly = True, + deps = [":debug_proto"], +) + +rust_test( + name = "debug_test", + srcs = ["debug_test.rs"], + deps = [ + ":debug_rust_proto", + "//rust:protobuf_cpp", + "@crate_index//:googletest", + ], +) diff --git a/rust/test/cpp/debug.proto b/rust/test/cpp/debug.proto new file mode 100644 index 0000000000..5317c9d206 --- /dev/null +++ b/rust/test/cpp/debug.proto @@ -0,0 +1,8 @@ +syntax = "proto3"; + +package debug; + +message DebugMsg { + int32 id = 1; + string secret_user_data = 2 [debug_redact = true]; +} diff --git a/rust/test/cpp/debug_test.rs b/rust/test/cpp/debug_test.rs new file mode 100644 index 0000000000..d6feec36bc --- /dev/null +++ b/rust/test/cpp/debug_test.rs @@ -0,0 +1,12 @@ +use debug_proto::DebugMsg; +use googletest::prelude::*; + +#[test] +fn test_debug() { + let mut msg = DebugMsg::new(); + msg.set_id(1); + msg.secret_user_data_mut().set("password"); + + assert_that!(format!("{msg:?}"), contains_substring("id: 1")); + assert_that!(format!("{msg:?}"), not(contains_substring("password"))); +} diff --git a/src/google/protobuf/compiler/rust/message.cc b/src/google/protobuf/compiler/rust/message.cc index 6cd69b9320..bfa1e3486a 100644 --- a/src/google/protobuf/compiler/rust/message.cc +++ b/src/google/protobuf/compiler/rust/message.cc @@ -124,6 +124,28 @@ void MessageDeserialize(Context& ctx, const Descriptor& msg) { ABSL_LOG(FATAL) << "unreachable"; } +void MessageDebug(Context& ctx, const Descriptor& msg) { + switch (ctx.opts().kernel) { + case Kernel::kCpp: + ctx.Emit({}, + R"rs( + $pbr$::debug_string($pbi$::Private, self.raw_msg(), f) + )rs"); + return; + + case Kernel::kUpb: + ctx.Emit({}, + R"rs( + f.debug_struct(std::any::type_name::()) + .field("raw_msg", &self.raw_msg()) + .finish() + )rs"); + return; + } + + ABSL_LOG(FATAL) << "unreachable"; +} + void MessageExterns(Context& ctx, const Descriptor& msg) { switch (ctx.opts().kernel) { case Kernel::kCpp: @@ -672,6 +694,7 @@ void GenerateRs(Context& ctx, const Descriptor& msg) { {"Msg::serialize", [&] { MessageSerialize(ctx, msg); }}, {"Msg::deserialize", [&] { MessageDeserialize(ctx, msg); }}, {"Msg::drop", [&] { MessageDrop(ctx, msg); }}, + {"Msg::debug", [&] { MessageDebug(ctx, msg); }}, {"Msg_externs", [&] { MessageExterns(ctx, msg); }}, {"accessor_fns", [&] { @@ -792,12 +815,16 @@ void GenerateRs(Context& ctx, const Descriptor& msg) { }}}, R"rs( #[allow(non_camel_case_types)] - //~ TODO: Implement support for debug redaction - #[derive(Debug)] pub struct $Msg$ { inner: $pbr$::MessageInner } + impl std::fmt::Debug for $Msg$ { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + $Msg::debug$ + } + } + // SAFETY: // - `$Msg$` is `Sync` because it does not implement interior mutability. // Neither does `$Msg$Mut`. @@ -813,13 +840,19 @@ void GenerateRs(Context& ctx, const Descriptor& msg) { type Mut<'msg> = $Msg$Mut<'msg>; } - #[derive(Debug, Copy, Clone)] + #[derive(Copy, Clone)] #[allow(dead_code)] pub struct $Msg$View<'msg> { msg: $pbi$::RawMessage, _phantom: $Phantom$<&'msg ()>, } + impl std::fmt::Debug for $Msg$View<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + $Msg::debug$ + } + } + #[allow(dead_code)] impl<'msg> $Msg$View<'msg> { #[doc(hidden)] @@ -931,13 +964,18 @@ void GenerateRs(Context& ctx, const Descriptor& msg) { $repeated_impl$ $map_value_impl$ - #[derive(Debug)] #[allow(dead_code)] #[allow(non_camel_case_types)] pub struct $Msg$Mut<'msg> { inner: $pbr$::MutatorMessageRef<'msg>, } + impl std::fmt::Debug for $Msg$Mut<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + $Msg::debug$ + } + } + #[allow(dead_code)] impl<'msg> $Msg$Mut<'msg> { #[doc(hidden)]