Implement ctype=CORD for singular string & byte fields

- We introduce two new view types ProtoStringCow and ProtoBytesCow.
 - In UPB, for cord field accessors we always return a Cow::Borrowed.
 - In C++, for cord field accessors we check if the underlying absl::Cord is flat (contiguous) and if so return a Cow::Borrowed. If it's not flat we copy the data to a ProtoString and return a Cow::Owned.
   - We expect the absl::Cord to be flat almost all the time. We have experimentally verified that for small strings (<4 KiB) and fewer than 6 appends the cord is in fact flat [1].
 - This change lifts the requirement that all ViewProxy types be Copy. Our Cow types cannot be Copy because the owned types they may hold aren't Copy.

[1] https://source.corp.google.com/piper///depot/google3/experimental/users/buchgr/cords/cords.cc

PiperOrigin-RevId: 655485943
pull/17574/head
Jakob Buchgraber 8 months ago committed by Copybara-Service
parent cdb723815b
commit 8cdc700b5b
  1. 1
      rust/BUILD
  2. 117
      rust/cord.rs
  3. 6
      rust/cpp.rs
  4. 2
      rust/proxied.rs
  5. 2
      rust/shared.rs
  6. 42
      rust/string.rs
  7. 26
      rust/test/shared/BUILD
  8. 38
      rust/test/shared/ctype_cord_test.rs
  9. 8
      rust/test/upb/string_ctypes_test.rs
  10. 1
      src/google/protobuf/compiler/rust/accessors/BUILD.bazel
  11. 7
      src/google/protobuf/compiler/rust/accessors/accessors.cc
  12. 9
      src/google/protobuf/compiler/rust/accessors/generator.h
  13. 288
      src/google/protobuf/compiler/rust/accessors/singular_cord.cc

@ -52,6 +52,7 @@ PROTOBUF_SHARED = [
"internal.rs",
"primitive.rs",
"optional.rs",
"cord.rs",
"proxied.rs",
"repeated.rs",
"shared.rs",

@ -0,0 +1,117 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2024 Google LLC. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
use crate::__internal::Private;
use crate::{
AsView, IntoProxied, IntoView, ProtoBytes, ProtoStr, ProtoString, Proxied, Proxy, View,
ViewProxy,
};
use paste::paste;
use std::cmp::PartialEq;
use std::ops::Deref;
/// Generates the cord flavour of a string-like proxied type.
///
/// For each `owned, target` pair this expands to:
/// - `<owned>Cord`: a marker type that implements `Proxied`;
/// - `<owned>Cow<'a>`: its view type, holding either a borrowed
///   `View<'a, owned>` or an owned value;
/// - `Proxy`, `ViewProxy`, `AsView`, `IntoView`, `IntoProxied<owned>`,
///   `Deref<Target = target>` and `AsRef<[u8]>` impls for the `Cow` type.
macro_rules! impl_cord_types {
    ($($owned_ty:ty, $target_ty:ty);*) => {
        paste! { $(
            // Marker type for the cord-backed field.
            #[derive(Debug)]
            pub struct [< $owned_ty Cord >];

            // View of a cord-backed field: either a borrow or an owned value.
            #[derive(Debug)]
            pub enum [< $owned_ty Cow >]<'a> {
                Borrowed(View<'a, $owned_ty>),
                Owned($owned_ty),
            }

            impl Proxied for [< $owned_ty Cord >] {
                type View<'msg> = [< $owned_ty Cow >]<'msg>;
            }

            impl<'msg> Proxy<'msg> for [< $owned_ty Cow >]<'msg> {}

            impl<'msg> ViewProxy<'msg> for [< $owned_ty Cow >]<'msg> {}

            impl<'msg> AsView for [< $owned_ty Cow >]<'msg> {
                type Proxied = [< $owned_ty Cord >];

                // Always yields the `Borrowed` variant: an owned value is
                // re-exposed as a borrow tied to `&self`.
                fn as_view(&self) -> [< $owned_ty Cow >]<'_> {
                    match self {
                        Self::Borrowed(view) => [< $owned_ty Cow >]::Borrowed(view),
                        Self::Owned(value) => [< $owned_ty Cow >]::Borrowed((*value).as_view()),
                    }
                }
            }

            impl<'msg> IntoView<'msg> for [< $owned_ty Cow >]<'msg> {
                // Keeps the variant; only the lifetime of a borrow is shortened.
                fn into_view<'shorter>(self) -> [< $owned_ty Cow >]<'shorter>
                where
                    'msg: 'shorter,
                {
                    match self {
                        Self::Borrowed(view) => [< $owned_ty Cow >]::Borrowed(view.into_view()),
                        Self::Owned(value) => [< $owned_ty Cow >]::Owned(value),
                    }
                }
            }

            impl IntoProxied<$owned_ty> for [< $owned_ty Cow >]<'_> {
                // An owned value is returned as-is; a borrow is converted
                // through the view's own `into_proxied`.
                fn into_proxied(self, _private: Private) -> $owned_ty {
                    match self {
                        Self::Borrowed(view) => view.into_proxied(Private),
                        Self::Owned(value) => value,
                    }
                }
            }

            impl<'a> Deref for [< $owned_ty Cow >]<'a> {
                type Target = $target_ty;

                fn deref(&self) -> View<'_, $owned_ty> {
                    match self {
                        Self::Owned(value) => (*value).as_view(),
                        Self::Borrowed(view) => view,
                    }
                }
            }

            impl AsRef<[u8]> for [< $owned_ty Cow >]<'_> {
                fn as_ref(&self) -> &[u8] {
                    match self {
                        Self::Owned(value) => value.as_ref(),
                        Self::Borrowed(view) => view.as_ref(),
                    }
                }
            }
        )* }
    }
}

impl_cord_types!(
    ProtoString, ProtoStr;
    ProtoBytes, [u8]
);
macro_rules! impl_eq {
($($t1:ty, $t2:ty);*) => {
paste! { $(
impl PartialEq<$t1> for $t2 {
fn eq(&self, rhs: &$t1) -> bool {
AsRef::<[u8]>::as_ref(self) == AsRef::<[u8]>::as_ref(rhs)
}
}
)*
}
}
}
impl_eq!(
ProtoStringCow<'_>, ProtoStringCow<'_>;
str, ProtoStringCow<'_>;
ProtoStringCow<'_>, str;
ProtoBytesCow<'_>, ProtoBytesCow<'_>;
[u8], ProtoBytesCow<'_>;
ProtoBytesCow<'_>, [u8]
);

@ -114,6 +114,12 @@ impl InnerProtoString {
let s = ManuallyDrop::new(self);
s.owned_ptr
}
/// # Safety
/// - `src` points to a valid CppStdString.
pub unsafe fn from_raw(_private: Private, src: CppStdString) -> InnerProtoString {
InnerProtoString { owned_ptr: src }
}
}
impl From<&[u8]> for InnerProtoString {

@ -209,7 +209,7 @@ pub trait IntoMut<'msg>: AsMut {
pub trait Proxy<'msg>: 'msg + IntoView<'msg> + Sync + Unpin + Sized + Debug {}
/// Declares conversion operations common to view proxies.
pub trait ViewProxy<'msg>: Proxy<'msg> + Copy + Send {}
pub trait ViewProxy<'msg>: Proxy<'msg> + Send {}
/// Declares operations common to all mut proxies.
///

@ -28,6 +28,7 @@ pub mod __public {
write::{Clear, ClearAndParse},
Message, MessageMut, MessageView,
};
pub use crate::cord::{ProtoBytesCow, ProtoStringCow};
pub use crate::r#enum::{Enum, UnknownEnumValue};
pub use crate::map::{Map, MapIter, MapMut, MapView, ProxiedInMapValue};
pub use crate::optional::Optional;
@ -61,6 +62,7 @@ pub mod __runtime;
pub mod __runtime;
mod codegen_traits;
mod cord;
#[path = "enum.rs"]
mod r#enum;
mod map;

@ -39,6 +39,15 @@ impl ProtoBytes {
pub fn into_inner(self, _private: Private) -> InnerProtoString {
self.inner
}
#[doc(hidden)]
pub fn from_inner(_private: Private, inner: InnerProtoString) -> ProtoBytes {
Self { inner }
}
/// Borrows the contents as a byte slice.
pub fn as_view(&self) -> &[u8] {
    let bytes: &[u8] = self.inner.as_bytes();
    bytes
}
}
impl AsRef<[u8]> for ProtoBytes {
@ -177,6 +186,10 @@ pub struct ProtoString {
}
impl ProtoString {
/// Borrows the contents as a `ProtoStr`.
pub fn as_view(&self) -> &ProtoStr {
    let bytes = self.as_bytes();
    // SAFETY: NOTE(review) — no UTF-8 validation happens here; presumably
    // sound because `ProtoStr` does not require its bytes to be valid
    // UTF-8. Confirm against `ProtoStr::from_utf8_unchecked`'s contract.
    unsafe { ProtoStr::from_utf8_unchecked(bytes) }
}
/// Borrows the raw bytes held by the underlying `InnerProtoString`.
pub fn as_bytes(&self) -> &[u8] {
    let bytes: &[u8] = self.inner.as_bytes();
    bytes
}
@ -187,6 +200,17 @@ impl ProtoString {
pub fn into_inner(self, _private: Private) -> InnerProtoString {
self.inner
}
#[doc(hidden)]
pub fn from_inner(_private: Private, inner: InnerProtoString) -> ProtoString {
Self { inner }
}
}
/// Exposes the string's raw bytes, so a `ProtoString` can be compared or
/// passed wherever a byte slice is accepted.
impl AsRef<[u8]> for ProtoString {
    fn as_ref(&self) -> &[u8] {
        let bytes: &[u8] = self.inner.as_bytes();
        bytes
    }
}
impl From<ProtoString> for ProtoBytes {
@ -539,6 +563,24 @@ impl_bytes_partial_cmp!(
<()> str => ProtoStr,
);
/// Formats a `ProtoString` exactly like the `ProtoStr` view of its contents.
impl std::fmt::Debug for ProtoString {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let view = self.as_view();
        std::fmt::Debug::fmt(view, f)
    }
}
/// Formats a `ProtoBytes` exactly like the byte-slice view of its contents.
impl std::fmt::Debug for ProtoBytes {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let view = self.as_view();
        std::fmt::Debug::fmt(view, f)
    }
}
// Manual thread-safety assertions for the owned string types.
// NOTE(review): the justification is not visible in this hunk — presumably
// the wrapped `InnerProtoString` is uniquely-owned data with no shared
// mutable state. Confirm and record a proper `SAFETY:` rationale.
unsafe impl Sync for ProtoString {}
unsafe impl Send for ProtoString {}
unsafe impl Send for ProtoBytes {}
unsafe impl Sync for ProtoBytes {}
#[cfg(test)]
mod tests {
use super::*;

@ -16,6 +16,32 @@
load("@rules_rust//rust:defs.bzl", "rust_test")
# Builds ctype_cord_test.rs against the upb exports; the alias lets the test
# refer to //rust:protobuf_upb_export under the crate name `protobuf`.
rust_test(
name = "ctype_cord_upb_test",
srcs = ["ctype_cord_test.rs"],
aliases = {
"//rust:protobuf_upb_export": "protobuf",
},
deps = [
"//rust:protobuf_upb_export",
"//rust/test:unittest_upb_rust_proto",
"@crate_index//:googletest",
],
)
# Builds the same ctype_cord_test.rs against the cpp exports; the alias lets
# the test refer to //rust:protobuf_cpp_export under the crate name `protobuf`.
rust_test(
name = "ctype_cord_cpp_test",
srcs = ["ctype_cord_test.rs"],
aliases = {
"//rust:protobuf_cpp_export": "protobuf",
},
deps = [
"//rust:protobuf_cpp_export",
"//rust/test:unittest_cpp_rust_proto",
"@crate_index//:googletest",
],
)
rust_test(
name = "child_parent_upb_test",
srcs = ["child_parent_test.rs"],

@ -0,0 +1,38 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2024 Google LLC. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
use googletest::prelude::*;
use unittest_rust_proto::{TestAllTypes, TestCord};
// Covers the singular string cord field: default value, presence tracking,
// and feeding one message's getter result into another message's setter.
#[googletest::test]
fn test_string_cord() {
    let mut source = TestAllTypes::new();
    assert_that!(source.has_optional_cord(), eq(false));
    assert_that!(source.optional_cord(), eq(""));

    source.set_optional_cord("hello");
    assert_that!(source.has_optional_cord(), eq(true));
    assert_that!(source.optional_cord(), eq("hello"));

    // The value returned by the getter is itself accepted by the setter.
    let mut target = TestAllTypes::new();
    target.set_optional_cord(source.optional_cord());
    assert_that!(target.optional_cord(), eq("hello"));
}
// Covers the singular bytes cord field: default value, presence tracking,
// and feeding one message's getter result into another message's setter.
#[googletest::test]
fn test_bytes_cord() {
    let mut source = TestCord::new();
    assert_that!(source.has_optional_bytes_cord(), eq(false));
    assert_that!(source.optional_bytes_cord(), eq("".as_bytes()));

    source.set_optional_bytes_cord(b"hello");
    assert_that!(source.has_optional_bytes_cord(), eq(true));
    assert_that!(source.optional_bytes_cord(), eq("hello".as_bytes()));

    // The value returned by the getter is itself accepted by the setter.
    let mut target = TestCord::new();
    target.set_optional_bytes_cord(source.optional_bytes_cord());
    assert_that!(target.optional_bytes_cord(), eq("hello".as_bytes()));
}

@ -17,14 +17,6 @@ fn test_stringpiece_repeated() {
assert_that!(msg.repeated_string_piece().get(0), some(eq("hello")));
}
#[googletest::test]
fn test_cord() {
let mut msg = TestAllTypes::new();
assert_that!(msg.optional_cord(), eq(""));
msg.set_optional_cord("hello");
assert_that!(msg.optional_cord(), eq("hello"));
}
#[googletest::test]
fn test_cord_repeated() {
let mut msg = TestAllTypes::new();

@ -13,6 +13,7 @@ cc_library(
"default_value.cc",
"map.cc",
"repeated_field.cc",
"singular_cord.cc",
"singular_message.cc",
"singular_scalar.cc",
"singular_string.cc",

@ -30,8 +30,8 @@ std::unique_ptr<AccessorGenerator> AccessorGeneratorFor(
// ctype=STRING_PIECE fields on cpp kernel yet (upb doesn't care about ctype).
auto ctype = field.options().ctype();
if (ctx.is_cpp() &&
(ctype == FieldOptions::CORD ||
(ctype == FieldOptions::STRING_PIECE && field.is_repeated()))) {
(ctype == FieldOptions::CORD || ctype == FieldOptions::STRING_PIECE) &&
field.is_repeated()) {
return std::make_unique<UnsupportedField>(
"fields has an unsupported ctype");
}
@ -56,6 +56,9 @@ std::unique_ptr<AccessorGenerator> AccessorGeneratorFor(
return std::make_unique<SingularScalar>();
case RustFieldType::BYTES:
case RustFieldType::STRING:
if (ctype == FieldOptions::CORD) {
return std::make_unique<SingularCord>();
}
return std::make_unique<SingularString>();
case RustFieldType::MESSAGE:
return std::make_unique<SingularMessage>();

@ -94,6 +94,15 @@ class SingularString final : public AccessorGenerator {
void InThunkCc(Context& ctx, const FieldDescriptor& field) const override;
};
// Accessor generator for singular string/bytes fields declared with
// ctype=CORD. Overrides the three AccessorGenerator hooks below.
class SingularCord final : public AccessorGenerator {
public:
~SingularCord() override = default;
// Emits the field's accessor methods into the Rust message impl.
void InMsgImpl(Context& ctx, const FieldDescriptor& field,
AccessorCase accessor_case) const override;
// Emits the Rust-side declarations of the thunks the accessors call.
void InExternC(Context& ctx, const FieldDescriptor& field) const override;
// Emits the C++ definitions of those thunks.
void InThunkCc(Context& ctx, const FieldDescriptor& field) const override;
};
class SingularMessage final : public AccessorGenerator {
public:
~SingularMessage() override = default;

@ -0,0 +1,288 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2024 Google LLC. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
#include <string>
#include "absl/strings/string_view.h"
#include "google/protobuf/compiler/cpp/helpers.h"
#include "google/protobuf/compiler/rust/accessors/accessor_case.h"
#include "google/protobuf/compiler/rust/accessors/generator.h"
#include "google/protobuf/compiler/rust/context.h"
#include "google/protobuf/compiler/rust/naming.h"
#include "google/protobuf/descriptor.h"
namespace google {
namespace protobuf {
namespace compiler {
namespace rust {
// Emits the Rust accessors for a singular ctype=CORD string/bytes field:
// a getter, a setter (skipped for AccessorCase::VIEW), and, for fields with
// presence, a hazzer and a clearer (the clearer is also skipped for VIEW).
// The getter returns ProtoStringCow for TYPE_STRING fields and ProtoBytesCow
// otherwise.
void SingularCord::InMsgImpl(Context& ctx, const FieldDescriptor& field,
AccessorCase accessor_case) const {
std::string field_name = FieldNameWithCollisionAvoidance(field);
bool is_string_type = field.type() == FieldDescriptor::TYPE_STRING;
ctx.Emit(
{{"field", RsSafeName(field_name)},
{"raw_field_name", field_name},
{"hazzer_thunk", ThunkName(ctx, field, "has")},
{"borrowed_getter_thunk", ThunkName(ctx, field, "get_cord_borrowed")},
{"owned_getter_thunk", ThunkName(ctx, field, "get_cord_owned")},
{"is_flat_thunk", ThunkName(ctx, field, "cord_is_flat")},
{"setter_thunk", ThunkName(ctx, field, "set")},
{"clearer_thunk", ThunkName(ctx, field, "clear")},
{"getter_thunk", ThunkName(ctx, field, "get")},
{"proxied_type", RsTypePath(ctx, field)},
// Borrowed Rust type of the field contents.
// NOTE(review): no template in this function references $borrowed_type$.
{"borrowed_type",
[&] {
if (is_string_type) {
ctx.Emit("$pb$::ProtoStr");
} else {
ctx.Emit("[u8]");
}
}},
// Wraps the local `view` (bound by getter_impl) in the Cow's Borrowed
// variant.
{"transform_borrowed",
[&] {
if (is_string_type) {
ctx.Emit(R"rs(
$pb$::ProtoStringCow::Borrowed(
// SAFETY: The runtime doesn't require ProtoStr to be UTF-8.
unsafe { $pb$::ProtoStr::from_utf8_unchecked(view) }
)
)rs");
} else {
ctx.Emit(R"rs(
$pb$::ProtoBytesCow::Borrowed(
view
)
)rs");
}
}},
// Wraps the local `inner` (bound by getter_impl) in the Cow's Owned
// variant.
{"transform_owned",
[&] {
if (is_string_type) {
ctx.Emit(R"rs(
$pb$::ProtoStringCow::Owned(
$pb$::ProtoString::from_inner($pbi$::Private, inner)
)
)rs");
} else {
ctx.Emit(R"rs(
$pb$::ProtoBytesCow::Owned(
$pb$::ProtoBytes::from_inner($pbi$::Private, inner)
)
)rs");
}
}},
{"view_lifetime", ViewLifetime(accessor_case)},
// Return type of the getter.
{"view_type",
[&] {
if (is_string_type) {
ctx.Emit("$pb$::ProtoStringCow<$view_lifetime$>");
} else {
ctx.Emit("$pb$::ProtoBytesCow<$view_lifetime$>");
}
}},
{"view_self", ViewReceiver(accessor_case)},
// cpp kernel: ask the is_flat thunk first; a flat cord is borrowed via the
// borrowed-getter thunk, otherwise the owned-getter thunk's result is
// wrapped as an owned value. Other kernels: always borrow via the plain
// getter thunk.
{"getter_impl",
[&] {
if (ctx.is_cpp()) {
ctx.Emit(R"rs(
let cord_is_flat = unsafe { $is_flat_thunk$(self.raw_msg()) };
if cord_is_flat {
let view = unsafe { $borrowed_getter_thunk$(self.raw_msg()).as_ref() };
return $transform_borrowed$;
}
let owned = unsafe { $owned_getter_thunk$(self.raw_msg()) };
let inner = unsafe { $pbr$::InnerProtoString::from_raw($pbi$::Private, owned) };
$transform_owned$
)rs");
} else {
ctx.Emit(R"rs(
let view = unsafe { $getter_thunk$(self.raw_msg()).as_ref() };
$transform_borrowed$
)rs");
}
}},
{"getter",
[&] {
ctx.Emit(R"rs(
pub fn $field$($view_self$) -> $view_type$ {
$getter_impl$
}
)rs");
}},
// cpp kernel: converts the value with into_proxied and hands the raw string
// from into_raw to the setter thunk. Other kernels: fuses the value's arena
// with the parent message's arena, then passes the view to the setter thunk.
{"setter_impl",
[&] {
if (ctx.is_cpp()) {
ctx.Emit({},
R"rs(
let s = val.into_proxied($pbi$::Private);
unsafe {
$setter_thunk$(
self.as_mutator_message_ref($pbi$::Private).msg(),
s.into_inner($pbi$::Private).into_raw($pbi$::Private)
);
}
)rs");
} else {
ctx.Emit(R"rs(
let s = val.into_proxied($pbi$::Private);
let (view, arena) =
s.into_inner($pbi$::Private).into_raw_parts($pbi$::Private);
let mm_ref =
self.as_mutator_message_ref($pbi$::Private);
let parent_arena = mm_ref.arena($pbi$::Private);
parent_arena.fuse(&arena);
unsafe {
$setter_thunk$(
self.as_mutator_message_ref($pbi$::Private).msg(),
view
);
}
)rs");
}
}},
// No setter is emitted for the VIEW accessor case.
{"setter",
[&] {
if (accessor_case == AccessorCase::VIEW) return;
ctx.Emit({},
R"rs(
pub fn set_$raw_field_name$(&mut self, val: impl $pb$::IntoProxied<$proxied_type$>) {
$setter_impl$
}
)rs");
}},
// Only fields with presence get a has_* method.
{"hazzer",
[&] {
if (!field.has_presence()) return;
ctx.Emit({}, R"rs(
pub fn has_$raw_field_name$($view_self$) -> bool {
unsafe { $hazzer_thunk$(self.raw_msg()) }
})rs");
}},
// clear_* is emitted only outside the VIEW case and only with presence.
{"clearer",
[&] {
if (accessor_case == AccessorCase::VIEW) return;
if (!field.has_presence()) return;
ctx.Emit({}, R"rs(
pub fn clear_$raw_field_name$(&mut self) {
unsafe { $clearer_thunk$(self.raw_msg()) }
})rs");
}}},
R"rs(
$getter$
$setter$
$hazzer$
$clearer$
)rs");
}
// Emits the Rust-side declarations of the thunks used by the cord accessors:
// hazzer/clearer (only for fields with presence), the getter thunk(s), and
// the setter thunk.
void SingularCord::InExternC(Context& ctx, const FieldDescriptor& field) const {
ctx.Emit(
{{"hazzer_thunk", ThunkName(ctx, field, "has")},
{"borrowed_getter_thunk", ThunkName(ctx, field, "get_cord_borrowed")},
{"owned_getter_thunk", ThunkName(ctx, field, "get_cord_owned")},
{"is_flat_thunk", ThunkName(ctx, field, "cord_is_flat")},
{"getter_thunk", ThunkName(ctx, field, "get")},
{"setter_thunk", ThunkName(ctx, field, "set")},
// The setter takes a CppStdString on the cpp kernel and a PtrAndLen
// otherwise.
{"setter",
[&] {
if (ctx.is_cpp()) {
ctx.Emit(R"rs(
fn $setter_thunk$(raw_msg: $pbr$::RawMessage, val: $pbr$::CppStdString);
)rs");
} else {
ctx.Emit(R"rs(
fn $setter_thunk$(raw_msg: $pbr$::RawMessage, val: $pbr$::PtrAndLen);
)rs");
}
}},
{"clearer_thunk", ThunkName(ctx, field, "clear")},
// cpp kernel: three thunks (is-flat check, borrowed getter, owned getter).
// Other kernels: a single getter returning PtrAndLen.
{"getter_thunks",
[&] {
if (ctx.is_cpp()) {
ctx.Emit(R"rs(
fn $is_flat_thunk$(raw_msg: $pbr$::RawMessage) -> bool;
fn $borrowed_getter_thunk$(raw_msg: $pbr$::RawMessage) -> $pbr$::PtrAndLen;
fn $owned_getter_thunk$(raw_msg: $pbr$::RawMessage) -> $pbr$::CppStdString;
)rs");
} else {
ctx.Emit(R"rs(
fn $getter_thunk$(raw_msg: $pbr$::RawMessage) -> $pbr$::PtrAndLen;
)rs");
}
}},
// Hazzer and clearer declarations exist only for fields with presence.
{"with_presence_fields_thunks",
[&] {
if (field.has_presence()) {
ctx.Emit(R"rs(
fn $hazzer_thunk$(raw_msg: $pbr$::RawMessage) -> bool;
fn $clearer_thunk$(raw_msg: $pbr$::RawMessage);
)rs");
}
}}},
R"rs(
$with_presence_fields_thunks$
$getter_thunks$
$setter$
)rs");
}
// Emits the C++ thunk definitions for a cord field:
//  - hazzer/clearer thunks, only for fields with presence;
//  - is_flat: reports whether the cord's TryFlat() has a value;
//  - borrowed getter: returns a PtrAndLen over TryFlat().value(), so it is
//    only meaningful after is_flat returned true;
//  - owned getter: copies the cord into a heap-allocated std::string and
//    returns the pointer to the caller;
//  - setter: moves the passed std::string into an absl::Cord, stores it in
//    the field, and deletes the std::string.
// NOTE(review): the `;` after $with_presence_fields_thunks$ is presumably
// consumed by io::Printer's callback-substitution handling — confirm.
void SingularCord::InThunkCc(Context& ctx, const FieldDescriptor& field) const {
ctx.Emit(
{{"field", cpp::FieldName(&field)},
{"QualifiedMsg", cpp::QualifiedClassName(field.containing_type())},
{"hazzer_thunk", ThunkName(ctx, field, "has")},
{"setter_thunk", ThunkName(ctx, field, "set")},
{"clearer_thunk", ThunkName(ctx, field, "clear")},
{"borrowed_getter_thunk", ThunkName(ctx, field, "get_cord_borrowed")},
{"owned_getter_thunk", ThunkName(ctx, field, "get_cord_owned")},
{"is_flat_thunk", ThunkName(ctx, field, "cord_is_flat")},
// Hazzer and clearer definitions exist only for fields with presence.
{"with_presence_fields_thunks",
[&] {
if (field.has_presence()) {
ctx.Emit(R"cc(
bool $hazzer_thunk$($QualifiedMsg$* msg) {
return msg->has_$field$();
}
void $clearer_thunk$($QualifiedMsg$* msg) { msg->clear_$field$(); }
)cc");
}
}}},
R"cc(
$with_presence_fields_thunks$;
bool $is_flat_thunk$($QualifiedMsg$* msg) {
const absl::Cord& cord = msg->$field$();
return cord.TryFlat().has_value();
}
::google::protobuf::rust::PtrAndLen $borrowed_getter_thunk$($QualifiedMsg$* msg) {
const absl::Cord& cord = msg->$field$();
absl::string_view s = cord.TryFlat().value();
return ::google::protobuf::rust::PtrAndLen(s.data(), s.size());
}
std::string* $owned_getter_thunk$($QualifiedMsg$* msg) {
const absl::Cord& cord = msg->$field$();
std::string* owned = new std::string();
absl::CopyCordToString(cord, owned);
return owned;
}
void $setter_thunk$($QualifiedMsg$* msg, std::string* s) {
msg->set_$field$(absl::Cord(std::move(*s)));
delete s;
}
)cc");
}
} // namespace rust
} // namespace compiler
} // namespace protobuf
} // namespace google
Loading…
Cancel
Save