Create the concept of 'owned data' in upb/rust as a generalization of the upb.rs SerializedData (an arena + data pair for arbitrary types, both thin and wide ref types), and use that for the wire parse/serialize path.

PiperOrigin-RevId: 627814154
pull/16561/head
Protobuf Team Bot 7 months ago committed by Copybara-Service
parent ef02087534
commit 734729afc2
  1. 68
      rust/upb.rs
  2. 1
      rust/upb/BUILD
  3. 49
      rust/upb/arena.rs
  4. 11
      rust/upb/lib.rs
  5. 4
      rust/upb/message.rs
  6. 111
      rust/upb/owned_arena_box.rs
  7. 66
      rust/upb/wire.rs
  8. 40
      src/google/protobuf/compiler/rust/message.cc

@ -14,9 +14,7 @@ use crate::{
}; };
use core::fmt::Debug; use core::fmt::Debug;
use std::alloc::Layout; use std::alloc::Layout;
use std::fmt;
use std::mem::{size_of, ManuallyDrop, MaybeUninit}; use std::mem::{size_of, ManuallyDrop, MaybeUninit};
use std::ops::Deref;
use std::ptr::{self, NonNull}; use std::ptr::{self, NonNull};
use std::slice; use std::slice;
use std::sync::OnceLock; use std::sync::OnceLock;
@ -60,55 +58,7 @@ impl ScratchSpace {
} }
} }
/// Serialized Protobuf wire format data. pub type SerializedData = upb::OwnedArenaBox<[u8]>;
///
/// It's typically produced by `<Message>::serialize()`.
pub struct SerializedData {
// Start of the serialized bytes; readable for `len` bytes per the contract
// of `from_raw_parts`.
data: NonNull<u8>,
// Number of bytes readable starting at `data`.
len: usize,
// The arena that owns `data`.
_arena: Arena,
}
impl SerializedData {
/// Construct `SerializedData` from raw pointers and its owning arena.
///
/// # Safety
/// - `arena` must be have allocated `data`
/// - `data` must be readable for `len` bytes and not mutate while this
/// struct exists
pub unsafe fn from_raw_parts(arena: Arena, data: NonNull<u8>, len: usize) -> Self {
SerializedData { _arena: arena, data, len }
}
/// Gets a raw slice pointer.
pub fn as_ptr(&self) -> *const [u8] {
ptr::slice_from_raw_parts(self.data.as_ptr(), self.len)
}
}
impl Deref for SerializedData {
    type Target = [u8];

    /// Borrows the serialized bytes for as long as `self` is borrowed.
    fn deref(&self) -> &[u8] {
        let (start, len) = (self.data.as_ptr(), self.len);
        // SAFETY: the caller of `SerializedData::from_raw_parts` promised that
        // `data` is valid for reads of `len` bytes while this struct exists.
        unsafe { slice::from_raw_parts(start, len) }
    }
}
// TODO: remove after IntoProxied has been implemented for bytes.
impl AsRef<[u8]> for SerializedData {
    /// Exposes the serialized bytes as a plain slice by going through `Deref`.
    fn as_ref(&self) -> &[u8] {
        &**self
    }
}
impl fmt::Debug for SerializedData {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fmt::Debug::fmt(self.deref(), f)
}
}
// TODO: Investigate replacing this with direct access to UPB bits. // TODO: Investigate replacing this with direct access to UPB bits.
pub type MessagePresentMutData<'msg, T> = crate::vtable::RawVTableOptionalMutatorData<'msg, T>; pub type MessagePresentMutData<'msg, T> = crate::vtable::RawVTableOptionalMutatorData<'msg, T>;
@ -812,22 +762,6 @@ mod tests {
use super::*; use super::*;
use googletest::prelude::*; use googletest::prelude::*;
#[test]
fn test_serialized_data_roundtrip() {
    let bytes = b"Hello world";
    let data = NonNull::new(bytes.as_ptr() as *mut u8).unwrap();
    // SAFETY: the bytes are a 'static literal (not an arena allocation), so
    // they stay readable and unmutated for the lifetime of `serialized`.
    let serialized =
        unsafe { SerializedData::from_raw_parts(Arena::new(), data, bytes.len()) };
    assert_that!(&*serialized, eq(b"Hello world"));
}
#[test] #[test]
fn assert_c_type_sizes() { fn assert_c_type_sizes() {
// TODO: add these same asserts in C++. // TODO: add these same asserts in C++.

@ -23,6 +23,7 @@ rust_library(
"message_value.rs", "message_value.rs",
"mini_table.rs", "mini_table.rs",
"opaque_pointee.rs", "opaque_pointee.rs",
"owned_arena_box.rs",
"string_view.rs", "string_view.rs",
"wire.rs", "wire.rs",
], ],

@ -3,7 +3,7 @@ use std::alloc::{self, Layout};
use std::cell::UnsafeCell; use std::cell::UnsafeCell;
use std::marker::PhantomData; use std::marker::PhantomData;
use std::mem::{align_of, MaybeUninit}; use std::mem::{align_of, MaybeUninit};
use std::ptr::NonNull; use std::ptr::{self, NonNull};
use std::slice; use std::slice;
opaque_pointee!(upb_Arena); opaque_pointee!(upb_Arena);
@ -95,6 +95,53 @@ impl Arena {
// `UPB_MALLOC_ALIGN` boundary. // `UPB_MALLOC_ALIGN` boundary.
unsafe { slice::from_raw_parts_mut(ptr.cast(), layout.size()) } unsafe { slice::from_raw_parts_mut(ptr.cast(), layout.size()) }
} }
/// Same as alloc() but panics if `layout.align() > UPB_MALLOC_ALIGN`.
///
/// This is the safe entry point to `alloc()`: the alignment precondition that
/// `alloc()` leaves to its caller is verified here with an assertion.
// Handing out `&mut` from `&self` is what the clippy lint below complains
// about; it is allowed explicitly here.
#[allow(clippy::mut_from_ref)]
#[inline]
pub fn checked_alloc(&self, layout: Layout) -> &mut [MaybeUninit<u8>] {
assert!(layout.align() <= UPB_MALLOC_ALIGN);
// SAFETY: layout.align() <= UPB_MALLOC_ALIGN asserted.
unsafe { self.alloc(layout) }
}
/// Copies `data` into memory owned by this arena and returns a reference to
/// the arena-resident copy, borrowed for as long as the arena is borrowed.
///
/// Panics (via `checked_alloc`) if `T` needs an alignment greater than
/// `UPB_MALLOC_ALIGN`.
pub fn copy_in<'a, T: Copy>(&'a self, data: &T) -> &'a T {
    let slot = self
        .checked_alloc(Layout::for_value(data))
        .as_mut_ptr()
        .cast::<MaybeUninit<T>>();
    // SAFETY:
    // - `slot` points at an allocation of `layout.size()` bytes whose alignment
    //   requirement was checked by `checked_alloc`; the uninitialized bytes are
    //   only written here and never read before the write.
    // - `T` is `Copy`, so duplicating the value bytewise is sound.
    unsafe { (*slot).write(*data) }
}
/// Copies the string `s` into this arena and returns the arena-resident copy.
pub fn copy_str_in<'a>(&'a self, s: &str) -> &'a str {
    let bytes: &'a [u8] = self.copy_slice_in(s.as_bytes());
    // SAFETY: `bytes` is a byte-for-byte copy of `s`, which is valid UTF-8, so
    // the copy satisfies the `&str` invariant as well.
    unsafe { std::str::from_utf8_unchecked(bytes) }
}
/// Copies every element of `data` into this arena and returns the
/// arena-resident slice, borrowed for as long as the arena is borrowed.
///
/// Panics (via `checked_alloc`) if `T` needs an alignment greater than
/// `UPB_MALLOC_ALIGN`.
pub fn copy_slice_in<'a, T: Copy>(&'a self, data: &[T]) -> &'a [T] {
    let count = data.len();
    let dst = self.checked_alloc(Layout::for_value(data)).as_mut_ptr().cast::<T>();
    // SAFETY:
    // - `dst` points at an allocation of `layout.size()` bytes (room for
    //   `count` values of `T`); the uninitialized bytes are written here
    //   before anything reads them.
    // - `T` is `Copy`, so duplicating the elements bytewise is sound.
    unsafe {
        ptr::copy_nonoverlapping(data.as_ptr(), dst, count);
        slice::from_raw_parts(dst, count)
    }
}
} }
impl Default for Arena { impl Default for Arena {

@ -21,7 +21,9 @@ pub use map::{
}; };
mod message; mod message;
pub use message::{upb_Message, upb_Message_DeepClone, upb_Message_DeepCopy, RawMessage}; pub use message::{
upb_Message, upb_Message_DeepClone, upb_Message_DeepCopy, upb_Message_New, RawMessage,
};
mod message_value; mod message_value;
pub use message_value::{upb_MessageValue, upb_MutableMessageValue}; pub use message_value::{upb_MessageValue, upb_MutableMessageValue};
@ -31,8 +33,11 @@ pub use mini_table::{upb_MiniTable, RawMiniTable};
mod opaque_pointee; mod opaque_pointee;
mod owned_arena_box;
pub use owned_arena_box::OwnedArenaBox;
mod string_view; mod string_view;
pub use string_view::StringView; pub use string_view::StringView;
mod wire; pub mod wire;
pub use wire::{upb_Decode, upb_Encode, DecodeStatus, EncodeStatus}; pub use wire::{upb_Decode, DecodeStatus, EncodeStatus};

@ -6,6 +6,10 @@ opaque_pointee!(upb_Message);
pub type RawMessage = NonNull<upb_Message>; pub type RawMessage = NonNull<upb_Message>;
extern "C" { extern "C" {
/// Binding for upb's message constructor. Because `RawMessage` is a
/// `NonNull`, a null pointer returned by C is surfaced as `None`.
///
/// SAFETY: No constraints.
// NOTE(review): "No constraints" looks too weak for a function that takes a
// raw `mini_table` pointer and a `RawArena`; presumably both must be valid for
// the duration of the call — confirm against the upb C API.
pub fn upb_Message_New(mini_table: *const upb_MiniTable, arena: RawArena)
-> Option<RawMessage>;
pub fn upb_Message_DeepCopy( pub fn upb_Message_DeepCopy(
dst: RawMessage, dst: RawMessage,
src: RawMessage, src: RawMessage,

@ -0,0 +1,111 @@
use crate::Arena;
use std::fmt::{self, Debug};
use std::ops::{Deref, DerefMut};
use std::ptr::NonNull;
/// An 'owned' T, similar to a Box<T> where the T is data
/// held in a upb Arena. By holding the data pointer and a corresponding arena
/// together the data liveness is maintained.
///
/// This struct is conceptually self-referential, where `data` points at memory
/// inside `arena`. This avoids typical concerns of self-referential data
/// structures because `arena` modifications (other than drop) will never
/// invalidate `data`, and `data` and `arena` are both behind indirections which
/// avoids any concern with std::mem::swap.
pub struct OwnedArenaBox<T: ?Sized + 'static> {
// Pointer to the payload; its validity is the caller's promise made to the
// unsafe constructor `new()`.
data: NonNull<T>,
// Arena held alongside `data` so the memory it points at stays live.
arena: Arena,
}
impl<T: ?Sized + 'static> OwnedArenaBox<T> {
/// Construct `OwnedArenaBox` from raw pointers and its owning arena.
///
/// # Safety
/// - `data` must satisfy the safety constraints of pointer::as_mut::<'a>()
/// where 'a is the passed arena's lifetime (`data` should be valid and
/// not mutated while this struct is live).
/// - `data` should be a pointer into a block from a previous allocation on
/// `arena`, or to another arena fused to it, or should be pointing at
/// 'static data (and if it is pointing at any struct like upb_Message,
/// all data transitively reachable should similarly be kept live by
/// `arena` or be 'static).
pub unsafe fn new(data: NonNull<T>, arena: Arena) -> Self {
OwnedArenaBox { arena, data }
}
pub fn data(&self) -> *const T {
self.data.as_ptr()
}
pub fn into_parts(self) -> (NonNull<T>, Arena) {
(self.data, self.arena)
}
}
impl<T: ?Sized + 'static> Deref for OwnedArenaBox<T> {
    type Target = T;

    /// Borrows the arena-held payload immutably.
    fn deref(&self) -> &T {
        // SAFETY:
        // - `data` is valid under the conditions set on ::new().
        unsafe { self.data.as_ref() }
    }
}
impl<T: ?Sized + 'static> DerefMut for OwnedArenaBox<T> {
    /// Borrows the arena-held payload mutably.
    fn deref_mut(&mut self) -> &mut T {
        // SAFETY:
        // - `data` is valid under the conditions set on ::new().
        unsafe { self.data.as_mut() }
    }
}
impl<T: ?Sized + 'static> AsRef<T> for OwnedArenaBox<T> {
    /// Shared access to the payload, tied to the borrow of `self`.
    fn as_ref(&self) -> &T {
        let raw: *const T = self.data.as_ptr();
        // SAFETY:
        // - `data` is valid under the conditions set on ::new().
        unsafe { &*raw }
    }
}
impl<T: ?Sized + 'static> AsMut<T> for OwnedArenaBox<T> {
    /// Exclusive access to the payload, tied to the mutable borrow of `self`.
    fn as_mut(&mut self) -> &mut T {
        let raw: *mut T = self.data.as_ptr();
        // SAFETY:
        // - `data` is valid under the conditions set on ::new().
        unsafe { &mut *raw }
    }
}
impl<T: Debug + 'static> Debug for OwnedArenaBox<T> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.debug_tuple("OwnedArenaBox").field(self.deref()).finish()
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::str;

    /// A wide pointer to 'static bytes survives being wrapped and dereferenced.
    #[test]
    fn test_byte_slice_pointer_roundtrip() {
        let bytes: &'static [u8] = b"Hello world";
        let ptr = NonNull::from(bytes);
        // SAFETY: `ptr` refers to 'static data that is never mutated.
        let boxed = unsafe { OwnedArenaBox::new(ptr, Arena::new()) };
        assert_eq!(&*boxed, b"Hello world");
    }

    /// A `str` copied into the arena reads back unchanged through the box.
    #[test]
    fn test_alloc_str_roundtrip() {
        let arena = Arena::new();
        let expected = "Hello";
        let ptr: NonNull<str> = NonNull::from(arena.copy_str_in(expected));
        // SAFETY: `ptr` points into an allocation made on `arena`, which the
        // box takes ownership of.
        let boxed = unsafe { OwnedArenaBox::new(ptr, arena) };
        assert_eq!(&*boxed, expected);
    }

    /// A sized value copied into the arena can be read and overwritten.
    #[test]
    fn test_sized_type_roundtrip() {
        let arena = Arena::new();
        let ptr: NonNull<u32> = NonNull::from(arena.copy_in(&7u32));
        // SAFETY: `ptr` points into an allocation made on `arena`, which the
        // box takes ownership of.
        // NOTE(review): `ptr` is derived from a shared `&u32` and is written
        // through below; confirm this is acceptable under the aliasing rules
        // (e.g. by running the test under Miri).
        let mut boxed = unsafe { OwnedArenaBox::new(ptr, arena) };
        assert_eq!(*boxed, 7);
        *boxed = 8;
        assert_eq!(*boxed, 8);
    }
}

@ -1,8 +1,9 @@
use crate::{upb_ExtensionRegistry, upb_MiniTable, RawArena, RawMessage}; use crate::{upb_ExtensionRegistry, upb_MiniTable, Arena, OwnedArenaBox, RawArena, RawMessage};
use std::ptr::NonNull;
// LINT.IfChange(encode_status) // LINT.IfChange(encode_status)
#[repr(C)] #[repr(C)]
#[derive(PartialEq, Eq, Copy, Clone)] #[derive(PartialEq, Eq, Copy, Clone, Debug)]
pub enum EncodeStatus { pub enum EncodeStatus {
Ok = 0, Ok = 0,
OutOfMemory = 1, OutOfMemory = 1,
@ -13,7 +14,7 @@ pub enum EncodeStatus {
// LINT.IfChange(decode_status) // LINT.IfChange(decode_status)
#[repr(C)] #[repr(C)]
#[derive(PartialEq, Eq, Copy, Clone)] #[derive(PartialEq, Eq, Copy, Clone, Debug)]
pub enum DecodeStatus { pub enum DecodeStatus {
Ok = 0, Ok = 0,
Malformed = 1, Malformed = 1,
@ -25,7 +26,62 @@ pub enum DecodeStatus {
} }
// LINT.ThenChange() // LINT.ThenChange()
/// Serializes `msg` into a freshly created arena and returns the encoded bytes
/// together with that arena. If Err, then EncodeStatus != Ok.
///
/// SAFETY:
/// - `msg` must be associated with `mini_table`.
pub unsafe fn encode(
    msg: RawMessage,
    mini_table: *const upb_MiniTable,
) -> Result<OwnedArenaBox<[u8]>, EncodeStatus> {
    let arena = Arena::new();
    let mut buf: *mut u8 = std::ptr::null_mut();
    let mut len = 0usize;

    // SAFETY:
    // - `mini_table` is the one associated with `msg` (caller's contract).
    // - `buf` and `len` are live locals and therefore legally writable.
    let status = upb_Encode(msg, mini_table, 0, arena.raw(), &mut buf, &mut len);
    if status != EncodeStatus::Ok {
        return Err(status);
    }

    // EncodeStatus Ok should never return NULL data, even for len=0.
    assert!(!buf.is_null());
    // SAFETY: `buf` was just checked to be non-null, and upb guarantees that
    // it is valid to read for `len` bytes.
    let slice = NonNull::new_unchecked(std::ptr::slice_from_raw_parts_mut(buf, len));
    Ok(OwnedArenaBox::new(slice, arena))
}
/// Decodes `buf` into the provided message (merge semantics), passing `arena`
/// to upb for the decode. If Err, then DecodeStatus != Ok.
///
/// SAFETY:
/// - `msg` must be mutable.
/// - `msg` must be associated with `mini_table`.
pub unsafe fn decode(
    buf: &[u8],
    msg: RawMessage,
    mini_table: *const upb_MiniTable,
    arena: &Arena,
) -> Result<(), DecodeStatus> {
    // SAFETY:
    // - `mini_table` is the one associated with `msg` (caller's contract).
    // - the pointer/length pair comes from a live `&[u8]`, so it is legally
    //   readable for `buf.len()` bytes.
    // - `extreg` is null.
    let status = upb_Decode(
        buf.as_ptr(),
        buf.len(),
        msg,
        mini_table,
        std::ptr::null(),
        0,
        arena.raw(),
    );
    if status == DecodeStatus::Ok {
        Ok(())
    } else {
        Err(status)
    }
}
extern "C" { extern "C" {
// SAFETY:
// - `mini_table` is the one associated with `msg`
// - `buf` and `buf_size` are legally writable.
pub fn upb_Encode( pub fn upb_Encode(
msg: RawMessage, msg: RawMessage,
mini_table: *const upb_MiniTable, mini_table: *const upb_MiniTable,
@ -35,6 +91,10 @@ extern "C" {
buf_size: *mut usize, buf_size: *mut usize,
) -> EncodeStatus; ) -> EncodeStatus;
// SAFETY:
// - `mini_table` is the one associated with `msg`
// - `buf` is legally readable for at least `buf_size` bytes.
// - `extreg` is either null or points at a valid upb_ExtensionRegistry.
pub fn upb_Decode( pub fn upb_Decode(
buf: *const u8, buf: *const u8,
buf_size: usize, buf_size: usize,

@ -68,35 +68,17 @@ void MessageSerialize(Context& ctx, const Descriptor& msg) {
case Kernel::kUpb: case Kernel::kUpb:
ctx.Emit({{"minitable", UpbMinitableName(msg)}}, ctx.Emit({{"minitable", UpbMinitableName(msg)}},
R"rs( R"rs(
let arena = $pbr$::Arena::new();
// SAFETY: $minitable$ is a static of a const object. // SAFETY: $minitable$ is a static of a const object.
let mini_table = unsafe { $std$::ptr::addr_of!($minitable$) }; let mini_table = unsafe { $std$::ptr::addr_of!($minitable$) };
let options = 0; // SAFETY: $minitable$ is the one associated with raw_msg().
let mut buf: *mut u8 = std::ptr::null_mut(); let encoded = unsafe {
let mut len = 0; $pbr$::wire::encode(self.raw_msg(), mini_table)
// SAFETY: `mini_table` is the corresponding one that was used to
// construct `self.raw_msg()`.
let status = unsafe {
$pbr$::upb_Encode(self.raw_msg(), mini_table, options, arena.raw(),
&mut buf, &mut len)
}; };
//~ TODO: Currently serialize() on the Rust API is an //~ TODO: Currently serialize() on the Rust API is an
//~ infallible fn, so if upb signals an error here we can only panic. //~ infallible fn, so if upb signals an error here we can only panic.
assert!(status == $pbr$::EncodeStatus::Ok); let serialized = encoded.expect("serialize is not allowed to fail");
let data = if len == 0 { serialized
std::ptr::NonNull::dangling()
} else {
std::ptr::NonNull::new(buf).unwrap()
};
// SAFETY:
// - `arena` allocated `data`.
// - `data` is valid for reads up to `len` and will not be mutated.
unsafe {
$pbr$::SerializedData::from_raw_parts(arena, data, len)
}
)rs"); )rs");
return; return;
} }
@ -131,27 +113,25 @@ void MessageClearAndParse(Context& ctx, const Descriptor& msg) {
let mut msg = Self::new(); let mut msg = Self::new();
// SAFETY: $minitable$ is a static of a const object. // SAFETY: $minitable$ is a static of a const object.
let mini_table = unsafe { $std$::ptr::addr_of!($minitable$) }; let mini_table = unsafe { $std$::ptr::addr_of!($minitable$) };
let ext_reg = std::ptr::null();
let options = 0;
// SAFETY: // SAFETY:
// - `data.as_ptr()` is valid to read for `data.len()` // - `data.as_ptr()` is valid to read for `data.len()`
// - `mini_table` is the one used to construct `msg.raw_msg()` // - `mini_table` is the one used to construct `msg.raw_msg()`
// - `msg.arena().raw()` is held for the same lifetime as `msg`. // - `msg.arena().raw()` is held for the same lifetime as `msg`.
let status = unsafe { let status = unsafe {
$pbr$::upb_Decode( $pbr$::wire::decode(
data.as_ptr(), data.len(), msg.raw_msg(), data, msg.raw_msg(),
mini_table, ext_reg, options, msg.arena().raw()) mini_table, msg.arena())
}; };
match status { match status {
$pbr$::DecodeStatus::Ok => { Ok(_) => {
//~ This swap causes the old self.inner.arena to be moved into `msg` //~ This swap causes the old self.inner.arena to be moved into `msg`
//~ which we immediately drop, which will release any previous //~ which we immediately drop, which will release any previous
//~ message that was held here. //~ message that was held here.
std::mem::swap(self, &mut msg); std::mem::swap(self, &mut msg);
Ok(()) Ok(())
} }
_ => Err($pb$::ParseError) Err(_) => Err($pb$::ParseError)
} }
)rs"); )rs");
return; return;

Loading…
Cancel
Save