protobuf/rust/string.rs

// Protocol Buffers - Google's data interchange format
// Copyright 2023 Google LLC.  All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd

//! Items specific to `bytes` and `string` fields.
#![allow(dead_code)]
#![allow(unused)]

use crate::__internal::Private;
use crate::__runtime::{InnerProtoString, PtrAndLen, RawMessage};
use crate::{IntoProxied, Mut, MutProxied, MutProxy, Optional, Proxied, View, ViewProxy};
use std::borrow::Cow;
use std::cmp::{Eq, Ord, Ordering, PartialEq, PartialOrd};
use std::convert::{AsMut, AsRef};
use std::ffi::{OsStr, OsString};
use std::fmt;
use std::hash::{Hash, Hasher};
use std::iter;
use std::ops::{Deref, DerefMut};
use std::ptr;
use std::rc::Rc;
use std::sync::Arc;
use utf8::Utf8Chunks;

/// An owned type representing a protobuf `bytes` field's contents.
///
/// The data lives in the runtime-provided `InnerProtoString`; it can be
/// borrowed as a byte slice through the `AsRef<[u8]>` implementation.
pub struct ProtoBytes {
    pub(crate) inner: InnerProtoString,
}

/// Borrows the owned contents as a plain byte slice.
impl AsRef<[u8]> for ProtoBytes {
    fn as_ref(&self) -> &[u8] {
        let Self { inner } = self;
        inner.as_bytes()
    }
}

/// Builds an owned `ProtoBytes` from a borrowed byte slice.
impl From<&[u8]> for ProtoBytes {
    fn from(v: &[u8]) -> Self {
        let inner = InnerProtoString::from(v);
        Self { inner }
    }
}

/// Builds an owned `ProtoBytes` from a borrowed fixed-size byte array, so
/// that byte-string literals such as `b"abc"` can be used directly.
impl<const N: usize> From<&[u8; N]> for ProtoBytes {
    fn from(v: &[u8; N]) -> Self {
        let inner = InnerProtoString::from(v.as_slice());
        Self { inner }
    }
}

/// `ProtoBytes` is viewed through plain borrowed byte slices.
impl Proxied for ProtoBytes {
    // The view of a `ProtoBytes` is a `&[u8]` borrowed for `'msg`.
    type View<'msg> = &'msg [u8];
}

/// Identity conversion: an owned `ProtoBytes` is passed through untouched.
impl IntoProxied<ProtoBytes> for ProtoBytes {
    fn into_proxied(self, _: Private) -> Self {
        self
    }
}

/// Converts a borrowed byte slice via `From<&[u8]>`.
impl IntoProxied<ProtoBytes> for &[u8] {
    fn into_proxied(self, _: Private) -> ProtoBytes {
        self.into()
    }
}

/// Converts a borrowed fixed-size byte array by viewing it as a slice.
impl<const N: usize> IntoProxied<ProtoBytes> for &[u8; N] {
    fn into_proxied(self, _: Private) -> ProtoBytes {
        let bytes: &[u8] = self.as_slice();
        ProtoBytes::from(bytes)
    }
}

/// Converts an owned `Vec<u8>` by going through its slice contents.
impl IntoProxied<ProtoBytes> for Vec<u8> {
    fn into_proxied(self, _: Private) -> ProtoBytes {
        let bytes: &[u8] = self.as_slice();
        ProtoBytes::from(bytes)
    }
}

/// Converts a borrowed `Vec<u8>` by going through its slice contents.
impl IntoProxied<ProtoBytes> for &Vec<u8> {
    fn into_proxied(self, _: Private) -> ProtoBytes {
        let bytes: &[u8] = self.as_slice();
        ProtoBytes::from(bytes)
    }
}

/// Converts a boxed byte slice by going through the slice it owns.
impl IntoProxied<ProtoBytes> for Box<[u8]> {
    fn into_proxied(self, _: Private) -> ProtoBytes {
        let bytes: &[u8] = &self;
        ProtoBytes::from(bytes)
    }
}

/// Converts a borrowed-or-owned byte slice by going through its contents.
impl IntoProxied<ProtoBytes> for Cow<'_, [u8]> {
    fn into_proxied(self, _: Private) -> ProtoBytes {
        let bytes: &[u8] = &self;
        ProtoBytes::from(bytes)
    }
}

/// Converts a reference-counted byte slice by going through its contents.
impl IntoProxied<ProtoBytes> for Rc<[u8]> {
    fn into_proxied(self, _: Private) -> ProtoBytes {
        let bytes: &[u8] = &self;
        ProtoBytes::from(bytes)
    }
}

/// Converts an atomically reference-counted byte slice by going through its
/// contents.
impl IntoProxied<ProtoBytes> for Arc<[u8]> {
    fn into_proxied(self, _: Private) -> ProtoBytes {
        let bytes: &[u8] = &self;
        ProtoBytes::from(bytes)
    }
}

/// A `&[u8]` is its own view: both accessors hand back the same slice.
impl<'msg> ViewProxy<'msg> for &'msg [u8] {
    type Proxied = ProtoBytes;

    fn as_view(&self) -> &[u8] {
        *self
    }

    fn into_view<'shorter>(self) -> &'shorter [u8]
    where
        'msg: 'shorter,
    {
        // Shared references are covariant, so the lifetime simply shrinks.
        let shortened: &'shorter [u8] = self;
        shortened
    }
}

/// The bytes were not valid UTF-8.
///
/// This type implements [`std::error::Error`], so it can be boxed as
/// `Box<dyn Error>` or propagated with `?` into generic error types.
#[derive(Debug, PartialEq, Eq)]
pub struct Utf8Error(pub(crate) ());

impl fmt::Display for Utf8Error {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("the bytes were not valid UTF-8")
    }
}

impl std::error::Error for Utf8Error {}

/// Discards the details of the standard library error; only the fact that
/// validation failed is retained.
impl From<std::str::Utf8Error> for Utf8Error {
    fn from(_: std::str::Utf8Error) -> Utf8Error {
        Utf8Error(())
    }
}

/// An owned type representing protobuf `string` field's contents.
///
/// # UTF-8
///
/// Protobuf [docs] state that a `string` field contains UTF-8 encoded text.
/// However, not every runtime enforces this, and the Rust runtime is designed
/// to integrate with other runtimes with FFI, like C++.
///
/// `ProtoString` represents a string type that is expected to contain valid
/// UTF-8. However, `ProtoString` is not validated, so users must
/// call [`ProtoString::to_string`] to perform a (possibly runtime-elided) UTF-8
/// validation check. This validation should rarely fail in pure Rust programs,
/// but is necessary to prevent UB when interacting with C++, or other languages
/// with looser restrictions.
///
///
/// # `Display` and `ToString`
/// `ProtoString` is ordinarily UTF-8 and so implements `Display`. If there are
/// any invalid UTF-8 sequences, they are replaced with [`U+FFFD REPLACEMENT
/// CHARACTER`]. Because anything implementing `Display` also implements
/// `ToString`, `ProtoString::to_string()` is equivalent to
/// `String::from_utf8_lossy(proto_string.as_bytes()).into_owned()`.
///
/// [docs]: https://protobuf.dev/programming-guides/proto2/#scalar
/// [`U+FFFD REPLACEMENT CHARACTER`]: std::char::REPLACEMENT_CHARACTER
pub struct ProtoString {
    pub(crate) inner: InnerProtoString,
}

impl ProtoString {
    /// Borrows the contents as a byte slice.
    ///
    /// No UTF-8 validation is performed by this accessor.
    pub fn as_bytes(&self) -> &[u8] {
        let Self { inner } = self;
        inner.as_bytes()
    }
}

/// Builds an owned `ProtoString` from the UTF-8 bytes of a string slice.
impl From<&str> for ProtoString {
    fn from(v: &str) -> Self {
        let utf8: &[u8] = v.as_bytes();
        Self::from(utf8)
    }
}

/// Builds an owned `ProtoString` from raw bytes.
///
/// The bytes are not checked for UTF-8 validity here.
impl From<&[u8]> for ProtoString {
    fn from(v: &[u8]) -> Self {
        let inner = InnerProtoString::from(v);
        Self { inner }
    }
}

/// Identity conversion: an owned `ProtoString` is passed through untouched.
impl IntoProxied<ProtoString> for ProtoString {
    fn into_proxied(self, _: Private) -> Self {
        self
    }
}

/// Converts a string slice via `From<&str>`.
impl IntoProxied<ProtoString> for &str {
    fn into_proxied(self, _: Private) -> ProtoString {
        self.into()
    }
}

/// Converts a borrowed `ProtoStr` from its raw bytes, valid UTF-8 or not.
impl IntoProxied<ProtoString> for &ProtoStr {
    fn into_proxied(self, _: Private) -> ProtoString {
        let raw: &[u8] = self.as_bytes();
        ProtoString::from(raw)
    }
}

/// Converts an owned `String` by going through its string-slice contents.
impl IntoProxied<ProtoString> for String {
    fn into_proxied(self, _: Private) -> ProtoString {
        let text: &str = &self;
        ProtoString::from(text)
    }
}

/// Converts a borrowed `String` from its UTF-8 bytes.
impl IntoProxied<ProtoString> for &String {
    fn into_proxied(self, _: Private) -> ProtoString {
        let utf8: &[u8] = self.as_bytes();
        ProtoString::from(utf8)
    }
}

impl IntoProxied<ProtoString> for OsString {
    fn into_proxied(self, private: Private) -> ProtoString {
        self.as_os_str().into_proxied(private)
    }
}

/// Converts an `OsStr` from its platform-encoded bytes.
///
/// The encoded bytes are forwarded as-is, with no UTF-8 validation.
impl IntoProxied<ProtoString> for &OsStr {
    fn into_proxied(self, _: Private) -> ProtoString {
        let encoded: &[u8] = self.as_encoded_bytes();
        ProtoString::from(encoded)
    }
}

/// Converts a boxed string slice by going through the `str` it owns.
impl IntoProxied<ProtoString> for Box<str> {
    fn into_proxied(self, _: Private) -> ProtoString {
        let text: &str = &self;
        ProtoString::from(text)
    }
}

/// Converts a borrowed-or-owned string by going through its `str` contents.
impl IntoProxied<ProtoString> for Cow<'_, str> {
    fn into_proxied(self, _: Private) -> ProtoString {
        let text: &str = &self;
        ProtoString::from(text)
    }
}

/// Converts a reference-counted string by going through its `str` contents.
impl IntoProxied<ProtoString> for Rc<str> {
    fn into_proxied(self, _: Private) -> ProtoString {
        let text: &str = &self;
        ProtoString::from(text)
    }
}

/// Converts an atomically reference-counted string by going through its
/// `str` contents.
impl IntoProxied<ProtoString> for Arc<str> {
    fn into_proxied(self, _: Private) -> ProtoString {
        let text: &str = &self;
        ProtoString::from(text)
    }
}

/// A shared immutable view of a protobuf `string` field's contents.
///
/// Like a `str`, it can be cheaply accessed as bytes and
/// is [dynamically sized][dst], requiring it be accessed through a pointer.
///
/// # UTF-8 and `&str` access
///
/// Protobuf [docs] state that a `string` field contains UTF-8 encoded text.
/// However, not every runtime enforces this, and the Rust runtime is designed
/// to integrate with other runtimes with FFI, like C++.
///
/// Because of this, in order to access the contents as a `&str`, users must
/// call [`ProtoStr::to_str`] to perform a (possibly runtime-elided) UTF-8
/// validation check. However, the Rust API only allows `set()`ting a `string`
/// field with data that should be valid UTF-8, like a `&str` or a
/// `&ProtoStr`. This means that this check should rarely fail, but is necessary
/// to prevent UB when interacting with C++, which has looser restrictions.
///
/// Most of the time, users should not perform direct `&str` access to the
/// contents - this type implements `Display` and comparison with `str`,
/// so it's best to avoid a UTF-8 check by working directly with `&ProtoStr`
/// or converting to `&[u8]`.
///
/// # `Display` and `ToString`
/// `ProtoStr` is ordinarily UTF-8 and so implements `Display`. If there are
/// any invalid UTF-8 sequences, they are replaced with [`U+FFFD REPLACEMENT
/// CHARACTER`]. Because anything implementing `Display` also implements
/// `ToString`, `proto_str.to_string()` is equivalent to
/// `String::from_utf8_lossy(proto_str.as_bytes()).into_owned()`.
///
/// [docs]: https://protobuf.dev/programming-guides/proto2/#scalar
/// [dst]: https://doc.rust-lang.org/reference/dynamically-sized-types.html
/// [`U+FFFD REPLACEMENT CHARACTER`]: std::char::REPLACEMENT_CHARACTER
#[repr(transparent)]
pub struct ProtoStr([u8]);

impl ProtoStr {
    /// Converts `self` to a byte slice.
    ///
    /// Note: this type does not implement `Deref`; you must call `as_bytes()`
    /// or `AsRef<[u8]>` to get access to bytes.
    pub fn as_bytes(&self) -> &[u8] {
        &self.0
    }

    /// Yields a `&str` slice if `self` contains valid UTF-8.
    ///
    /// This may perform a runtime check, dependent on runtime.
    ///
    /// `String::from_utf8_lossy(proto_str.as_bytes())` can be used to
    /// infallibly construct a string, replacing invalid UTF-8 with
    /// [`U+FFFD REPLACEMENT CHARACTER`].
    ///
    /// # Errors
    /// Returns [`Utf8Error`] if the contents are not valid UTF-8.
    ///
    /// [`U+FFFD REPLACEMENT CHARACTER`]: std::char::REPLACEMENT_CHARACTER
    // This is not `try_to_str` since `to_str` is shorter, with `CStr` as precedent.
    pub fn to_str(&self) -> Result<&str, Utf8Error> {
        Ok(std::str::from_utf8(&self.0)?)
    }

    /// Converts `self` to a string, including invalid characters.
    ///
    /// Invalid UTF-8 sequences are replaced with
    /// [`U+FFFD REPLACEMENT CHARACTER`].
    ///
    /// Users should prefer this to `.to_string()` provided by `Display`.
    /// `.to_cow_lossy()` is the same operation, but it may avoid an
    /// allocation if the string is already UTF-8.
    ///
    /// [`U+FFFD REPLACEMENT CHARACTER`]: std::char::REPLACEMENT_CHARACTER
    //
    // This method is named `to_string_lossy` in `CStr`, but since `to_string`
    // also exists on this type, this name was chosen to avoid confusion.
    pub fn to_cow_lossy(&self) -> Cow<'_, str> {
        String::from_utf8_lossy(&self.0)
    }

    /// Returns `true` if `self` has a length of zero bytes.
    pub fn is_empty(&self) -> bool {
        self.0.is_empty()
    }

    /// Returns the length of `self`.
    ///
    /// Like `&str`, this is a length in bytes, not `char`s or graphemes.
    pub fn len(&self) -> usize {
        self.0.len()
    }

    /// Iterates over the `char`s in this protobuf `string`.
    ///
    /// Invalid UTF-8 sequences are replaced with
    /// [`U+FFFD REPLACEMENT CHARACTER`].
    ///
    /// [`U+FFFD REPLACEMENT CHARACTER`]: std::char::REPLACEMENT_CHARACTER
    pub fn chars(&self) -> impl Iterator<Item = char> + '_ + fmt::Debug {
        Utf8Chunks::new(self.as_bytes()).flat_map(|chunk| {
            // Per chunk: all valid chars first, then at most one replacement.
            let mut yield_replacement_char = !chunk.invalid().is_empty();
            chunk.valid().chars().chain(iter::from_fn(move || {
                // Yield a single replacement character for every
                // non-empty invalid sequence.
                yield_replacement_char.then(|| {
                    yield_replacement_char = false;
                    char::REPLACEMENT_CHARACTER
                })
            }))
        })
    }

    /// Returns an iterator over chunks of UTF-8 data in the string.
    ///
    /// An `Ok(&str)` is yielded for every valid UTF-8 chunk, and an
    /// `Err(&[u8])` for each non-UTF-8 chunk. An `Err` will be emitted
    /// multiple times in a row for contiguous invalid chunks. Each invalid
    /// chunk in an `Err` has a maximum length of 3 bytes.
    pub fn utf8_chunks(&self) -> impl Iterator<Item = Result<&str, &[u8]>> + '_ {
        Utf8Chunks::new(self.as_bytes()).flat_map(|chunk| {
            let valid = chunk.valid();
            let invalid = chunk.invalid();
            // Empty halves are skipped so that no empty `Ok`/`Err` is yielded.
            (!valid.is_empty())
                .then_some(Ok(valid))
                .into_iter()
                .chain((!invalid.is_empty()).then_some(Err(invalid)))
        })
    }

    /// Converts known-UTF-8 bytes to a `ProtoStr` without a check.
    ///
    /// # Safety
    /// `bytes` must be valid UTF-8 if the current runtime requires it.
    pub unsafe fn from_utf8_unchecked(bytes: &[u8]) -> &Self {
        // SAFETY:
        // - `ProtoStr` is `#[repr(transparent)]` over `[u8]`, so it has the same
        //   layout.
        // - `ProtoStr` has the same pointer metadata and element size as `[u8]`.
        unsafe { &*(bytes as *const [u8] as *const Self) }
    }

    /// Interprets a string slice as a `&ProtoStr`.
    pub fn from_str(string: &str) -> &Self {
        // SAFETY: `string.as_bytes()` is valid UTF-8.
        unsafe { Self::from_utf8_unchecked(string.as_bytes()) }
    }
}

/// Borrows the underlying bytes without any UTF-8 validation.
impl AsRef<[u8]> for ProtoStr {
    fn as_ref(&self) -> &[u8] {
        &self.0
    }
}

/// Reinterprets a `&ProtoStr` as the byte slice it wraps.
impl<'msg> From<&'msg ProtoStr> for &'msg [u8] {
    fn from(val: &'msg ProtoStr) -> Self {
        let raw: &'msg [u8] = val.as_bytes();
        raw
    }
}

/// Reinterprets a `&str` as a `&ProtoStr`; this cannot fail because a `str`
/// is always valid UTF-8.
impl<'msg> From<&'msg str> for &'msg ProtoStr {
    fn from(val: &'msg str) -> Self {
        ProtoStr::from_str(val)
    }
}

impl<'msg> TryFrom<&'msg ProtoStr> for &'msg str {
    type Error = Utf8Error;

    fn try_from(val: &'msg ProtoStr) -> Result<&'msg str, Utf8Error> {
        val.to_str()
    }
}

/// Fallible, validating conversion from raw bytes: the bytes must be valid
/// UTF-8 for the conversion to succeed.
impl<'msg> TryFrom<&'msg [u8]> for &'msg ProtoStr {
    type Error = Utf8Error;

    fn try_from(val: &'msg [u8]) -> Result<Self, Self::Error> {
        // Validate first, then reuse the infallible `&str` conversion.
        let text = std::str::from_utf8(val)?;
        Ok(ProtoStr::from_str(text))
    }
}

/// Debug output is delegated to the `Utf8Chunks` debug adapter.
impl fmt::Debug for ProtoStr {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let chunks = Utf8Chunks::new(self.as_bytes());
        fmt::Debug::fmt(&chunks.debug(), f)
    }
}

/// Lossy display: valid UTF-8 is written as-is and every invalid sequence is
/// rendered as a single `U+FFFD REPLACEMENT CHARACTER`.
impl fmt::Display for ProtoStr {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        use std::fmt::Write as _;
        for chunk in Utf8Chunks::new(self.as_bytes()) {
            let (valid, invalid) = (chunk.valid(), chunk.invalid());
            fmt::Display::fmt(valid, f)?;
            if invalid.is_empty() {
                continue;
            }
            // One invalid chunk is emitted per detected invalid sequence.
            f.write_char(char::REPLACEMENT_CHARACTER)?;
        }
        Ok(())
    }
}

/// Hashes exactly like the underlying `[u8]` slice.
impl Hash for ProtoStr {
    fn hash<H: Hasher>(&self, state: &mut H) {
        let raw: &[u8] = &self.0;
        raw.hash(state)
    }
}

impl Eq for ProtoStr {}

/// Total ordering is the lexicographic ordering of the underlying bytes.
impl Ord for ProtoStr {
    fn cmp(&self, other: &ProtoStr) -> Ordering {
        let (lhs, rhs) = (self.as_bytes(), other.as_bytes());
        Ord::cmp(lhs, rhs)
    }
}

/// `ProtoString` is viewed through borrowed `ProtoStr` slices.
impl Proxied for ProtoString {
    // The view of a `ProtoString` is a `&ProtoStr` borrowed for `'msg`.
    type View<'msg> = &'msg ProtoStr;
}

/// A `&ProtoStr` is its own view: both accessors hand back the same
/// reference.
impl<'msg> ViewProxy<'msg> for &'msg ProtoStr {
    type Proxied = ProtoString;

    fn as_view(&self) -> &ProtoStr {
        *self
    }

    fn into_view<'shorter>(self) -> &'shorter ProtoStr
    where
        'msg: 'shorter,
    {
        // Shared references are covariant, so the lifetime simply shrinks.
        let shortened: &'shorter ProtoStr = self;
        shortened
    }
}

/// Implements `PartialOrd` and `PartialEq` for the `lhs` against the `rhs`
/// using `AsRef<[u8]>`.
///
/// Each entry has the form `<(generics)> Lhs => Rhs`; both types must
/// implement `AsRef<[u8]>`, and the comparison is done on those bytes.
// TODO: consider improving to not require a `<()>` if no generics are
// needed
macro_rules! impl_bytes_partial_cmp {
    ($(<($($generics:tt)*)> $lhs:ty => $rhs:ty),+ $(,)?) => {
        $(
            impl<$($generics)*> PartialEq<$rhs> for $lhs {
                fn eq(&self, other: &$rhs) -> bool {
                    AsRef::<[u8]>::as_ref(self) == AsRef::<[u8]>::as_ref(other)
                }
            }
            impl<$($generics)*> PartialOrd<$rhs> for $lhs {
                fn partial_cmp(&self, other: &$rhs) -> Option<Ordering> {
                    AsRef::<[u8]>::as_ref(self).partial_cmp(AsRef::<[u8]>::as_ref(other))
                }
            }
        )*
    };
}

impl_bytes_partial_cmp!(
    // `ProtoStr` against protobuf types
    <()> ProtoStr => ProtoStr,

    // `ProtoStr` against foreign types
    <()> ProtoStr => str,
    <()> str => ProtoStr,
);

#[cfg(test)]
mod tests {
    use super::*;

    // TODO: Add unit tests

    // Shorter and safe utility function to construct `ProtoStr` from bytes for
    // testing.
    fn test_proto_str(bytes: &[u8]) -> &ProtoStr {
        // SAFETY: The runtime that this test executes under does not elide UTF-8 checks
        // inside of `ProtoStr`.
        unsafe { ProtoStr::from_utf8_unchecked(bytes) }
    }

    // UTF-8 test cases copied from:
    // https://github.com/rust-lang/rust/blob/e8ee0b7/library/core/tests/str_lossy.rs
    //
    // U+FFFD REPLACEMENT CHARACTER is always spelled as the `\u{FFFD}` escape
    // in expectations so that the file stays robust against re-encoding.

    #[test]
    fn proto_str_debug() {
        assert_eq!(&format!("{:?}", test_proto_str(b"Hello There")), "\"Hello There\"");
        assert_eq!(
            &format!(
                "{:?}",
                test_proto_str(b"Hello\xC0\x80 There\xE6\x83 Goodbye\xf4\x8d\x93\xaa"),
            ),
            "\"Hello\\xC0\\x80 There\\xE6\\x83 Goodbye\\u{10d4ea}\"",
        );
    }

    #[test]
    fn proto_str_display() {
        assert_eq!(&test_proto_str(b"Hello There").to_string(), "Hello There");
        assert_eq!(
            &test_proto_str(b"Hello\xC0\x80 There\xE6\x83 Goodbye\xf4\x8d\x93\xaa").to_string(),
            "Hello\u{FFFD}\u{FFFD} There\u{FFFD} Goodbye\u{10d4ea}",
        );
    }

    #[test]
    fn proto_str_to_rust_str() {
        assert_eq!(test_proto_str(b"hello").to_str(), Ok("hello"));
        assert_eq!(test_proto_str("ศไทย中华Việt Nam".as_bytes()).to_str(), Ok("ศไทย中华Việt Nam"));
        for expect_fail in [
            &b"Hello\xC2 There\xFF Goodbye"[..],
            b"Hello\xC0\x80 There\xE6\x83 Goodbye",
            b"\xF5foo\xF5\x80bar",
            b"\xF1foo\xF1\x80bar\xF1\x80\x80baz",
            b"\xF4foo\xF4\x80bar\xF4\xBFbaz",
            b"\xF0\x80\x80\x80foo\xF0\x90\x80\x80bar",
            b"\xED\xA0\x80foo\xED\xBF\xBFbar",
        ] {
            assert_eq!(test_proto_str(expect_fail).to_str(), Err(Utf8Error(())), "{expect_fail:?}");
        }
    }

    #[test]
    fn proto_str_to_cow() {
        assert_eq!(test_proto_str(b"hello").to_cow_lossy(), Cow::Borrowed("hello"));
        assert_eq!(
            test_proto_str("ศไทย中华Việt Nam".as_bytes()).to_cow_lossy(),
            Cow::Borrowed("ศไทย中华Việt Nam")
        );
        for (bytes, lossy_str) in [
            (&b"Hello\xC2 There\xFF Goodbye"[..], "Hello\u{FFFD} There\u{FFFD} Goodbye"),
            (
                b"Hello\xC0\x80 There\xE6\x83 Goodbye",
                "Hello\u{FFFD}\u{FFFD} There\u{FFFD} Goodbye",
            ),
            (b"\xF5foo\xF5\x80bar", "\u{FFFD}foo\u{FFFD}\u{FFFD}bar"),
            (b"\xF1foo\xF1\x80bar\xF1\x80\x80baz", "\u{FFFD}foo\u{FFFD}bar\u{FFFD}baz"),
            (b"\xF4foo\xF4\x80bar\xF4\xBFbaz", "\u{FFFD}foo\u{FFFD}bar\u{FFFD}\u{FFFD}baz"),
            (
                b"\xF0\x80\x80\x80foo\xF0\x90\x80\x80bar",
                "\u{FFFD}\u{FFFD}\u{FFFD}\u{FFFD}foo\u{10000}bar",
            ),
            (
                b"\xED\xA0\x80foo\xED\xBF\xBFbar",
                "\u{FFFD}\u{FFFD}\u{FFFD}foo\u{FFFD}\u{FFFD}\u{FFFD}bar",
            ),
        ] {
            let cow = test_proto_str(bytes).to_cow_lossy();
            assert!(matches!(cow, Cow::Owned(_)));
            assert_eq!(&*cow, lossy_str, "{bytes:?}");
        }
    }

    #[test]
    fn proto_str_utf8_chunks() {
        macro_rules! assert_chunks {
            ($bytes:expr, $($chunks:expr),* $(,)?) => {
                let bytes = $bytes;
                let chunks: &[Result<&str, &[u8]>] = &[$($chunks),*];
                let s = test_proto_str(bytes);
                let got_chunks = s.utf8_chunks();
                let expected_chunks = chunks.iter().copied();
                assert!(got_chunks.eq(expected_chunks), "{bytes:?} -> {chunks:?}");
            };
        }
        assert_chunks!(b"hello", Ok("hello"));
        assert_chunks!("ศไทย中华Việt Nam".as_bytes(), Ok("ศไทย中华Việt Nam"));
        assert_chunks!(
            b"Hello\xC2 There\xFF Goodbye",
            Ok("Hello"),
            Err(b"\xC2"),
            Ok(" There"),
            Err(b"\xFF"),
            Ok(" Goodbye"),
        );
        assert_chunks!(
            b"Hello\xC0\x80 There\xE6\x83 Goodbye",
            Ok("Hello"),
            Err(b"\xC0"),
            Err(b"\x80"),
            Ok(" There"),
            Err(b"\xE6\x83"),
            Ok(" Goodbye"),
        );
        assert_chunks!(
            b"\xF5foo\xF5\x80bar",
            Err(b"\xF5"),
            Ok("foo"),
            Err(b"\xF5"),
            Err(b"\x80"),
            Ok("bar"),
        );
        assert_chunks!(
            b"\xF1foo\xF1\x80bar\xF1\x80\x80baz",
            Err(b"\xF1"),
            Ok("foo"),
            Err(b"\xF1\x80"),
            Ok("bar"),
            Err(b"\xF1\x80\x80"),
            Ok("baz"),
        );
        assert_chunks!(
            b"\xF4foo\xF4\x80bar\xF4\xBFbaz",
            Err(b"\xF4"),
            Ok("foo"),
            Err(b"\xF4\x80"),
            Ok("bar"),
            Err(b"\xF4"),
            Err(b"\xBF"),
            Ok("baz"),
        );
        assert_chunks!(
            b"\xF0\x80\x80\x80foo\xF0\x90\x80\x80bar",
            Err(b"\xF0"),
            Err(b"\x80"),
            Err(b"\x80"),
            Err(b"\x80"),
            Ok("foo\u{10000}bar"),
        );
        assert_chunks!(
            b"\xED\xA0\x80foo\xED\xBF\xBFbar",
            Err(b"\xED"),
            Err(b"\xA0"),
            Err(b"\x80"),
            Ok("foo"),
            Err(b"\xED"),
            Err(b"\xBF"),
            Err(b"\xBF"),
            Ok("bar"),
        );
    }

    #[test]
    fn proto_str_chars() {
        macro_rules! assert_chars {
            ($bytes:expr, $chars:expr) => {
                let bytes = $bytes;
                let chars = $chars;
                let s = test_proto_str(bytes);
                let got_chars = s.chars();
                let expected_chars = chars.into_iter();
                assert!(got_chars.eq(expected_chars), "{bytes:?} -> {chars:?}");
            };
        }
        assert_chars!(b"hello", ['h', 'e', 'l', 'l', 'o']);
        assert_chars!(
            "ศไทย中华Việt Nam".as_bytes(),
            ['ศ', 'ไ', 'ท', 'ย', '中', '华', 'V', 'i', 'ệ', 't', ' ', 'N', 'a', 'm']
        );
        assert_chars!(
            b"Hello\xC2 There\xFF Goodbye",
            [
                'H', 'e', 'l', 'l', 'o', '\u{FFFD}', ' ', 'T', 'h', 'e', 'r', 'e', '\u{FFFD}',
                ' ', 'G', 'o', 'o', 'd', 'b', 'y', 'e'
            ]
        );
        assert_chars!(
            b"Hello\xC0\x80 There\xE6\x83 Goodbye",
            [
                'H', 'e', 'l', 'l', 'o', '\u{FFFD}', '\u{FFFD}', ' ', 'T', 'h', 'e', 'r', 'e',
                '\u{FFFD}', ' ', 'G', 'o', 'o', 'd', 'b', 'y', 'e'
            ]
        );
        assert_chars!(
            b"\xF5foo\xF5\x80bar",
            ['\u{FFFD}', 'f', 'o', 'o', '\u{FFFD}', '\u{FFFD}', 'b', 'a', 'r']
        );
        assert_chars!(
            b"\xF1foo\xF1\x80bar\xF1\x80\x80baz",
            ['\u{FFFD}', 'f', 'o', 'o', '\u{FFFD}', 'b', 'a', 'r', '\u{FFFD}', 'b', 'a', 'z']
        );
        assert_chars!(
            b"\xF4foo\xF4\x80bar\xF4\xBFbaz",
            [
                '\u{FFFD}', 'f', 'o', 'o', '\u{FFFD}', 'b', 'a', 'r', '\u{FFFD}', '\u{FFFD}',
                'b', 'a', 'z'
            ]
        );
        assert_chars!(
            b"\xF0\x80\x80\x80foo\xF0\x90\x80\x80bar",
            [
                '\u{FFFD}', '\u{FFFD}', '\u{FFFD}', '\u{FFFD}', 'f', 'o', 'o', '\u{10000}', 'b',
                'a', 'r'
            ]
        );
        assert_chars!(
            b"\xED\xA0\x80foo\xED\xBF\xBFbar",
            [
                '\u{FFFD}', '\u{FFFD}', '\u{FFFD}', 'f', 'o', 'o', '\u{FFFD}', '\u{FFFD}',
                '\u{FFFD}', 'b', 'a', 'r'
            ]
        );
    }
}
-												Define the shape of BytesMut with stubbed methods

This is the primary mutator for `bytes` fields, which proxy `[u8]`.

PiperOrigin-RevId: 547242972

											
										
										
											1 year ago
+								// Protocol Buffers - Google's data interchange format
 								// Copyright 2023 Google LLC.  All rights reserved.
 								//
-												Shorten our license headers into an abbreviated form that references LICENSE instead of including it in full.

PiperOrigin-RevId: 563888892

											
										
										
											1 year ago
+								// Use of this source code is governed by a BSD-style
 								// license that can be found in the LICENSE file or at
 								// https://developers.google.com/open-source/licenses/bsd
-												Define the shape of BytesMut with stubbed methods

This is the primary mutator for `bytes` fields, which proxy `[u8]`.

PiperOrigin-RevId: 547242972

											
										
										
											1 year ago
 								//! Items specific to `bytes` and `string` fields.
 								#![allow(dead_code)]
 								#![allow(unused)]
-												Create a upb rust directory.

The intent of this directory would be for a layer of Rust bindings that directly map to upb semantics; Rust Protobuf runtime would be layer on top of that Rust, instead of directly on the upb C api.

PiperOrigin-RevId: 624282429

											
										
										
											8 months ago
+								use crate::__internal::Private;
-												Add ProtoString/Bytes owned types

PiperOrigin-RevId: 644999527

											
										
										
											6 months ago
+								use crate::__runtime::{InnerProtoString, PtrAndLen, RawMessage};
 								use crate::{IntoProxied, Mut, MutProxied, MutProxy, Optional, Proxied, View, ViewProxy};
-												Define the shape of BytesMut with stubbed methods

This is the primary mutator for `bytes` fields, which proxy `[u8]`.

PiperOrigin-RevId: 547242972

											
										
										
											1 year ago
+								use std::borrow::Cow;
 								use std::cmp::{Eq, Ord, Ordering, PartialEq, PartialOrd};
 								use std::convert::{AsMut, AsRef};
-												Add ProtoString/Bytes owned types

PiperOrigin-RevId: 644999527

											
										
										
											6 months ago
+								use std::ffi::{OsStr, OsString};
-												Define the shape of ProtoStr as a DST

PiperOrigin-RevId: 547871451

											
										
										
											1 year ago
+								use std::fmt;
-												Define the shape of BytesMut with stubbed methods

This is the primary mutator for `bytes` fields, which proxy `[u8]`.

PiperOrigin-RevId: 547242972

											
										
										
											1 year ago
+								use std::hash::{Hash, Hasher};
-												Define the shape of ProtoStr as a DST

PiperOrigin-RevId: 547871451

											
										
										
											1 year ago
+								use std::iter;
-												Define the shape of BytesMut with stubbed methods

This is the primary mutator for `bytes` fields, which proxy `[u8]`.

PiperOrigin-RevId: 547242972

											
										
										
											1 year ago
+								use std::ops::{Deref, DerefMut};
-												Add ProtoString/Bytes owned types

PiperOrigin-RevId: 644999527

											
										
										
											6 months ago
+								use std::ptr;
 								use std::rc::Rc;
 								use std::sync::Arc;
-												Add UTF-8 handling to ProtoStr

This mostly uses a copied `Utf8Chunks` utility from nightly Rust.

PiperOrigin-RevId: 551224417

											
										
										
											1 year ago
+								use utf8::Utf8Chunks;
-												Define the shape of BytesMut with stubbed methods

This is the primary mutator for `bytes` fields, which proxy `[u8]`.

PiperOrigin-RevId: 547242972

											
										
										
											1 year ago
-												Add ProtoString/Bytes owned types

PiperOrigin-RevId: 644999527

											
										
										
											6 months ago
+								pub struct ProtoBytes {
 								    pub(crate) inner: InnerProtoString,
 								}
 								impl AsRef<[u8]> for ProtoBytes {
 								    fn as_ref(&self) -> &[u8] {
 								        self.inner.as_bytes()
 								    }
 								}
 								impl From<&[u8]> for ProtoBytes {
 								    fn from(v: &[u8]) -> ProtoBytes {
 								        ProtoBytes { inner: InnerProtoString::from(v) }
 								    }
 								}
 								impl<const N: usize> From<&[u8; N]> for ProtoBytes {
 								    fn from(v: &[u8; N]) -> ProtoBytes {
 								        ProtoBytes { inner: InnerProtoString::from(v.as_ref()) }
 								    }
 								}
 								impl Proxied for ProtoBytes {
-												Define the shape of BytesMut with stubbed methods

This is the primary mutator for `bytes` fields, which proxy `[u8]`.

PiperOrigin-RevId: 547242972

											
										
										
											1 year ago
+								    type View<'msg> = &'msg [u8];
-												Split Proxied into Proxied and MutProxied traits.

Proxied is not marked as Sized yet, because ProtoStr is still dynamically sized. We will wait for clarity for the string types before marking Proxied Sized.

PiperOrigin-RevId: 627707544

											
										
										
											8 months ago
+								}
-												Add ProtoString/Bytes owned types

PiperOrigin-RevId: 644999527

											
										
										
											6 months ago
+								impl IntoProxied<ProtoBytes> for ProtoBytes {
 								    fn into_proxied(self, _private: Private) -> ProtoBytes {
 								        self
 								    }
 								}
 								impl IntoProxied<ProtoBytes> for &[u8] {
 								    fn into_proxied(self, _private: Private) -> ProtoBytes {
 								        ProtoBytes::from(self)
 								    }
 								}
 								impl<const N: usize> IntoProxied<ProtoBytes> for &[u8; N] {
 								    fn into_proxied(self, _private: Private) -> ProtoBytes {
 								        ProtoBytes::from(self.as_ref())
 								    }
 								}
 								impl IntoProxied<ProtoBytes> for Vec<u8> {
 								    fn into_proxied(self, _private: Private) -> ProtoBytes {
 								        ProtoBytes::from(AsRef::<[u8]>::as_ref(&self))
 								    }
 								}
 								impl IntoProxied<ProtoBytes> for &Vec<u8> {
 								    fn into_proxied(self, _private: Private) -> ProtoBytes {
 								        ProtoBytes::from(AsRef::<[u8]>::as_ref(self))
 								    }
 								}
 								impl IntoProxied<ProtoBytes> for Box<[u8]> {
 								    fn into_proxied(self, _private: Private) -> ProtoBytes {
 								        ProtoBytes::from(AsRef::<[u8]>::as_ref(&self))
 								    }
 								}
 								impl IntoProxied<ProtoBytes> for Cow<'_, [u8]> {
 								    fn into_proxied(self, _private: Private) -> ProtoBytes {
 								        ProtoBytes::from(AsRef::<[u8]>::as_ref(&self))
 								    }
 								}
 								impl IntoProxied<ProtoBytes> for Rc<[u8]> {
 								    fn into_proxied(self, _private: Private) -> ProtoBytes {
 								        ProtoBytes::from(AsRef::<[u8]>::as_ref(&self))
 								    }
 								}
 								impl IntoProxied<ProtoBytes> for Arc<[u8]> {
 								    fn into_proxied(self, _private: Private) -> ProtoBytes {
 								        ProtoBytes::from(AsRef::<[u8]>::as_ref(&self))
 								    }
 								}
-												Define the shape of BytesMut with stubbed methods

This is the primary mutator for `bytes` fields, which proxy `[u8]`.

PiperOrigin-RevId: 547242972

											
										
										
											1 year ago
+								impl<'msg> ViewProxy<'msg> for &'msg [u8] {
-												Add ProtoString/Bytes owned types

PiperOrigin-RevId: 644999527

											
										
										
											6 months ago
+								    type Proxied = ProtoBytes;
-												Define the shape of BytesMut with stubbed methods

This is the primary mutator for `bytes` fields, which proxy `[u8]`.

PiperOrigin-RevId: 547242972

											
										
										
											1 year ago
 								    fn as_view(&self) -> &[u8] {
 								        self
 								    }
 								    fn into_view<'shorter>(self) -> &'shorter [u8]
 								    where
 								        'msg: 'shorter,
 								    {
 								        self
 								    }
 								}
-												Define the shape of ProtoStr as a DST

PiperOrigin-RevId: 547871451

											
										
										
											1 year ago
+								/// The bytes were not valid UTF-8.
-												Add UTF-8 handling to ProtoStr

This mostly uses a copied `Utf8Chunks` utility from nightly Rust.

PiperOrigin-RevId: 551224417

											
										
										
											1 year ago
+								#[derive(Debug, PartialEq)]
-												Define the shape of ProtoStr as a DST

PiperOrigin-RevId: 547871451

											
										
										
											1 year ago
+								pub struct Utf8Error(pub(crate) ());
 								impl From<std::str::Utf8Error> for Utf8Error {
 								    fn from(_: std::str::Utf8Error) -> Utf8Error {
 								        Utf8Error(())
 								    }
 								}
-												Add ProtoString/Bytes owned types

PiperOrigin-RevId: 644999527

											
										
										
											6 months ago
+								/// An owned type representing protobuf `string` field's contents.
 								///
 								/// # UTF-8
 								///
 								/// Protobuf [docs] state that a `string` field contains UTF-8 encoded text.
 								/// However, not every runtime enforces this, and the Rust runtime is designed
 								/// to integrate with other runtimes with FFI, like C++.
 								///
 								/// `ProtoString` represents a string type that is expected to contain valid
 								/// UTF-8. However, `ProtoString` is not validated, so users must
 								/// call [`ProtoString::to_string`] to perform a (possibly runtime-elided) UTF-8
 								/// validation check. This validation should rarely fail in pure Rust programs,
 								/// but is necessary to prevent UB when interacting with C++, or other languages
 								/// with looser restrictions.
 								///
 								///
 								/// # `Display` and `ToString`
 								/// `ProtoString` is ordinarily UTF-8 and so implements `Display`. If there are
 								/// any invalid UTF-8 sequences, they are replaced with [`U+FFFD REPLACEMENT
 								/// CHARACTER`]. Because anything implementing `Display` also implements
 								/// `ToString`, `ProtoString::to_string()` is equivalent to
 								/// `String::from_utf8_lossy(proto_string.as_bytes()).into_owned()`.
 								///
 								/// [docs]: https://protobuf.dev/programming-guides/proto2/#scalar
 								/// [`U+FFFD REPLACEMENT CHARACTER`]: std::char::REPLACEMENT_CHARACTER
 								pub struct ProtoString {
 								    pub(crate) inner: InnerProtoString,
 								}
 								impl ProtoString {
 								    /// Returns the contents as a byte slice borrowed from the inner
 								    /// representation.
 								    pub fn as_bytes(&self) -> &[u8] {
 								        let Self { inner } = self;
 								        inner.as_bytes()
 								    }
 								}
 								/// Builds a `ProtoString` from a Rust string slice via its UTF-8 bytes.
 								impl From<&str> for ProtoString {
 								    fn from(v: &str) -> Self {
 								        let utf8: &[u8] = v.as_bytes();
 								        Self::from(utf8)
 								    }
 								}
 								/// Builds a `ProtoString` from raw bytes by wrapping
 								/// `InnerProtoString::from`.
 								impl From<&[u8]> for ProtoString {
 								    fn from(v: &[u8]) -> Self {
 								        let inner = InnerProtoString::from(v);
 								        Self { inner }
 								    }
 								}
 								/// Identity conversion: a `ProtoString` is returned as-is.
 								impl IntoProxied<ProtoString> for ProtoString {
 								    fn into_proxied(self, _private: Private) -> ProtoString {
 								        self
 								    }
 								}
 								/// Converts a string slice using `From<&str> for ProtoString`.
 								impl IntoProxied<ProtoString> for &str {
 								    fn into_proxied(self, _private: Private) -> ProtoString {
 								        let text: &str = self;
 								        text.into()
 								    }
 								}
 								/// Converts a borrowed `ProtoStr` by passing its bytes through
 								/// `From<&[u8]> for ProtoString`.
 								impl IntoProxied<ProtoString> for &ProtoStr {
 								    fn into_proxied(self, _private: Private) -> ProtoString {
 								        let raw: &[u8] = self.as_bytes();
 								        ProtoString::from(raw)
 								    }
 								}
 								/// Converts an owned `String` by borrowing it as `&str`.
 								impl IntoProxied<ProtoString> for String {
 								    fn into_proxied(self, _private: Private) -> ProtoString {
 								        let text: &str = &self;
 								        ProtoString::from(text)
 								    }
 								}
 								/// Converts a borrowed `String` through its UTF-8 bytes.
 								impl IntoProxied<ProtoString> for &String {
 								    fn into_proxied(self, _private: Private) -> ProtoString {
 								        let utf8: &[u8] = self.as_bytes();
 								        ProtoString::from(utf8)
 								    }
 								}
 								impl IntoProxied<ProtoString> for OsString {
 								    fn into_proxied(self, private: Private) -> ProtoString {
 								        self.as_os_str().into_proxied(private)
 								    }
 								}
 								/// Converts an `&OsStr` using its encoded bytes.
 								impl IntoProxied<ProtoString> for &OsStr {
 								    fn into_proxied(self, _private: Private) -> ProtoString {
 								        // NOTE: per the std docs, `as_encoded_bytes` yields a platform-specific
 								        // superset of UTF-8, so the result is not guaranteed to be valid UTF-8.
 								        let encoded: &[u8] = self.as_encoded_bytes();
 								        ProtoString::from(encoded)
 								    }
 								}
 								/// Converts a boxed string slice by borrowing it as `&str`.
 								impl IntoProxied<ProtoString> for Box<str> {
 								    fn into_proxied(self, _private: Private) -> ProtoString {
 								        let text: &str = &self;
 								        ProtoString::from(text)
 								    }
 								}
 								/// Converts a `Cow<str>` (borrowed or owned) by borrowing it as `&str`.
 								impl IntoProxied<ProtoString> for Cow<'_, str> {
 								    fn into_proxied(self, _private: Private) -> ProtoString {
 								        let text: &str = &self;
 								        ProtoString::from(text)
 								    }
 								}
 								/// Converts an `Rc<str>` by borrowing the shared string as `&str`.
 								impl IntoProxied<ProtoString> for Rc<str> {
 								    fn into_proxied(self, _private: Private) -> ProtoString {
 								        let text: &str = &self;
 								        ProtoString::from(text)
 								    }
 								}
 								/// Converts an `Arc<str>` by borrowing the shared string as `&str`.
 								impl IntoProxied<ProtoString> for Arc<str> {
 								    fn into_proxied(self, _private: Private) -> ProtoString {
 								        let text: &str = &self;
 								        ProtoString::from(text)
 								    }
 								}
-												Define the shape of ProtoStr as a DST

PiperOrigin-RevId: 547871451

											
										
										
											1 year ago
+								/// A shared immutable view of a protobuf `string` field's contents.
 								///
 								/// Like a `str`, it can be cheaply accessed as bytes and
 								/// is [dynamically sized][dst], requiring it be accessed through a pointer.
 								///
 								/// # UTF-8 and `&str` access
 								///
 								/// Protobuf [docs] state that a `string` field contains UTF-8 encoded text.
 								/// However, not every runtime enforces this, and the Rust runtime is designed
 								/// to integrate with other runtimes with FFI, like C++.
 								///
 								/// Because of this, in order to access the contents as a `&str`, users must
 								/// call [`ProtoStr::to_str`] to perform a (possibly runtime-elided) UTF-8
 								/// validation check. However, the Rust API only allows `set()`ting a `string`
 								/// field with data that should be valid UTF-8, like a `&str` or a
 								/// `&ProtoStr`. This means that this check should rarely fail, but is necessary
 								/// to prevent UB when interacting with C++, which has looser restrictions.
 								///
 								/// Most of the time, users should not perform direct `&str` access to the
 								/// contents - this type implements `Display` and comparison with `str`,
 								/// so it's best to avoid a UTF-8 check by working directly with `&ProtoStr`
 								/// or converting to `&[u8]`.
 								///
 								/// # `Display` and `ToString`
 								/// `ProtoStr` is ordinarily UTF-8 and so implements `Display`. If there are
 								/// any invalid UTF-8 sequences, they are replaced with [`U+FFFD REPLACEMENT
 								/// CHARACTER`]. Because anything implementing `Display` also implements
 								/// `ToString`, `proto_str.to_string()` is equivalent to
 								/// `String::from_utf8_lossy(proto_str.as_bytes()).into_owned()`.
 								///
 								/// [docs]: https://protobuf.dev/programming-guides/proto2/#scalar
 								/// [dst]: https://doc.rust-lang.org/reference/dynamically-sized-types.html
 								/// [`U+FFFD REPLACEMENT CHARACTER`]: std::char::REPLACEMENT_CHARACTER
 								#[repr(transparent)]
 								pub struct ProtoStr([u8]);
 								impl ProtoStr {
 								    /// Borrows the underlying bytes of `self`.
 								    ///
 								    /// Note: `ProtoStr` has no `Deref` impl, so use this method or
 								    /// `AsRef<[u8]>` whenever byte access is needed.
 								    pub fn as_bytes(&self) -> &[u8] {
 								        let bytes: &[u8] = &self.0;
 								        bytes
 								    }
 								    /// Yields a `&str` slice if `self` contains valid UTF-8.
 								    ///
 								    /// This may perform a runtime check, dependent on runtime.
 								    ///
 								    /// `String::from_utf8_lossy(proto_str.as_bytes())` can be used to
 								    /// infallibly construct a string, replacing invalid UTF-8 with
 								    /// [`U+FFFD REPLACEMENT CHARACTER`].
 								    ///
 								    /// [`U+FFFD REPLACEMENT CHARACTER`]: std::char::REPLACEMENT_CHARACTER
 								    // This is not `try_to_str` since `to_str` is shorter, with `CStr` as precedent.
 								    pub fn to_str(&self) -> Result<&str, Utf8Error> {
 								        Ok(std::str::from_utf8(&self.0)?)
 								    }
 								    /// Converts `self` to a string, including invalid characters.
 								    ///
 								    /// Invalid UTF-8 sequences are replaced with
 								    /// [`U+FFFD REPLACEMENT CHARACTER`].
 								    ///
 								    /// Users should prefer this to `.to_string()` provided by `Display`.
 								    /// `.to_cow_lossy()` is the same operation, but it may avoid an
 								    /// allocation if the string is already UTF-8.
 								    ///
 								    /// [`U+FFFD REPLACEMENT CHARACTER`]: std::char::REPLACEMENT_CHARACTER
 								    //
 								    // This method is named `to_string_lossy` in `CStr`, but since `to_string`
 								    // also exists on this type, this name was chosen to avoid confusion.
 								    pub fn to_cow_lossy(&self) -> Cow<'_, str> {
 								        String::from_utf8_lossy(&self.0)
 								    }
 								    /// Reports whether `self` contains zero bytes.
 								    pub fn is_empty(&self) -> bool {
 								        self.as_bytes().is_empty()
 								    }
 								    /// Returns how many bytes `self` holds.
 								    ///
 								    /// As with `&str`, the unit is bytes, not `char`s or graphemes.
 								    pub fn len(&self) -> usize {
 								        self.as_bytes().len()
 								    }
 								    /// Iterates over the `char`s in this protobuf `string`.
 								    ///
 								    /// Invalid UTF-8 sequences are replaced with
 								    /// [`U+FFFD REPLACEMENT CHARACTER`].
 								    ///
 								    /// [`U+FFFD REPLACEMENT CHARACTER`]: std::char::REPLACEMENT_CHARACTER
-												Make utf8chars.chars iter return `impl Iterator + fmt::Debug`.

PiperOrigin-RevId: 596599987

											
										
										
											11 months ago
+								    pub fn chars(&self) -> impl Iterator<Item = char> + '_ + fmt::Debug {
-												Add UTF-8 handling to ProtoStr

This mostly uses a copied `Utf8Chunks` utility from nightly Rust.

PiperOrigin-RevId: 551224417

											
										
										
											1 year ago
+								        Utf8Chunks::new(self.as_bytes()).flat_map(|chunk| {
 								            let mut yield_replacement_char = !chunk.invalid().is_empty();
 								            chunk.valid().chars().chain(iter::from_fn(move || {
 								                // Yield a single replacement character for every
 								                // non-empty invalid sequence.
 								                yield_replacement_char.then(|| {
 								                    yield_replacement_char = false;
 								                    char::REPLACEMENT_CHARACTER
 								                })
 								            }))
 								        })
-												Define the shape of ProtoStr as a DST

PiperOrigin-RevId: 547871451

											
										
										
											1 year ago
+								    }
 								    /// Returns an iterator over chunks of UTF-8 data in the string.
 								    ///
 								    /// An `Ok(&str)` is yielded for every valid UTF-8 chunk, and an
-												Add UTF-8 handling to ProtoStr

This mostly uses a copied `Utf8Chunks` utility from nightly Rust.

PiperOrigin-RevId: 551224417

											
										
										
											1 year ago
+								    /// `Err(&[u8])` for each non-UTF-8 chunk. An `Err` will be emitted
 								    /// multiple times in a row for contiguous invalid chunks. Each invalid
 								    /// chunk in an `Err` has a maximum length of 3 bytes.
 								    pub fn utf8_chunks(&self) -> impl Iterator<Item = Result<&str, &[u8]>> + '_ {
 								        Utf8Chunks::new(self.as_bytes()).flat_map(|chunk| {
 								            let valid = chunk.valid();
 								            let invalid = chunk.invalid();
 								            (!valid.is_empty())
 								                .then_some(Ok(valid))
 								                .into_iter()
 								                .chain((!invalid.is_empty()).then_some(Err(invalid)))
 								        })
-												Define the shape of ProtoStr as a DST

PiperOrigin-RevId: 547871451

											
										
										
											1 year ago
+								    }
 								    /// Reinterprets `bytes` as a `&ProtoStr` without checking for UTF-8.
 								    ///
 								    /// # Safety
 								    /// `bytes` must be valid UTF-8 if the current runtime requires it.
 								    pub unsafe fn from_utf8_unchecked(bytes: &[u8]) -> &Self {
 								        // The cast itself is safe; only the dereference below is not.
 								        let raw = bytes as *const [u8] as *const Self;
 								        // SAFETY:
 								        // - `ProtoStr` is `#[repr(transparent)]` over `[u8]`, so it has the same
 								        //   layout.
 								        // - `ProtoStr` has the same pointer metadata and element size as `[u8]`.
 								        // - `raw` was derived from the live reference `bytes`.
 								        unsafe { &*raw }
 								    }
 								    /// Views a string slice as a `&ProtoStr`.
 								    pub fn from_str(string: &str) -> &Self {
 								        let utf8 = string.as_bytes();
 								        // SAFETY: `utf8` comes from a `&str`, so it is valid UTF-8.
 								        unsafe { Self::from_utf8_unchecked(utf8) }
 								    }
 								}
 								impl AsRef<[u8]> for ProtoStr {
 								    fn as_ref(&self) -> &[u8] {
 								        self.as_bytes()
 								    }
 								}
 								impl<'msg> From<&'msg ProtoStr> for &'msg [u8] {
 								    fn from(val: &'msg ProtoStr) -> &'msg [u8] {
 								        val.as_bytes()
 								    }
 								}
-												Implement v0.6 string fields

This is predominantly a wrapper around `BytesMut`, for simplicity.
Bytes and string fields are mostly the same, except for possible UTF-8 handling.

This also implements some minor parts of `ProtoStr` that were missed.

PiperOrigin-RevId: 561422951

											
										
										
											1 year ago
+								impl<'msg> From<&'msg str> for &'msg ProtoStr {
 								    // Infallible: delegates to `ProtoStr::from_str`.
 								    fn from(val: &'msg str) -> &'msg ProtoStr {
 								        let proto: &'msg ProtoStr = ProtoStr::from_str(val);
 								        proto
 								    }
 								}
-												Define the shape of ProtoStr as a DST

PiperOrigin-RevId: 547871451

											
										
										
											1 year ago
+								impl<'msg> TryFrom<&'msg ProtoStr> for &'msg str {
 								    type Error = Utf8Error;
 								    // Fallible counterpart of `From<&str> for &ProtoStr`; defers to
 								    // `ProtoStr::to_str`.
 								    fn try_from(val: &'msg ProtoStr) -> Result<&'msg str, Utf8Error> {
 								        ProtoStr::to_str(val)
 								    }
 								}
-												Implement v0.6 string fields

This is predominantly a wrapper around `BytesMut`, for simplicity.
Bytes and string fields are mostly the same, except for possible UTF-8 handling.

This also implements some minor parts of `ProtoStr` that were missed.

PiperOrigin-RevId: 561422951

											
										
										
											1 year ago
+								impl<'msg> TryFrom<&'msg [u8]> for &'msg ProtoStr {
 								    type Error = Utf8Error;
 								    // Validates the bytes as UTF-8 first, then wraps the resulting `&str`.
 								    fn try_from(val: &'msg [u8]) -> Result<&'msg ProtoStr, Utf8Error> {
 								        std::str::from_utf8(val).map(ProtoStr::from_str).map_err(Utf8Error::from)
 								    }
 								}
-												Define the shape of ProtoStr as a DST

PiperOrigin-RevId: 547871451

											
										
										
											1 year ago
+								impl fmt::Debug for ProtoStr {
-												Add UTF-8 handling to ProtoStr

This mostly uses a copied `Utf8Chunks` utility from nightly Rust.

PiperOrigin-RevId: 551224417

											
										
										
											1 year ago
+								    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
 								        fmt::Debug::fmt(&Utf8Chunks::new(self.as_bytes()).debug(), f)
-												Define the shape of ProtoStr as a DST

PiperOrigin-RevId: 547871451

											
										
										
											1 year ago
+								    }
 								}
 								impl fmt::Display for ProtoStr {
-												Add UTF-8 handling to ProtoStr

This mostly uses a copied `Utf8Chunks` utility from nightly Rust.

PiperOrigin-RevId: 551224417

											
										
										
											1 year ago
+								    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
 								        use std::fmt::Write as _;
 								        for chunk in Utf8Chunks::new(self.as_bytes()) {
 								            fmt::Display::fmt(chunk.valid(), f)?;
 								            if !chunk.invalid().is_empty() {
 								                // One invalid chunk is emitted per detected invalid sequence.
 								                f.write_char(char::REPLACEMENT_CHARACTER)?;
 								            }
 								        }
 								        Ok(())
-												Define the shape of ProtoStr as a DST

PiperOrigin-RevId: 547871451

											
										
										
											1 year ago
+								    }
 								}
-												Implement v0.6 string fields

This is predominantly a wrapper around `BytesMut`, for simplicity.
Bytes and string fields are mostly the same, except for possible UTF-8 handling.

This also implements some minor parts of `ProtoStr` that were missed.

PiperOrigin-RevId: 561422951

											
										
										
											1 year ago
+								impl Hash for ProtoStr {
 								    // Hashes exactly like the underlying byte slice.
 								    fn hash<H>(&self, state: &mut H)
 								    where
 								        H: Hasher,
 								    {
 								        Hash::hash(self.as_bytes(), state)
 								    }
 								}
 								// Marker impl: the `ProtoStr == ProtoStr` comparison generated by
 								// `impl_bytes_partial_cmp!` compares the underlying bytes.
 								impl Eq for ProtoStr {}
 								/// Total ordering is the ordering of the underlying byte slices.
 								impl Ord for ProtoStr {
 								    fn cmp(&self, other: &ProtoStr) -> Ordering {
 								        let (lhs, rhs) = (self.as_bytes(), other.as_bytes());
 								        Ord::cmp(lhs, rhs)
 								    }
 								}
-												Add ProtoString/Bytes owned types

PiperOrigin-RevId: 644999527

											
										
										
											6 months ago
+								impl Proxied for ProtoString {
-												Implement v0.6 string fields

This is predominantly a wrapper around `BytesMut`, for simplicity.
Bytes and string fields are mostly the same, except for possible UTF-8 handling.

This also implements some minor parts of `ProtoStr` that were missed.

PiperOrigin-RevId: 561422951

											
										
										
											1 year ago
+								    type View<'msg> = &'msg ProtoStr;
-												Split Proxied into Proxied and MutProxied traits.

Proxied is not marked as Sized yet, because ProtoStr is still dynamically sized. We will wait for clarity for the string types before marking Proxied Sized.

PiperOrigin-RevId: 627707544

											
										
										
											8 months ago
+								}
-												Implement v0.6 string fields

This is predominantly a wrapper around `BytesMut`, for simplicity.
Bytes and string fields are mostly the same, except for possible UTF-8 handling.

This also implements some minor parts of `ProtoStr` that were missed.

PiperOrigin-RevId: 561422951

											
										
										
											1 year ago
+								impl<'msg> ViewProxy<'msg> for &'msg ProtoStr {
-												Add ProtoString/Bytes owned types

PiperOrigin-RevId: 644999527

											
										
										
											6 months ago
+								    type Proxied = ProtoString;
-												Implement v0.6 string fields

This is predominantly a wrapper around `BytesMut`, for simplicity.
Bytes and string fields are mostly the same, except for possible UTF-8 handling.

This also implements some minor parts of `ProtoStr` that were missed.

PiperOrigin-RevId: 561422951

											
										
										
											1 year ago
 								    /// Returns the `&ProtoStr` view itself; it is already the view type
 								    /// of `ProtoString`.
 								    fn as_view(&self) -> &ProtoStr {
 								        self
 								    }
 								    /// Converts this view into one with the shorter lifetime `'shorter`.
 								    ///
 								    /// The reference is returned unchanged; only its lifetime is narrowed,
 								    /// which the `'msg: 'shorter` bound permits.
 								    fn into_view<'shorter>(self) -> &'shorter ProtoStr
 								    where
 								        'msg: 'shorter,
 								    {
 								        self
 								    }
 								}
-												Define the shape of BytesMut with stubbed methods

This is the primary mutator for `bytes` fields, which proxy `[u8]`.

PiperOrigin-RevId: 547242972

											
										
										
											1 year ago
+								/// Implements `PartialOrd` and `PartialEq` for the `lhs` against the `rhs`
 								/// using `AsRef<[u8]>`.
-												Internal change

PiperOrigin-RevId: 566426899

											
										
										
											1 year ago
+								// TODO: consider improving to not require a `<()>` if no generics are
-												Define the shape of ProtoStr as a DST

PiperOrigin-RevId: 547871451

											
										
										
											1 year ago
+								// needed
-												Define the shape of BytesMut with stubbed methods

This is the primary mutator for `bytes` fields, which proxy `[u8]`.

PiperOrigin-RevId: 547242972

											
										
										
											1 year ago
+								macro_rules! impl_bytes_partial_cmp {
 								    // Each entry has the form `<(generics)> Lhs => Rhs`; write `<()>` when
 								    // the impls need no generic parameters. Entries are comma-separated and
 								    // a trailing comma is accepted.
 								    ($(<($($generics:tt)*)> $lhs:ty => $rhs:ty),+ $(,)?) => {
 								        $(
 								            // Equality compares the `AsRef<[u8]>` byte views of both sides.
 								            impl<$($generics)*> PartialEq<$rhs> for $lhs {
 								                fn eq(&self, other: &$rhs) -> bool {
 								                    AsRef::<[u8]>::as_ref(self) == AsRef::<[u8]>::as_ref(other)
 								                }
 								            }
 								            // Ordering is likewise delegated to `[u8]`'s `partial_cmp`.
 								            impl<$($generics)*> PartialOrd<$rhs> for $lhs {
 								                fn partial_cmp(&self, other: &$rhs) -> Option<Ordering> {
 								                    AsRef::<[u8]>::as_ref(self).partial_cmp(AsRef::<[u8]>::as_ref(other))
 								                }
 								            }
 								        )*
 								    };
 								}
 								impl_bytes_partial_cmp!(
-												Define the shape of ProtoStr as a DST

PiperOrigin-RevId: 547871451

											
										
										
											1 year ago
+								    // `ProtoStr` against protobuf types
 								    <()> ProtoStr => ProtoStr,
 								    // `ProtoStr` against foreign types
 								    <()> ProtoStr => str,
 								    <()> str => ProtoStr,
-												Define the shape of BytesMut with stubbed methods

This is the primary mutator for `bytes` fields, which proxy `[u8]`.

PiperOrigin-RevId: 547242972

											
										
										
											1 year ago
+								);
-												Define the shape of ProtoStr as a DST

PiperOrigin-RevId: 547871451

											
										
										
											1 year ago
+								#[cfg(test)]
 								mod tests {
-												Add UTF-8 handling to ProtoStr

This mostly uses a copied `Utf8Chunks` utility from nightly Rust.

PiperOrigin-RevId: 551224417

											
										
										
											1 year ago
+								    use super::*;
-												Internal change

PiperOrigin-RevId: 566426899

											
										
										
											1 year ago
+								    // TODO: Add unit tests
-												Add UTF-8 handling to ProtoStr

This mostly uses a copied `Utf8Chunks` utility from nightly Rust.

PiperOrigin-RevId: 551224417

											
										
										
											1 year ago
 								    // Test-only helper: a short, safe wrapper that builds a `&ProtoStr` from
 								    // arbitrary bytes (including invalid UTF-8) without validation.
 								    fn test_proto_str(bytes: &[u8]) -> &ProtoStr {
 								        // SAFETY: The runtime that this test executes under does not elide UTF-8 checks
 								        // inside of `ProtoStr`.
 								        unsafe { ProtoStr::from_utf8_unchecked(bytes) }
 								    }
 								    // UTF-8 test cases copied from:
 								    // https://github.com/rust-lang/rust/blob/e8ee0b7/library/core/tests/str_lossy.rs
 								    #[test]
 								    fn proto_str_debug() {
 								        // Valid text is quoted as-is.
 								        let plain = format!("{:?}", test_proto_str(b"Hello There"));
 								        assert_eq!(&plain, "\"Hello There\"");
 								        // Invalid bytes are expected as `\x..` escapes; the trailing valid
 								        // code point is expected as a `\u{..}` escape.
 								        let mixed_input = test_proto_str(b"Hello\xC0\x80 There\xE6\x83 Goodbye\xf4\x8d\x93\xaa");
 								        let mixed = format!("{:?}", mixed_input);
 								        assert_eq!(&mixed, "\"Hello\\xC0\\x80 There\\xE6\\x83 Goodbye\\u{10d4ea}\"");
 								    }
 								    #[test]
 								    fn proto_str_display() {
 								        assert_eq!(&test_proto_str(b"Hello There").to_string(), "Hello There");
 								        // Each invalid sequence is displayed as one U+FFFD. The expectation
 								        // uses `\u{FFFD}` escapes so it cannot be corrupted by a re-encoding
 								        // of this file.
 								        assert_eq!(
 								            &test_proto_str(b"Hello\xC0\x80 There\xE6\x83 Goodbye\xf4\x8d\x93\xaa").to_string(),
 								            "Hello\u{FFFD}\u{FFFD} There\u{FFFD} Goodbye\u{10d4ea}",
 								        );
 								    }
 								    #[test]
 								    fn proto_str_to_rust_str() {
 								        // Valid UTF-8 round-trips to the same `&str`.
 								        assert_eq!(test_proto_str(b"hello").to_str(), Ok("hello"));
 								        assert_eq!(test_proto_str("ศไทย中华Việt Nam".as_bytes()).to_str(), Ok("ศไทย中华Việt Nam"));
 								        // Every one of these inputs contains at least one invalid sequence.
 								        let invalid_inputs: [&[u8]; 7] = [
 								            b"Hello\xC2 There\xFF Goodbye",
 								            b"Hello\xC0\x80 There\xE6\x83 Goodbye",
 								            b"\xF5foo\xF5\x80bar",
 								            b"\xF1foo\xF1\x80bar\xF1\x80\x80baz",
 								            b"\xF4foo\xF4\x80bar\xF4\xBFbaz",
 								            b"\xF0\x80\x80\x80foo\xF0\x90\x80\x80bar",
 								            b"\xED\xA0\x80foo\xED\xBF\xBFbar",
 								        ];
 								        for expect_fail in invalid_inputs {
 								            let outcome = test_proto_str(expect_fail).to_str();
 								            assert_eq!(outcome, Err(Utf8Error(())), "{expect_fail:?}");
 								        }
 								    }
 								    #[test]
 								    fn proto_str_to_cow() {
 								        // Valid UTF-8 is returned borrowed.
 								        assert_eq!(test_proto_str(b"hello").to_cow_lossy(), Cow::Borrowed("hello"));
 								        assert_eq!(
 								            test_proto_str("ศไทย中华Việt Nam".as_bytes()).to_cow_lossy(),
 								            Cow::Borrowed("ศไทย中华Việt Nam")
 								        );
 								        // Invalid input yields an owned string with one U+FFFD per invalid
 								        // sequence. Expectations use `\u{FFFD}` escapes so they cannot be
 								        // corrupted by a re-encoding of this file.
 								        for (bytes, lossy_str) in [
 								            (&b"Hello\xC2 There\xFF Goodbye"[..], "Hello\u{FFFD} There\u{FFFD} Goodbye"),
 								            (
 								                b"Hello\xC0\x80 There\xE6\x83 Goodbye",
 								                "Hello\u{FFFD}\u{FFFD} There\u{FFFD} Goodbye",
 								            ),
 								            (b"\xF5foo\xF5\x80bar", "\u{FFFD}foo\u{FFFD}\u{FFFD}bar"),
 								            (b"\xF1foo\xF1\x80bar\xF1\x80\x80baz", "\u{FFFD}foo\u{FFFD}bar\u{FFFD}baz"),
 								            (b"\xF4foo\xF4\x80bar\xF4\xBFbaz", "\u{FFFD}foo\u{FFFD}bar\u{FFFD}\u{FFFD}baz"),
 								            (
 								                b"\xF0\x80\x80\x80foo\xF0\x90\x80\x80bar",
 								                "\u{FFFD}\u{FFFD}\u{FFFD}\u{FFFD}foo\u{10000}bar",
 								            ),
 								            (
 								                b"\xED\xA0\x80foo\xED\xBF\xBFbar",
 								                "\u{FFFD}\u{FFFD}\u{FFFD}foo\u{FFFD}\u{FFFD}\u{FFFD}bar",
 								            ),
 								        ] {
 								            let cow = test_proto_str(bytes).to_cow_lossy();
 								            assert!(matches!(cow, Cow::Owned(_)));
 								            assert_eq!(&*cow, lossy_str, "{bytes:?}");
 								        }
 								    }
 								    #[test]
 								    fn proto_str_utf8_chunks() {
 								        // Checks that `utf8_chunks()` over `$bytes` yields exactly the listed
 								        // `Ok(valid)` / `Err(invalid)` chunks, in order.
 								        macro_rules! assert_chunks {
 								            ($bytes:expr, $($chunks:expr),* $(,)?) => {
 								                let bytes = $bytes;
 								                let chunks: &[Result<&str, &[u8]>] = &[$($chunks),*];
 								                let proto = test_proto_str(bytes);
 								                let got = proto.utf8_chunks();
 								                let want = chunks.iter().copied();
 								                assert!(Iterator::eq(got, want), "{bytes:?} -> {chunks:?}");
 								            };
 								        }
 								        assert_chunks!(b"hello", Ok("hello"));
 								        assert_chunks!("ศไทย中华Việt Nam".as_bytes(), Ok("ศไทย中华Việt Nam"));
 								        assert_chunks!(
 								            b"Hello\xC2 There\xFF Goodbye",
 								            Ok("Hello"),
 								            Err(b"\xC2"),
 								            Ok(" There"),
 								            Err(b"\xFF"),
 								            Ok(" Goodbye"),
 								        );
 								        assert_chunks!(
 								            b"Hello\xC0\x80 There\xE6\x83 Goodbye",
 								            Ok("Hello"),
 								            Err(b"\xC0"),
 								            Err(b"\x80"),
 								            Ok(" There"),
 								            Err(b"\xE6\x83"),
 								            Ok(" Goodbye"),
 								        );
 								        assert_chunks!(
 								            b"\xF5foo\xF5\x80bar",
 								            Err(b"\xF5"),
 								            Ok("foo"),
 								            Err(b"\xF5"),
 								            Err(b"\x80"),
 								            Ok("bar"),
 								        );
 								        assert_chunks!(
 								            b"\xF1foo\xF1\x80bar\xF1\x80\x80baz",
 								            Err(b"\xF1"),
 								            Ok("foo"),
 								            Err(b"\xF1\x80"),
 								            Ok("bar"),
 								            Err(b"\xF1\x80\x80"),
 								            Ok("baz"),
 								        );
 								        assert_chunks!(
 								            b"\xF4foo\xF4\x80bar\xF4\xBFbaz",
 								            Err(b"\xF4"),
 								            Ok("foo"),
 								            Err(b"\xF4\x80"),
 								            Ok("bar"),
 								            Err(b"\xF4"),
 								            Err(b"\xBF"),
 								            Ok("baz"),
 								        );
 								        assert_chunks!(
 								            b"\xF0\x80\x80\x80foo\xF0\x90\x80\x80bar",
 								            Err(b"\xF0"),
 								            Err(b"\x80"),
 								            Err(b"\x80"),
 								            Err(b"\x80"),
 								            Ok("foo\u{10000}bar"),
 								        );
 								        assert_chunks!(
 								            b"\xED\xA0\x80foo\xED\xBF\xBFbar",
 								            Err(b"\xED"),
 								            Err(b"\xA0"),
 								            Err(b"\x80"),
 								            Ok("foo"),
 								            Err(b"\xED"),
 								            Err(b"\xBF"),
 								            Err(b"\xBF"),
 								            Ok("bar"),
 								        );
 								    }
 								    #[test]
 								    fn proto_str_chars() {
 								        // Checks that `chars()` over `$bytes` yields exactly `$chars`, in order.
 								        macro_rules! assert_chars {
 								            ($bytes:expr, $chars:expr) => {
 								                let bytes = $bytes;
 								                let chars = $chars;
 								                let s = test_proto_str(bytes);
 								                let mut got_chars = s.chars();
 								                let mut expected_chars = chars.into_iter();
 								                assert!(got_chars.eq(expected_chars), "{bytes:?} -> {chars:?}");
 								            };
 								        }
 								        assert_chars!(b"hello", ['h', 'e', 'l', 'l', 'o']);
 								        assert_chars!(
 								            "ศไทย中华Việt Nam".as_bytes(),
 								            ['ศ', 'ไ', 'ท', 'ย', '中', '华', 'V', 'i', 'ệ', 't', ' ', 'N', 'a', 'm']
 								        );
 								        // Below, every invalid sequence is expected as one U+FFFD, written as
 								        // the `'\u{FFFD}'` escape so it cannot be corrupted by a re-encoding
 								        // of this file.
 								        assert_chars!(
 								            b"Hello\xC2 There\xFF Goodbye",
 								            [
 								                'H', 'e', 'l', 'l', 'o', '\u{FFFD}', ' ', 'T', 'h', 'e', 'r', 'e', '\u{FFFD}',
 								                ' ', 'G', 'o', 'o', 'd', 'b', 'y', 'e'
 								            ]
 								        );
 								        assert_chars!(
 								            b"Hello\xC0\x80 There\xE6\x83 Goodbye",
 								            [
 								                'H', 'e', 'l', 'l', 'o', '\u{FFFD}', '\u{FFFD}', ' ', 'T', 'h', 'e', 'r', 'e',
 								                '\u{FFFD}', ' ', 'G', 'o', 'o', 'd', 'b', 'y', 'e'
 								            ]
 								        );
 								        assert_chars!(
 								            b"\xF5foo\xF5\x80bar",
 								            ['\u{FFFD}', 'f', 'o', 'o', '\u{FFFD}', '\u{FFFD}', 'b', 'a', 'r']
 								        );
 								        assert_chars!(
 								            b"\xF1foo\xF1\x80bar\xF1\x80\x80baz",
 								            ['\u{FFFD}', 'f', 'o', 'o', '\u{FFFD}', 'b', 'a', 'r', '\u{FFFD}', 'b', 'a', 'z']
 								        );
 								        assert_chars!(
 								            b"\xF4foo\xF4\x80bar\xF4\xBFbaz",
 								            [
 								                '\u{FFFD}', 'f', 'o', 'o', '\u{FFFD}', 'b', 'a', 'r', '\u{FFFD}', '\u{FFFD}',
 								                'b', 'a', 'z'
 								            ]
 								        );
 								        assert_chars!(
 								            b"\xF0\x80\x80\x80foo\xF0\x90\x80\x80bar",
 								            [
 								                '\u{FFFD}', '\u{FFFD}', '\u{FFFD}', '\u{FFFD}', 'f', 'o', 'o', '\u{10000}',
 								                'b', 'a', 'r'
 								            ]
 								        );
 								        assert_chars!(
 								            b"\xED\xA0\x80foo\xED\xBF\xBFbar",
 								            [
 								                '\u{FFFD}', '\u{FFFD}', '\u{FFFD}', 'f', 'o', 'o', '\u{FFFD}', '\u{FFFD}',
 								                '\u{FFFD}', 'b', 'a', 'r'
 								            ]
 								        );
 								    }
-												Define the shape of BytesMut with stubbed methods

This is the primary mutator for `bytes` fields, which proxy `[u8]`.

PiperOrigin-RevId: 547242972

											
										
										
											1 year ago
+								}