|
|
|
// Protocol Buffers - Google's data interchange format
|
|
|
|
// Copyright 2024 Google LLC. All rights reserved.
|
|
|
|
//
|
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
// license that can be found in the LICENSE file or at
|
|
|
|
// https://developers.google.com/open-source/licenses/bsd
|
|
|
|
|
|
|
|
//! Tests demonstrating the Protobuf Rust behavior around UTF-8 under different
|
|
|
|
//! scenarios (proto2, proto3, editions).
|
|
|
|
|
|
|
|
// TODO: The behavior is currently suboptimal (for example because
|
|
|
|
// b/333545903 or b/335140403). Design and implement desirable changes to this
|
|
|
|
// behavior. Do not assume that the Protobuf team is intentional about these
|
|
|
|
// behaviors while b/304774814 is open.
|
|
|
|
|
|
|
|
use feature_verify_rust_proto::Verify;
|
|
|
|
use googletest::prelude::*;
|
|
|
|
use no_features_proto2_rust_proto::NoFeaturesProto2;
|
|
|
|
use no_features_proto3_rust_proto::NoFeaturesProto3;
|
|
|
|
use protobuf::{ParseError, ProtoStr};
|
|
|
|
|
|
|
|
// We use 0b1000_0000, since 0b10XX_XXXX in UTF-8 denotes a continuation byte
|
|
|
|
// (byte 2-4 of a sequence), but never the first byte.
|
|
|
|
const NON_UTF8_BYTES: &[u8] = b"\x80";
|
|
|
|
|
|
|
|
// Returns ProtoStr with non-UTF-8 content.
|
|
|
|
fn make_non_utf8_proto_str() -> &'static ProtoStr {
|
|
|
|
unsafe {
|
|
|
|
// SAFETY: This is safe under current implementation of C++ and UPB kernels.
|
|
|
|
// In the hypothethical pure Rust runtime this would be library-level UB - but
|
|
|
|
// this test is specifically present to demonstrate UTF-8 behavior under
|
|
|
|
// C++ and UPB kernels.
|
|
|
|
ProtoStr::from_utf8_unchecked(NON_UTF8_BYTES)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// `NoFeaturesProto2`: a non-UTF-8 string passes through the setter,
/// serialization and parsing without any error being reported.
#[test]
fn test_proto2() {
    let mut message = NoFeaturesProto2::new();

    // The setter accepts the invalid string, and the getter returns the very
    // same bytes.
    message.set_my_field(make_non_utf8_proto_str());
    assert_that!(message.my_field().as_bytes(), eq(NON_UTF8_BYTES));

    // Serialization reports no error.
    // TODO: Add test assertion once serialize becomes fallible.
    let wire_bytes = message.serialize();

    // Parsing the serialized bytes back succeeds.
    let reparsed = NoFeaturesProto2::parse(&wire_bytes);
    assert_that!(reparsed, ok(anything()));
}
|
|
|
|
|
|
|
|
/// `NoFeaturesProto3`: a non-UTF-8 string passes through the setter and
/// serialization, but parsing the serialized bytes fails with `ParseError`.
#[test]
fn test_proto3() {
    let mut message = NoFeaturesProto3::new();

    // The setter accepts the invalid string, and the getter returns the very
    // same bytes.
    message.set_my_field(make_non_utf8_proto_str());
    assert_that!(message.my_field().as_bytes(), eq(NON_UTF8_BYTES));

    // Serialization reports no error.
    // TODO: Add test assertion once serialize becomes fallible.
    let wire_bytes = message.serialize();

    // Parsing the serialized bytes back is rejected.
    let reparsed = NoFeaturesProto3::parse(&wire_bytes);
    assert_that!(reparsed, err(matches_pattern!(&ParseError)));
}
|
|
|
|
|
|
|
|
/// `Verify` (from `feature_verify_rust_proto`): a non-UTF-8 string passes
/// through the setter and serialization, but parsing the serialized bytes
/// fails with `ParseError`.
#[test]
fn test_verify() {
    let mut message = Verify::new();

    // The setter accepts the invalid string, and the getter returns the very
    // same bytes.
    message.set_my_field(make_non_utf8_proto_str());
    assert_that!(message.my_field().as_bytes(), eq(NON_UTF8_BYTES));

    // Serialization reports no error.
    // TODO: Add test assertion once serialize becomes fallible.
    let wire_bytes = message.serialize();

    // Parsing the serialized bytes back is rejected.
    let reparsed = Verify::parse(&wire_bytes);
    assert_that!(reparsed, err(matches_pattern!(&ParseError)));
}
|