parent
f799af8bf5
commit
1a7ea70730
7 changed files with 299 additions and 0 deletions
@ -0,0 +1,112 @@ |
||||
load("@rules_rust//rust:defs.bzl", "rust_test") |
||||
load("//bazel:cc_proto_library.bzl", "cc_proto_library") |
||||
load("//rust:defs.bzl", "rust_cc_proto_library", "rust_upb_proto_library") |
||||
|
||||
licenses(["notice"]) |
||||
|
||||
# Runs utf8_test.rs against the C++ kernel. The aliases let the test source
# refer to the kernel-specific crates simply as `protobuf` and `matchers`.
rust_test(
    name = "utf8_cpp_test",
    srcs = ["utf8_test.rs"],
    aliases = {
        "//rust:protobuf_cpp": "protobuf",
        "//rust/test/shared:matchers_cpp": "matchers",
    },
    deps = [
        ":feature_verify_cc_rust_proto",
        ":no_features_proto2_cc_rust_proto",
        ":no_features_proto3_cc_rust_proto",
        "//rust:protobuf_cpp",
        "//rust/test/shared:matchers_cpp",
        "@crate_index//:googletest",
    ],
)
||||
|
||||
# Runs the same utf8_test.rs against the UPB kernel. The aliases let the test
# source refer to the kernel-specific crates as `protobuf` and `matchers`.
rust_test(
    name = "utf8_upb_test",
    srcs = ["utf8_test.rs"],
    aliases = {
        "//rust:protobuf_upb": "protobuf",
        "//rust/test/shared:matchers_upb": "matchers",
    },
    deps = [
        ":feature_verify_upb_rust_proto",
        ":no_features_proto2_upb_rust_proto",
        ":no_features_proto3_upb_rust_proto",
        "//rust:protobuf_upb",
        "//rust/test/shared:matchers_upb",
        "@crate_index//:googletest",
    ],
)
||||
|
||||
# C++-only test (utf8_test.cc) that uses the generated C++ code for the same
# three protos, without any Rust involved.
cc_test(
    name = "utf8_pure_cc_test",
    srcs = ["utf8_test.cc"],
    deps = [
        ":feature_verify_cc_proto",
        ":no_features_proto2_cc_proto",
        ":no_features_proto3_cc_proto",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_googletest//:gtest",
        "@com_google_googletest//:gtest_main",
    ],
)
||||
|
||||
# Proto sources under test: one proto2 file, one proto3 file, and one editions
# file (feature_verify.proto). Each gets its own proto_library.
proto_library(
    name = "no_features_proto2_proto",
    srcs = ["no_features_proto2.proto"],
)

proto_library(
    name = "no_features_proto3_proto",
    srcs = ["no_features_proto3.proto"],
)

proto_library(
    name = "feature_verify_proto",
    srcs = ["feature_verify.proto"],
)
||||
|
||||
# C++ generated code for each proto; consumed by :utf8_pure_cc_test.
cc_proto_library(
    name = "no_features_proto2_cc_proto",
    deps = [":no_features_proto2_proto"],
)

cc_proto_library(
    name = "no_features_proto3_cc_proto",
    deps = [":no_features_proto3_proto"],
)

cc_proto_library(
    name = "feature_verify_cc_proto",
    deps = [":feature_verify_proto"],
)
||||
|
||||
# Rust generated code on the UPB kernel for each proto; consumed by
# :utf8_upb_test.
rust_upb_proto_library(
    name = "no_features_proto2_upb_rust_proto",
    deps = [":no_features_proto2_proto"],
)

rust_upb_proto_library(
    name = "no_features_proto3_upb_rust_proto",
    deps = [":no_features_proto3_proto"],
)

rust_upb_proto_library(
    name = "feature_verify_upb_rust_proto",
    deps = [":feature_verify_proto"],
)
||||
|
||||
# Rust generated code on the C++ kernel for each proto; consumed by
# :utf8_cpp_test.
rust_cc_proto_library(
    name = "no_features_proto2_cc_rust_proto",
    deps = [":no_features_proto2_proto"],
)

rust_cc_proto_library(
    name = "no_features_proto3_cc_rust_proto",
    deps = [":no_features_proto3_proto"],
)

rust_cc_proto_library(
    name = "feature_verify_cc_rust_proto",
    deps = [":feature_verify_proto"],
)
@ -0,0 +1,9 @@ |
||||
edition = "2023";

package utf8;

// File-level feature: string fields in this file use VERIFY UTF-8 validation.
option features.utf8_validation = VERIFY;

// Single-string-field message declared under utf8_validation = VERIFY.
message Verify {
  string my_field = 1;
}
@ -0,0 +1,7 @@ |
||||
syntax = "proto2";

package utf8;

// proto2 message with a single optional string field and no feature or
// option overrides.
message NoFeaturesProto2 {
  optional string my_field = 1;
}
@ -0,0 +1,7 @@ |
||||
syntax = "proto3";

package utf8;

// proto3 message with a single optional string field and no feature or
// option overrides.
message NoFeaturesProto3 {
  optional string my_field = 1;
}
@ -0,0 +1,70 @@ |
||||
#include <string> |
||||
|
||||
#include <gmock/gmock.h> |
||||
#include <gtest/gtest.h> |
||||
#include "absl/strings/string_view.h" |
||||
#include "google/protobuf/rust/test/shared/utf8/feature_verify.pb.h" |
||||
#include "google/protobuf/rust/test/shared/utf8/no_features_proto2.pb.h" |
||||
#include "google/protobuf/rust/test/shared/utf8/no_features_proto3.pb.h" |
||||
|
||||
namespace { |
||||
|
||||
using ::testing::Eq; |
||||
using ::testing::IsEmpty; |
||||
using ::testing::Not; |
||||
|
||||
// We use 0b1000_0000, since 0b10XX_XXXX in UTF-8 denotes a continuation byte
|
||||
// (bytes 2-4 of a sequence), but never the first byte.
|
||||
constexpr char kInvalidUtf8[] = "\x80"; |
||||
|
||||
TEST(Utf8Test, TestProto2) {
  utf8::NoFeaturesProto2 message;

  // The setter stores the invalid bytes and the getter returns them unchanged.
  message.set_my_field(kInvalidUtf8);
  EXPECT_THAT(message.my_field(), Eq(absl::string_view(kInvalidUtf8)));

  // Serialization yields a non-empty payload.
  const std::string wire_bytes = message.SerializeAsString();
  EXPECT_THAT(wire_bytes, Not(IsEmpty()));

  // proto2: parsing the payload back is expected to succeed.
  utf8::NoFeaturesProto2 reparsed;
  EXPECT_TRUE(reparsed.ParseFromString(wire_bytes));
}
||||
|
||||
TEST(Utf8Test, TestProto3) {
  utf8::NoFeaturesProto3 message;

  // The setter stores the invalid bytes and the getter returns them unchanged.
  message.set_my_field(kInvalidUtf8);
  EXPECT_THAT(message.my_field(), Eq(absl::string_view(kInvalidUtf8)));

  // Serialization yields a non-empty payload.
  const std::string wire_bytes = message.SerializeAsString();
  EXPECT_THAT(wire_bytes, Not(IsEmpty()));

  // proto3: parsing the payload back is expected to fail.
  utf8::NoFeaturesProto3 reparsed;
  EXPECT_FALSE(reparsed.ParseFromString(wire_bytes));
}
||||
|
||||
TEST(Utf8Test, TestEditionsVerify) {
  utf8::Verify message;

  // The setter stores the invalid bytes and the getter returns them unchanged.
  message.set_my_field(kInvalidUtf8);
  EXPECT_THAT(message.my_field(), Eq(absl::string_view(kInvalidUtf8)));

  // Serialization yields a non-empty payload.
  const std::string wire_bytes = message.SerializeAsString();
  EXPECT_THAT(wire_bytes, Not(IsEmpty()));

  // utf8_validation = VERIFY: parsing the payload back is expected to fail.
  utf8::Verify reparsed;
  EXPECT_FALSE(reparsed.ParseFromString(wire_bytes));
}
||||
|
||||
} // namespace
|
@ -0,0 +1,92 @@ |
||||
// Protocol Buffers - Google's data interchange format
|
||||
// Copyright 2024 Google LLC. All rights reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file or at
|
||||
// https://developers.google.com/open-source/licenses/bsd
|
||||
|
||||
//! Tests demonstrating the Protobuf Rust behavior around UTF-8 under different
|
||||
//! scenarios (proto2, proto3, editions).
|
||||
|
||||
// TODO: The behavior is currently suboptimal (for example because of
|
||||
// b/333545903 or b/335140403). Design and implement desirable changes to this
|
||||
// behavior. Do not assume that the Protobuf team is intentional about these
|
||||
// behaviors while b/304774814 is open.
|
||||
|
||||
use feature_verify_proto::Verify; |
||||
use googletest::prelude::*; |
||||
use no_features_proto2_proto::NoFeaturesProto2; |
||||
use no_features_proto3_proto::NoFeaturesProto3; |
||||
use protobuf::{ParseError, ProtoStr}; |
||||
|
||||
// We use 0b1000_0000, since 0b10XX_XXXX in UTF-8 denotes a continuation byte
|
||||
// (bytes 2-4 of a sequence), but never the first byte.
|
||||
const NON_UTF8_BYTES: &[u8] = b"\x80"; |
||||
|
||||
// Returns ProtoStr with non-UTF-8 content.
|
||||
fn make_non_utf8_proto_str() -> &'static ProtoStr { |
||||
unsafe { |
||||
// SAFETY: This is safe under current implementation of C++ and UPB kernels.
|
||||
// In the hypothethical pure Rust runtime this would be library-level UB - but
|
||||
// this test is specifically present to demonstrate UTF-8 behavior under
|
||||
// C++ and UPB kernels.
|
||||
ProtoStr::from_utf8_unchecked(NON_UTF8_BYTES) |
||||
} |
||||
} |
||||
|
||||
#[test]
fn test_proto2() {
    let mut proto2_msg = NoFeaturesProto2::new();

    // The setter accepts non-UTF-8 content and the getter returns it unchanged.
    proto2_msg.set_my_field(make_non_utf8_proto_str());
    assert_that!(proto2_msg.my_field().as_bytes(), eq(NON_UTF8_BYTES));

    // Serialization reports no error.
    // TODO: Add test assertion once serialize becomes fallible.
    let wire_bytes = proto2_msg.serialize();

    // proto2: parsing the payload back is expected to succeed.
    assert_that!(NoFeaturesProto2::parse(&wire_bytes), ok(anything()));
}
||||
|
||||
#[test]
fn test_proto3() {
    let mut proto3_msg = NoFeaturesProto3::new();

    // The setter accepts non-UTF-8 content and the getter returns it unchanged.
    proto3_msg.set_my_field(make_non_utf8_proto_str());
    assert_that!(proto3_msg.my_field().as_bytes(), eq(NON_UTF8_BYTES));

    // Serialization reports no error.
    // TODO: Add test assertion once serialize becomes fallible.
    let wire_bytes = proto3_msg.serialize();

    // proto3: parsing the payload back is expected to fail.
    assert_that!(NoFeaturesProto3::parse(&wire_bytes), err(matches_pattern!(&ParseError)));
}
||||
|
||||
#[test]
fn test_verify() {
    let mut verify_msg = Verify::new();

    // The setter accepts non-UTF-8 content and the getter returns it unchanged.
    verify_msg.set_my_field(make_non_utf8_proto_str());
    assert_that!(verify_msg.my_field().as_bytes(), eq(NON_UTF8_BYTES));

    // Serialization reports no error.
    // TODO: Add test assertion once serialize becomes fallible.
    let wire_bytes = verify_msg.serialize();

    // utf8_validation = VERIFY: parsing the payload back is expected to fail.
    assert_that!(Verify::parse(&wire_bytes), err(matches_pattern!(&ParseError)));
}
Loading…
Reference in new issue