Add test coverage for UTF-8 behavior

PiperOrigin-RevId: 625650220
pull/16507/head
Marcel Hlopko 10 months ago committed by Copybara-Service
parent f799af8bf5
commit 1a7ea70730
  1. 2
      rust/test/shared/BUILD
  2. 112
      rust/test/shared/utf8/BUILD
  3. 9
      rust/test/shared/utf8/feature_verify.proto
  4. 7
      rust/test/shared/utf8/no_features_proto2.proto
  5. 7
      rust/test/shared/utf8/no_features_proto3.proto
  6. 70
      rust/test/shared/utf8/utf8_test.cc
  7. 92
      rust/test/shared/utf8/utf8_test.rs

@ -23,6 +23,7 @@ rust_library(
aliases = {
"//rust:protobuf_upb_export": "protobuf",
},
visibility = ["//rust/test/shared:__subpackages__"],
deps = [
"//rust:protobuf_upb_export",
"@crate_index//:googletest",
@ -36,6 +37,7 @@ rust_library(
aliases = {
"//rust:protobuf_cpp_export": "protobuf",
},
visibility = ["//rust/test/shared:__subpackages__"],
deps = [
"//rust:protobuf_cpp_export",
"@crate_index//:googletest",

@ -0,0 +1,112 @@
load("@rules_rust//rust:defs.bzl", "rust_test")
load("//bazel:cc_proto_library.bzl", "cc_proto_library")
load("//rust:defs.bzl", "rust_cc_proto_library", "rust_upb_proto_library")
licenses(["notice"])
# Rust UTF-8 behavior test compiled from utf8_test.rs against the `_cpp`
# variants of the runtime and matchers. The aliases expose them under the
# crate names `protobuf` and `matchers`, the same names utf8_upb_test uses,
# so both tests can share one source file.
rust_test(
name = "utf8_cpp_test",
srcs = ["utf8_test.rs"],
aliases = {
"//rust:protobuf_cpp": "protobuf",
"//rust/test/shared:matchers_cpp": "matchers",
},
deps = [
":feature_verify_cc_rust_proto",
":no_features_proto2_cc_rust_proto",
":no_features_proto3_cc_rust_proto",
"//rust:protobuf_cpp",
"//rust/test/shared:matchers_cpp",
"@crate_index//:googletest",
],
)
# Rust UTF-8 behavior test compiled from utf8_test.rs against the `_upb`
# variants of the runtime and matchers. The aliases expose them under the
# crate names `protobuf` and `matchers`, the same names utf8_cpp_test uses,
# so both tests can share one source file.
rust_test(
name = "utf8_upb_test",
srcs = ["utf8_test.rs"],
aliases = {
"//rust:protobuf_upb": "protobuf",
"//rust/test/shared:matchers_upb": "matchers",
},
deps = [
":feature_verify_upb_rust_proto",
":no_features_proto2_upb_rust_proto",
":no_features_proto3_upb_rust_proto",
"//rust:protobuf_upb",
"//rust/test/shared:matchers_upb",
"@crate_index//:googletest",
],
)
# C++-only test (utf8_test.cc) that uses the cc_proto_library targets of the
# same three .proto files as the Rust tests in this package.
cc_test(
name = "utf8_pure_cc_test",
srcs = ["utf8_test.cc"],
deps = [
":feature_verify_cc_proto",
":no_features_proto2_cc_proto",
":no_features_proto3_cc_proto",
"@com_google_absl//absl/strings:string_view",
"@com_google_googletest//:gtest",
"@com_google_googletest//:gtest_main",
],
)
# Schema targets, one per .proto file. The language-specific proto library
# rules in this package all depend on these.

# proto2 file (no_features_proto2.proto).
proto_library(
name = "no_features_proto2_proto",
srcs = ["no_features_proto2.proto"],
)
# proto3 file (no_features_proto3.proto).
proto_library(
name = "no_features_proto3_proto",
srcs = ["no_features_proto3.proto"],
)
# Editions file (feature_verify.proto).
proto_library(
name = "feature_verify_proto",
srcs = ["feature_verify.proto"],
)
# C++ proto libraries for each schema target; consumed by utf8_pure_cc_test.
cc_proto_library(
name = "no_features_proto2_cc_proto",
deps = [":no_features_proto2_proto"],
)
cc_proto_library(
name = "no_features_proto3_cc_proto",
deps = [":no_features_proto3_proto"],
)
cc_proto_library(
name = "feature_verify_cc_proto",
deps = [":feature_verify_proto"],
)
# Rust (upb) proto libraries for each schema target; consumed by utf8_upb_test.
rust_upb_proto_library(
name = "no_features_proto2_upb_rust_proto",
deps = [":no_features_proto2_proto"],
)
rust_upb_proto_library(
name = "no_features_proto3_upb_rust_proto",
deps = [":no_features_proto3_proto"],
)
rust_upb_proto_library(
name = "feature_verify_upb_rust_proto",
deps = [":feature_verify_proto"],
)
# Rust (cc) proto libraries for each schema target; consumed by utf8_cpp_test.
rust_cc_proto_library(
name = "no_features_proto2_cc_rust_proto",
deps = [":no_features_proto2_proto"],
)
rust_cc_proto_library(
name = "no_features_proto3_cc_rust_proto",
deps = [":no_features_proto3_proto"],
)
rust_cc_proto_library(
name = "feature_verify_cc_rust_proto",
deps = [":feature_verify_proto"],
)

@ -0,0 +1,9 @@
edition = "2023";
package utf8;
option features.utf8_validation = VERIFY;
// Message with a single string field, declared in an editions file that sets
// `features.utf8_validation = VERIFY` at file level.
message Verify {
string my_field = 1;
}

@ -0,0 +1,7 @@
syntax = "proto2";
package utf8;
// Message with a single optional string field, declared in a proto2 file
// that sets no options or features.
message NoFeaturesProto2 {
optional string my_field = 1;
}

@ -0,0 +1,7 @@
syntax = "proto3";
package utf8;
// Message with a single optional string field, declared in a proto3 file
// that sets no options or features.
message NoFeaturesProto3 {
optional string my_field = 1;
}

@ -0,0 +1,70 @@
#include <string>
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "absl/strings/string_view.h"
#include "google/protobuf/rust/test/shared/utf8/feature_verify.pb.h"
#include "google/protobuf/rust/test/shared/utf8/no_features_proto2.pb.h"
#include "google/protobuf/rust/test/shared/utf8/no_features_proto3.pb.h"
namespace {
using ::testing::Eq;
using ::testing::IsEmpty;
using ::testing::Not;
// We use 0b1000_0000, since 0b10XX_XXXX in UTF-8 denotes a continuation byte
// (bytes 2-4 of a multi-byte sequence), but never the first byte.
constexpr char kInvalidUtf8[] = "\x80";
// proto2 string field: invalid UTF-8 is expected to pass through the setter,
// serialization and parsing without any error being reported.
TEST(Utf8Test, TestProto2) {
  utf8::NoFeaturesProto2 msg;

  // The setter accepts the bytes and the getter returns them unchanged.
  msg.set_my_field(kInvalidUtf8);
  EXPECT_THAT(msg.my_field(), Eq(absl::string_view(kInvalidUtf8)));

  // Serialization produces a non-empty payload.
  std::string wire_bytes = msg.SerializeAsString();
  EXPECT_THAT(wire_bytes, Not(IsEmpty()));

  // Parsing the payload succeeds.
  utf8::NoFeaturesProto2 reparsed;
  const bool parse_ok = reparsed.ParseFromString(wire_bytes);
  EXPECT_THAT(parse_ok, Eq(true));
}
// proto3 string field: invalid UTF-8 is expected to pass through the setter
// and serialization, but parsing the serialized bytes is expected to fail.
TEST(Utf8Test, TestProto3) {
  utf8::NoFeaturesProto3 msg;

  // The setter accepts the bytes and the getter returns them unchanged.
  msg.set_my_field(kInvalidUtf8);
  EXPECT_THAT(msg.my_field(), Eq(absl::string_view(kInvalidUtf8)));

  // Serialization produces a non-empty payload.
  std::string wire_bytes = msg.SerializeAsString();
  EXPECT_THAT(wire_bytes, Not(IsEmpty()));

  // Parsing the payload is rejected.
  utf8::NoFeaturesProto3 reparsed;
  const bool parse_ok = reparsed.ParseFromString(wire_bytes);
  EXPECT_THAT(parse_ok, Eq(false));
}
// Editions message from feature_verify.proto: invalid UTF-8 is expected to
// pass through the setter and serialization, but parsing the serialized
// bytes is expected to fail.
TEST(Utf8Test, TestEditionsVerify) {
  utf8::Verify msg;

  // The setter accepts the bytes and the getter returns them unchanged.
  msg.set_my_field(kInvalidUtf8);
  EXPECT_THAT(msg.my_field(), Eq(absl::string_view(kInvalidUtf8)));

  // Serialization produces a non-empty payload.
  std::string wire_bytes = msg.SerializeAsString();
  EXPECT_THAT(wire_bytes, Not(IsEmpty()));

  // Parsing the payload is rejected.
  utf8::Verify reparsed;
  const bool parse_ok = reparsed.ParseFromString(wire_bytes);
  EXPECT_THAT(parse_ok, Eq(false));
}
} // namespace

@ -0,0 +1,92 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2024 Google LLC. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
//! Tests demonstrating the Protobuf Rust behavior around UTF-8 under different
//! scenarios (proto2, proto3, editions).
// TODO: The behavior is currently suboptimal (for example because
// b/333545903 or b/335140403). Design and implement desirable changes to this
// behavior. Do not assume that the Protobuf team is intentional about these
// behaviors while b/304774814 is open.
use feature_verify_proto::Verify;
use googletest::prelude::*;
use no_features_proto2_proto::NoFeaturesProto2;
use no_features_proto3_proto::NoFeaturesProto3;
use protobuf::{ParseError, ProtoStr};
// We use 0b1000_0000, since 0b10XX_XXXX in UTF-8 denotes a continuation byte
// (bytes 2-4 of a multi-byte sequence), but never the first byte.
const NON_UTF8_BYTES: &[u8] = b"\x80";
/// Returns a `ProtoStr` whose contents are the non-UTF-8 bytes in
/// `NON_UTF8_BYTES`.
fn make_non_utf8_proto_str() -> &'static ProtoStr {
    // SAFETY: This is safe under current implementation of C++ and UPB kernels.
    // In the hypothetical pure Rust runtime this would be library-level UB - but
    // this test is specifically present to demonstrate UTF-8 behavior under
    // C++ and UPB kernels.
    unsafe { ProtoStr::from_utf8_unchecked(NON_UTF8_BYTES) }
}
/// proto2: non-UTF-8 bytes are accepted by the setter and the serializer, and
/// the serialized message parses successfully.
#[test]
fn test_proto2() {
    let invalid = make_non_utf8_proto_str();
    let mut message = NoFeaturesProto2::new();

    // The setter accepts the value and the getter returns the same bytes.
    message.set_my_field(invalid);
    assert_that!(message.my_field().as_bytes(), eq(NON_UTF8_BYTES));

    // Serialization does not report an error.
    // TODO: Add test assertion once serialize becomes fallible.
    let wire_bytes = message.serialize();

    // Parsing the serialized bytes succeeds.
    let reparsed = NoFeaturesProto2::parse(&wire_bytes);
    assert_that!(reparsed, ok(anything()));
}
/// proto3: non-UTF-8 bytes are accepted by the setter and the serializer, but
/// parsing the serialized message fails with `ParseError`.
#[test]
fn test_proto3() {
    let invalid = make_non_utf8_proto_str();
    let mut message = NoFeaturesProto3::new();

    // The setter accepts the value and the getter returns the same bytes.
    message.set_my_field(invalid);
    assert_that!(message.my_field().as_bytes(), eq(NON_UTF8_BYTES));

    // Serialization does not report an error.
    // TODO: Add test assertion once serialize becomes fallible.
    let wire_bytes = message.serialize();

    // Parsing the serialized bytes is rejected.
    let reparsed = NoFeaturesProto3::parse(&wire_bytes);
    assert_that!(reparsed, err(matches_pattern!(&ParseError)));
}
/// Editions `Verify` message: non-UTF-8 bytes are accepted by the setter and
/// the serializer, but parsing the serialized message fails with `ParseError`.
#[test]
fn test_verify() {
    let invalid = make_non_utf8_proto_str();
    let mut message = Verify::new();

    // The setter accepts the value and the getter returns the same bytes.
    message.set_my_field(invalid);
    assert_that!(message.my_field().as_bytes(), eq(NON_UTF8_BYTES));

    // Serialization does not report an error.
    // TODO: Add test assertion once serialize becomes fallible.
    let wire_bytes = message.serialize();

    // Parsing the serialized bytes is rejected.
    let reparsed = Verify::parse(&wire_bytes);
    assert_that!(reparsed, err(matches_pattern!(&ParseError)));
}
Loading…
Cancel
Save