From 1190f460c103ebe3014b0d022258934cdbbea1aa Mon Sep 17 00:00:00 2001 From: Mike Kruskal Date: Wed, 5 Jul 2023 12:03:49 -0700 Subject: [PATCH] Improve feature propagation on map fields. Generated map messages should inherit features from the original field, not the containing message. To do this, we copy *all* features from the original field to the generated ones, and disable explicit feature validation for them. Additionally, this change fixes some bugs in the error handling of explicitly set features. String map fields should be allowed to use `string_field_validation`, and uninterpreted feature options in the proto should still be recognized for validation. PiperOrigin-RevId: 545747503 --- src/google/protobuf/compiler/parser.cc | 32 ++-- .../protobuf/compiler/parser_unittest.cc | 59 ++++++ src/google/protobuf/descriptor.cc | 30 ++- src/google/protobuf/descriptor_unittest.cc | 171 ++++++++++++++++++ 4 files changed, 273 insertions(+), 19 deletions(-) diff --git a/src/google/protobuf/compiler/parser.cc b/src/google/protobuf/compiler/parser.cc index 5bff5a94cd..d0231a8da8 100644 --- a/src/google/protobuf/compiler/parser.cc +++ b/src/google/protobuf/compiler/parser.cc @@ -1222,41 +1222,45 @@ void Parser::GenerateMapEntry(const MapField& map_field, } else { value_field->set_type_name(map_field.value_type_name); } - // Propagate the "enforce_utf8" option to key and value fields if they - // are strings. This helps simplify the implementation of code generators - // and also reflection-based parsing code. + // Propagate all features to the generated key and value fields. This helps + // simplify the implementation of code generators and also reflection-based + // parsing code. Instead of having to implement complex inheritance rules + // special-casing maps, we can just copy them at generation time. // // The following definition: // message Foo { - // map value = 1 [enforce_utf8 = false]; + // map value = 1 [features.some_feature = VALUE]; // } // will be interpreted as: // message Foo { // message ValueEntry { // option map_entry = true; - // string key = 1 [enforce_utf8 = false]; - // string value = 2 [enforce_utf8 = false]; + // string key = 1 [features.some_feature = VALUE]; + // string value = 2 [features.some_feature = VALUE]; // } - // repeated ValueEntry value = 1 [enforce_utf8 = false]; + // repeated ValueEntry value = 1 [features.some_feature = VALUE]; // } - // - // TODO(xiaofeng): Remove this when the "enforce_utf8" option is removed - // from protocol compiler. for (int i = 0; i < field->options().uninterpreted_option_size(); ++i) { const UninterpretedOption& option = field->options().uninterpreted_option(i); + // Legacy handling for the `enforce_utf8` option, which bears a striking + // similarity to features in many respects. + // TODO(b/289755572) Delete this once proto2/proto3 have been turned down. if (option.name_size() == 1 && option.name(0).name_part() == "enforce_utf8" && !option.name(0).is_extension()) { if (key_field->type() == FieldDescriptorProto::TYPE_STRING) { - key_field->mutable_options()->add_uninterpreted_option()->CopyFrom( - option); + *key_field->mutable_options()->add_uninterpreted_option() = option; } if (value_field->type() == FieldDescriptorProto::TYPE_STRING) { - value_field->mutable_options()->add_uninterpreted_option()->CopyFrom( - option); + *value_field->mutable_options()->add_uninterpreted_option() = option; } } + if (option.name(0).name_part() == "features" && + !option.name(0).is_extension()) { + *key_field->mutable_options()->add_uninterpreted_option() = option; + *value_field->mutable_options()->add_uninterpreted_option() = option; + } } } diff --git a/src/google/protobuf/compiler/parser_unittest.cc b/src/google/protobuf/compiler/parser_unittest.cc index 24e478bc6d..9f6f103a3c 100644 --- a/src/google/protobuf/compiler/parser_unittest.cc +++ b/src/google/protobuf/compiler/parser_unittest.cc @@ -3964,6 +3964,65 @@ TEST_F(ParseEditionsTest, ExtensionsParse) { "edition: \"2023\"\n"); } +TEST_F(ParseEditionsTest, MapFeatures) { + ExpectParsesTo( + R"schema( + edition = '2023'; + message Foo { + map map_field = 1 [ + features.my_feature = SOMETHING + ]; + })schema", + R"pb(message_type { + name: "Foo" + field { + name: "map_field" + number: 1 + label: LABEL_REPEATED + type_name: "MapFieldEntry" + options { + uninterpreted_option { + name { name_part: "features" is_extension: false } + name { name_part: "my_feature" is_extension: false } + identifier_value: "SOMETHING" + } + } + } + nested_type { + name: "MapFieldEntry" + field { + name: "key" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_STRING + options { + uninterpreted_option { + name { name_part: "features" is_extension: false } + name { name_part: "my_feature" is_extension: false } + identifier_value: "SOMETHING" + } + } + } + field { + name: "value" + number: 2 + label: LABEL_OPTIONAL + type_name: "int" + options { + uninterpreted_option { + name { name_part: "features" is_extension: false } + name { name_part: "my_feature" is_extension: false } + identifier_value: "SOMETHING" + } + } + } + options { map_entry: true } + } + } + syntax: "editions" + edition: "2023")pb"); +} + TEST_F(ParseEditionsTest, EmptyEdition) { ExpectHasEarlyExitErrors( R"schema( diff --git a/src/google/protobuf/descriptor.cc b/src/google/protobuf/descriptor.cc index a5d9faedde..7f55bfcb1a 100644 --- a/src/google/protobuf/descriptor.cc +++ b/src/google/protobuf/descriptor.cc @@ -7962,6 +7962,17 @@ void DescriptorBuilder::ValidateOptions(const FieldDescriptor* field, } +static bool IsStringMapType(const FieldDescriptor* field) { + if (!field->is_map()) return false; + for (int i = 0; i < field->message_type()->field_count(); ++i) { + if (field->message_type()->field(i)->type() == + FieldDescriptor::TYPE_STRING) { + return true; + } + } + return false; +} + void DescriptorBuilder::ValidateFieldFeatures( const FieldDescriptor* field, const FieldDescriptorProto& proto) { #ifdef PROTOBUF_FUTURE_EDITIONS @@ -7986,28 +7997,37 @@ void DescriptorBuilder::ValidateFieldFeatures( "Extensions can't be required."); } + if (field->containing_type() != nullptr && + field->containing_type()->options().map_entry()) { + // Skip validation of explicit features on generated map fields. These will + // be blindly propagated from the original map field, and may violate a lot + // of these conditions. Note: we do still validate the user-specified map + // field. + return; + } + // Validate explicitly specified features on the field proto. if ((field->containing_oneof() != nullptr || field->is_repeated() || field->message_type() != nullptr) && - proto.options().features().field_presence() == FeatureSet::IMPLICIT) { + field->proto_features_->field_presence() == FeatureSet::IMPLICIT) { AddError( field->full_name(), proto, DescriptorPool::ErrorCollector::NAME, "Only singular scalar fields can specify implicit field presence."); } if ((field->containing_oneof() != nullptr || field->is_repeated()) && - proto.options().features().field_presence() == - FeatureSet::LEGACY_REQUIRED) { + field->proto_features_->field_presence() == FeatureSet::LEGACY_REQUIRED) { AddError( field->full_name(), proto, DescriptorPool::ErrorCollector::NAME, "Only singular scalar fields can specify required field presence."); } if (field->type() != FieldDescriptor::TYPE_STRING && - proto.options().features().has_string_field_validation()) { + !IsStringMapType(field) && + field->proto_features_->has_string_field_validation()) { AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::NAME, "Only string fields can specify `string_field_validation`."); } if (!field->is_repeated() && - proto.options().features().has_repeated_field_encoding()) { + field->proto_features_->has_repeated_field_encoding()) { AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::NAME, "Only repeated fields can specify `repeated_field_encoding`."); } diff --git a/src/google/protobuf/descriptor_unittest.cc b/src/google/protobuf/descriptor_unittest.cc index 27c368c03d..bef2eb1846 100644 --- a/src/google/protobuf/descriptor_unittest.cc +++ b/src/google/protobuf/descriptor_unittest.cc @@ -8106,6 +8106,110 @@ TEST_F(FeaturesTest, OneofFieldFeaturesOverride) { EXPECT_EQ(GetFeatures(field).GetExtension(pb::test).int_file_feature(), 2); } +TEST_F(FeaturesTest, MapFieldFeaturesOverride) { + constexpr absl::string_view kProtoFile = R"schema( + edition = "2023"; + + import "google/protobuf/unittest_features.proto"; + + option features.(pb.test).int_file_feature = 99; + option features.(pb.test).int_multiple_feature = 1; + + message Foo { + option features.(pb.test).int_message_feature = 87; + option features.(pb.test).int_multiple_feature = 2; + + map map_field = 1 [ + features.(pb.test).int_field_feature = 100, + features.(pb.test).int_multiple_feature = 3 + ]; + } + )schema"; + io::ArrayInputStream input_stream(kProtoFile.data(), kProtoFile.size()); + SimpleErrorCollector error_collector; + io::Tokenizer tokenizer(&input_stream, &error_collector); + compiler::Parser parser; + parser.RecordErrorsTo(&error_collector); + FileDescriptorProto proto; + ASSERT_TRUE(parser.Parse(&tokenizer, &proto)) + << error_collector.last_error() << "\n" + << kProtoFile; + ASSERT_EQ("", error_collector.last_error()); + proto.set_name("foo.proto"); + + BuildDescriptorMessagesInTestPool(); + BuildFileInTestPool(pb::TestFeatures::descriptor()->file()); + const FileDescriptor* file = pool_.BuildFile(proto); + ASSERT_THAT(file, NotNull()); + + const FieldDescriptor* map_field = file->message_type(0)->field(0); + const FieldDescriptor* key = map_field->message_type()->field(0); + const FieldDescriptor* value = map_field->message_type()->field(1); + + auto validate = [](const FieldDescriptor* desc) { + EXPECT_EQ(GetFeatures(desc).GetExtension(pb::test).int_file_feature(), 99) + << desc->DebugString(); + EXPECT_EQ(GetFeatures(desc).GetExtension(pb::test).int_message_feature(), + 87) + << desc->DebugString(); + EXPECT_EQ(GetFeatures(desc).GetExtension(pb::test).int_field_feature(), 100) + << desc->DebugString(); + EXPECT_EQ(GetFeatures(desc).GetExtension(pb::test).int_multiple_feature(), + 3) + << desc->DebugString(); + }; + + validate(map_field); + validate(key); + validate(value); +} + +TEST_F(FeaturesTest, MapFieldFeaturesStringValidation) { + constexpr absl::string_view kProtoFile = R"schema( + edition = "2023"; + + message Foo { + map map_field = 1 [ + features.string_field_validation = HINT + ]; + map map_field_value = 2 [ + features.string_field_validation = HINT + ]; + map map_field_key = 3 [ + features.string_field_validation = HINT + ]; + } + )schema"; + io::ArrayInputStream input_stream(kProtoFile.data(), kProtoFile.size()); + SimpleErrorCollector error_collector; + io::Tokenizer tokenizer(&input_stream, &error_collector); + compiler::Parser parser; + parser.RecordErrorsTo(&error_collector); + FileDescriptorProto proto; + ASSERT_TRUE(parser.Parse(&tokenizer, &proto)) + << error_collector.last_error() << "\n" + << kProtoFile; + ASSERT_EQ("", error_collector.last_error()); + proto.set_name("foo.proto"); + + BuildDescriptorMessagesInTestPool(); + const FileDescriptor* file = pool_.BuildFile(proto); + ASSERT_THAT(file, NotNull()); + + auto validate_map_field = [](const FieldDescriptor* field) { + const FieldDescriptor* key = field->message_type()->field(0); + const FieldDescriptor* value = field->message_type()->field(1); + + EXPECT_FALSE(field->requires_utf8_validation()) << field->DebugString(); + EXPECT_FALSE(key->requires_utf8_validation()) << field->DebugString(); + EXPECT_FALSE(value->requires_utf8_validation()) << field->DebugString(); + }; + + validate_map_field(file->message_type(0)->field(0)); + validate_map_field(file->message_type(0)->field(1)); + validate_map_field(file->message_type(0)->field(2)); +} + TEST_F(FeaturesTest, RootExtensionFeaturesOverride) { BuildDescriptorMessagesInTestPool(); BuildFileInTestPool(pb::TestFeatures::descriptor()->file()); @@ -9139,6 +9243,73 @@ TEST_F(FeaturesTest, InvalidFieldNonStringWithStringValidation) { "`string_field_validation`.\n"); } +TEST_F(FeaturesTest, InvalidFieldNonStringMapWithStringValidation) { + BuildDescriptorMessagesInTestPool(); + BuildFileWithErrors( + R"pb( + name: "foo.proto" + syntax: "editions" + edition: "2023" + message_type { + name: "Foo" + nested_type { + name: "MapFieldEntry" + field { + name: "key" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_INT32 + options { + uninterpreted_option { + name { name_part: "features" is_extension: false } + name { + name_part: "string_field_validation" + is_extension: false + } + identifier_value: "HINT" + } + } + } + field { + name: "value" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_INT32 + options { + uninterpreted_option { + name { name_part: "features" is_extension: false } + name { + name_part: "string_field_validation" + is_extension: false + } + identifier_value: "HINT" + } + } + } + options { map_entry: true } + } + field { + name: "map_field" + number: 1 + label: LABEL_REPEATED + type_name: "MapFieldEntry" + options { + uninterpreted_option { + name { name_part: "features" is_extension: false } + name { + name_part: "string_field_validation" + is_extension: false + } + identifier_value: "HINT" + } + } + } + } + )pb", + "foo.proto: Foo.map_field: NAME: Only string fields can specify " + "`string_field_validation`.\n"); +} + TEST_F(FeaturesTest, InvalidFieldNonRepeatedWithRepeatedEncoding) { BuildDescriptorMessagesInTestPool(); BuildFileWithErrors(