From 63f4c503a2762abaec136209ee5b62452ecf206e Mon Sep 17 00:00:00 2001 From: Mike Kruskal Date: Sat, 28 Oct 2023 12:57:37 -0700 Subject: [PATCH] Editions: Embed resolved features of descriptor.proto for pure python. Because pure python builds all descriptors at runtime via reflection, it's unable to parse options during the build of descriptor.proto (i.e. before we've built the options schemas). We always lazily parse these options to avoid this, but that still means options can't be *used* during this build. Since the current build process makes heavy use of features (which previously just relied on syntax), this poses a problem for editions. To get around this, we just embed the resolved features directly into the gencode for this one file. This will allow us to skip feature resolution for these descriptors and still consider features in their build. PiperOrigin-RevId: 577495949 --- .../protobuf/internal/descriptor_test.py | 67 ++++++++++- .../protobuf/compiler/python/generator.cc | 109 ++++++++++++++++++ .../protobuf/compiler/python/generator.h | 3 + 3 files changed, 175 insertions(+), 4 deletions(-) diff --git a/python/google/protobuf/internal/descriptor_test.py b/python/google/protobuf/internal/descriptor_test.py index 1adbad286e..56ca079f34 100755 --- a/python/google/protobuf/internal/descriptor_test.py +++ b/python/google/protobuf/internal/descriptor_test.py @@ -12,13 +12,15 @@ __author__ = 'robinson@google.com (Will Robinson)' import unittest import warnings -from google.protobuf import descriptor_pb2 -from google.protobuf.internal import api_implementation -from google.protobuf.internal import test_util from google.protobuf import descriptor +from google.protobuf import descriptor_pb2 from google.protobuf import descriptor_pool from google.protobuf import symbol_database from google.protobuf import text_format +from google.protobuf.internal import api_implementation +from google.protobuf.internal import test_util + +from google.protobuf.internal import 
_parameterized from google.protobuf import unittest_custom_options_pb2 from google.protobuf import unittest_import_pb2 from google.protobuf import unittest_pb2 @@ -1169,7 +1171,6 @@ class MakeDescriptorTest(unittest.TestCase): self.assertEqual(result.fields[0].cpp_type, descriptor.FieldDescriptor.CPPTYPE_UINT64) - def testMakeDescriptorWithOptions(self): descriptor_proto = descriptor_pb2.DescriptorProto() aggregate_message = unittest_custom_options_pb2.AggregateMessage @@ -1214,5 +1215,63 @@ class MakeDescriptorTest(unittest.TestCase): json_names[index]) +class FeaturesTest(_parameterized.TestCase): + + # TODO Add _features for upb and C++. + @_parameterized.named_parameters([ + ('File', lambda: descriptor_pb2.DESCRIPTOR), + ('Message', lambda: descriptor_pb2.FeatureSet.DESCRIPTOR), + ( + 'Enum', + lambda: descriptor_pb2.FeatureSet.FieldPresence.DESCRIPTOR, + ), + ( + 'Field', + lambda: descriptor_pb2.FeatureSet.DESCRIPTOR.fields_by_name[ + 'enum_type' + ], + ), + ]) + @unittest.skipIf( + api_implementation.Type() != 'python', + 'Features field is only available with the pure python implementation', + ) + def testDescriptorProtoDefaultFeatures(self, desc): + self.assertEqual( + desc()._features.field_presence, + descriptor_pb2.FeatureSet.FieldPresence.EXPLICIT, + ) + self.assertEqual( + desc()._features.enum_type, + descriptor_pb2.FeatureSet.EnumType.CLOSED, + ) + self.assertEqual( + desc()._features.repeated_field_encoding, + descriptor_pb2.FeatureSet.RepeatedFieldEncoding.EXPANDED, + ) + + # TODO Add _features for upb and C++. 
+ @unittest.skipIf( + api_implementation.Type() != 'python', + 'Features field is only available with the pure python implementation', + ) + def testDescriptorProtoOverrideFeatures(self): + desc = descriptor_pb2.SourceCodeInfo.Location.DESCRIPTOR.fields_by_name[ + 'path' + ] + self.assertEqual( + desc._features.field_presence, + descriptor_pb2.FeatureSet.FieldPresence.EXPLICIT, + ) + self.assertEqual( + desc._features.enum_type, + descriptor_pb2.FeatureSet.EnumType.CLOSED, + ) + self.assertEqual( + desc._features.repeated_field_encoding, + descriptor_pb2.FeatureSet.RepeatedFieldEncoding.PACKED, + ) + + if __name__ == '__main__': unittest.main() diff --git a/src/google/protobuf/compiler/python/generator.cc b/src/google/protobuf/compiler/python/generator.cc index 6430459c6a..3f3410e3cd 100644 --- a/src/google/protobuf/compiler/python/generator.cc +++ b/src/google/protobuf/compiler/python/generator.cc @@ -33,6 +33,7 @@ #include "absl/container/flat_hash_map.h" #include "absl/log/absl_check.h" #include "absl/log/absl_log.h" +#include "absl/memory/memory.h" #include "absl/strings/ascii.h" #include "absl/strings/escaping.h" #include "absl/strings/str_cat.h" @@ -50,6 +51,8 @@ #include "google/protobuf/descriptor.h" #include "google/protobuf/descriptor.pb.h" #include "google/protobuf/descriptor_legacy.h" +#include "google/protobuf/descriptor_visitor.h" +#include "google/protobuf/dynamic_message.h" #include "google/protobuf/io/printer.h" #include "google/protobuf/io/strtod.h" #include "google/protobuf/io/zero_copy_stream.h" @@ -285,6 +288,7 @@ bool Generator::Generate(const FileDescriptor* file, PrintAllEnumsInFile(); PrintMessageDescriptors(); FixForeignFieldsInDescriptors(); + PrintResolvedFeatures(); printer_->Outdent(); printer_->Print("else:\n"); printer_->Indent(); @@ -416,6 +420,111 @@ void Generator::PrintImports() const { printer_->Print("\n"); } +template <typename DescriptorT> +std::string Generator::GetResolvedFeatures( + const DescriptorT& descriptor) const { + if
(!GeneratingDescriptorProto()) { + // Everything but descriptor.proto can handle proper feature resolution. + return "None"; + } + + // Load the resolved features from our pool. + const Descriptor* feature_set = file_->pool()->FindMessageTypeByName( + FeatureSet::GetDescriptor()->full_name()); + auto message_factory = absl::make_unique<DynamicMessageFactory>(); + auto features = + absl::WrapUnique(message_factory->GetPrototype(feature_set)->New()); + features->ParseFromString( + GetResolvedSourceFeatures(descriptor).SerializeAsString()); + + // Collect all of the resolved features. + std::vector<std::string> feature_args; + const Reflection* reflection = features->GetReflection(); + std::vector<const FieldDescriptor*> fields; + reflection->ListFields(*features, &fields); + for (const auto* field : fields) { + // Assume these are all enums. If we add non-enum global features or any + // python-specific features, we will need to come back and improve this + // logic. + ABSL_CHECK(field->enum_type() != nullptr) + << "Unexpected non-enum field found!"; + if (field->options().retention() == FieldOptions::RETENTION_SOURCE) { + // Skip any source-retention features. + continue; + } + const EnumDescriptor* enm = field->enum_type(); + const EnumValueDescriptor* value = + enm->FindValueByNumber(reflection->GetEnumValue(*features, field)); + + feature_args.emplace_back(absl::StrCat( + field->name(), "=", + absl::StrFormat("%s.values_by_name[\"%s\"].number", + ModuleLevelDescriptorName(*enm), value->name()))); + } + return absl::StrCat("_ResolvedFeatures(", absl::StrJoin(feature_args, ","), + ")"); +} + +void Generator::PrintResolvedFeatures() const { + // Since features are used during the descriptor build, it's impossible to do + // feature resolution at the normal point for descriptor.proto. Instead, we do + // feature resolution here in the generator, and embed a custom object on all + // of the generated descriptors.
This object should act like any other + // FeatureSet message on normal descriptors, but will never have to be + // resolved by the python runtime. + ABSL_CHECK(GeneratingDescriptorProto()); + printer_->Emit({{"resolved_features", GetResolvedFeatures(*file_)}, + {"descriptor_name", kDescriptorKey}}, + R"py( + class _ResolvedFeatures: + def __init__(self, features = None, **kwargs): + if features: + for k, v in features.FIELDS.items(): + setattr(self, k, getattr(features, k)) + else: + for k, v in kwargs.items(): + setattr(self, k, v) + $descriptor_name$._features = $resolved_features$ + )py"); + +#define MAKE_NESTED(desc, CPP_FIELD, PY_FIELD) \ + [&] { \ + for (int i = 0; i < desc.CPP_FIELD##_count(); ++i) { \ + printer_->Emit( \ + {{"resolved_subfeatures", GetResolvedFeatures(*desc.CPP_FIELD(i))}, \ + {"index", absl::StrCat(i)}, \ + {"field", PY_FIELD}}, \ + "$descriptor_name$.$field$[$index$]._features = " \ + "$resolved_subfeatures$\n"); \ + } \ + } + + internal::VisitDescriptors(*file_, [&](const Descriptor& msg) { + printer_->Emit( + {{"resolved_features", GetResolvedFeatures(msg)}, + {"descriptor_name", ModuleLevelDescriptorName(msg)}, + {"field_features", MAKE_NESTED(msg, field, "fields")}, + {"oneof_features", MAKE_NESTED(msg, oneof_decl, "oneofs")}, + {"ext_features", MAKE_NESTED(msg, extension, "extensions")}}, + R"py( + $descriptor_name$._features = $resolved_features$ + $field_features$ + $oneof_features$ + $ext_features$ + )py"); + }); + internal::VisitDescriptors(*file_, [&](const EnumDescriptor& enm) { + printer_->Emit({{"resolved_features", GetResolvedFeatures(enm)}, + {"descriptor_name", ModuleLevelDescriptorName(enm)}, + {"value_features", MAKE_NESTED(enm, value, "values")}}, + R"py( + $descriptor_name$._features = $resolved_features$ + $value_features$ + )py"); + }); +#undef MAKE_NESTED +} + // Prints the single file descriptor for this file. 
void Generator::PrintFileDescriptor() const { absl::flat_hash_map m; diff --git a/src/google/protobuf/compiler/python/generator.h b/src/google/protobuf/compiler/python/generator.h index 3a4ff665ad..c5beeeea35 100644 --- a/src/google/protobuf/compiler/python/generator.h +++ b/src/google/protobuf/compiler/python/generator.h @@ -75,6 +75,9 @@ class PROTOC_EXPORT Generator : public CodeGenerator { GeneratorOptions ParseParameter(absl::string_view parameter, std::string* error) const; void PrintImports() const; + template <typename DescriptorT> + std::string GetResolvedFeatures(const DescriptorT& descriptor) const; + void PrintResolvedFeatures() const; void PrintFileDescriptor() const; void PrintAllEnumsInFile() const; void PrintNestedEnums(const Descriptor& descriptor) const;