diff --git a/src/google/protobuf/compiler/BUILD.bazel b/src/google/protobuf/compiler/BUILD.bazel index 9cc302eb8d..b04c617141 100644 --- a/src/google/protobuf/compiler/BUILD.bazel +++ b/src/google/protobuf/compiler/BUILD.bazel @@ -328,6 +328,8 @@ cc_library( visibility = ["//src/google/protobuf:__subpackages__"], deps = [ "//src/google/protobuf:protobuf_nowkt", + "@com_google_absl//absl/types:span", + "@com_google_absl//absl/container:flat_hash_set", ], ) diff --git a/src/google/protobuf/compiler/command_line_interface.cc b/src/google/protobuf/compiler/command_line_interface.cc index 20d6d95e46..f4adce055f 100644 --- a/src/google/protobuf/compiler/command_line_interface.cc +++ b/src/google/protobuf/compiler/command_line_interface.cc @@ -2635,13 +2635,11 @@ void CommandLineInterface::GetTransitiveDependencies( // Add this file. FileDescriptorProto* new_descriptor = output->Add(); - *new_descriptor = StripSourceRetentionOptions(*file); + *new_descriptor = + StripSourceRetentionOptions(*file, include_source_code_info); if (include_json_name) { file->CopyJsonNameTo(new_descriptor); } - if (include_source_code_info) { - file->CopySourceCodeInfoTo(new_descriptor); - } } const CommandLineInterface::GeneratorInfo* diff --git a/src/google/protobuf/compiler/command_line_interface_unittest.cc b/src/google/protobuf/compiler/command_line_interface_unittest.cc index 0b6d4aafed..5cce601fce 100644 --- a/src/google/protobuf/compiler/command_line_interface_unittest.cc +++ b/src/google/protobuf/compiler/command_line_interface_unittest.cc @@ -38,6 +38,7 @@ #include +#include #include "absl/log/absl_check.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" diff --git a/src/google/protobuf/compiler/retention.cc b/src/google/protobuf/compiler/retention.cc index 04937a40e6..abb44cda9a 100644 --- a/src/google/protobuf/compiler/retention.cc +++ b/src/google/protobuf/compiler/retention.cc @@ -30,10 +30,14 @@ #include "google/protobuf/compiler/retention.h" +#include #include #include +#include #include +#include "absl/container/flat_hash_set.h" +#include "absl/types/span.h" #include "google/protobuf/descriptor.h" #include "google/protobuf/dynamic_message.h" @@ -43,24 +47,38 @@ namespace compiler { namespace { -// Recursively strips any options with source retention from the message. -void StripMessage(Message& m) { +// Recursively strips any options with source retention from the message. If +// stripped_paths is not null, then this function will populate it with the +// paths that were stripped, using the path format from +// SourceCodeInfo.Location. The path parameter is used as a stack tracking the +// path to the current location. +void StripMessage(Message& m, std::vector& path, + std::vector>* stripped_paths) { const Reflection* reflection = m.GetReflection(); std::vector fields; reflection->ListFields(m, &fields); for (const FieldDescriptor* field : fields) { + path.push_back(field->number()); if (field->options().retention() == FieldOptions::RETENTION_SOURCE) { reflection->ClearField(&m, field); + if (stripped_paths != nullptr) { + stripped_paths->push_back(path); + } } else if (field->type() == FieldDescriptor::TYPE_MESSAGE) { if (field->is_repeated()) { int field_size = reflection->FieldSize(m, field); for (int i = 0; i < field_size; ++i) { - StripMessage(*reflection->MutableRepeatedMessage(&m, field, i)); + path.push_back(i); + StripMessage(*reflection->MutableRepeatedMessage(&m, field, i), path, + stripped_paths); + path.pop_back(); } } else { - StripMessage(*reflection->MutableMessage(&m, field)); + StripMessage(*reflection->MutableMessage(&m, field), path, + stripped_paths); } } + path.pop_back(); } } @@ -72,18 +90,23 @@ void StripMessage(Message& m) { // Using a dynamic message allows us to see these custom options. To convert // back and forth between the generated type and the dynamic message, we have // to serialize one and parse that into the other. -void ConvertToDynamicMessageAndStripOptions(Message& m, - const DescriptorPool& pool) { +// +// If stripped_paths is not null, it will be populated with the paths that were +// stripped, using the path format from SourceCodeInfo.Location. +void ConvertToDynamicMessageAndStripOptions( + Message& m, const DescriptorPool& pool, + std::vector>* stripped_paths = nullptr) { // We need to look up the descriptor in the pool so that we can get a // descriptor which knows about any custom options that were used in the // .proto file. const Descriptor* descriptor = pool.FindMessageTypeByName(m.GetTypeName()); + std::vector path; if (descriptor == nullptr) { // If the pool does not contain the descriptor, then this proto file does // not transitively depend on descriptor.proto, in which case we know there // are no custom options to worry about. - StripMessage(m); + StripMessage(m, path, stripped_paths); } else { DynamicMessageFactory factory; std::unique_ptr dynamic_message( @@ -91,7 +114,7 @@ void ConvertToDynamicMessageAndStripOptions(Message& m, std::string serialized; ABSL_CHECK(m.SerializeToString(&serialized)); ABSL_CHECK(dynamic_message->ParseFromString(serialized)); - StripMessage(*dynamic_message); + StripMessage(*dynamic_message, path, stripped_paths); ABSL_CHECK(dynamic_message->SerializeToString(&serialized)); ABSL_CHECK(m.ParseFromString(serialized)); } @@ -118,12 +141,72 @@ auto StripLocalOptions(const DescriptorType& descriptor) { return options; } +// Returns true if x is a prefix of y. +bool IsPrefix(absl::Span x, absl::Span y) { + return x == y.subspan(0, x.size()); +} + +// Strips the paths in stripped_paths from the SourceCodeInfo. +void StripSourceCodeInfo(std::vector>& stripped_paths, + SourceCodeInfo& source_code_info) { + RepeatedPtrField* locations = + source_code_info.mutable_location(); + + // We sort the locations lexicographically by their paths and include an + // index pointing back to the original location. + std::vector, int>> sorted_locations; + sorted_locations.reserve(locations->size()); + for (int i = 0; i < locations->size(); ++i) { + sorted_locations.emplace_back((*locations)[i].path(), i); + } + absl::c_sort(sorted_locations); + absl::c_sort(stripped_paths); + + // With both arrays sorted, we can efficiently step through them in tandem. + // If a stripped path is a prefix of any location, then that is a location + // we need to delete from the SourceCodeInfo. + absl::flat_hash_set indices_to_delete; + auto i = stripped_paths.cbegin(); + auto j = sorted_locations.cbegin(); + while (i != stripped_paths.cend() && j != sorted_locations.cend()) { + if (IsPrefix(*i, j->first)) { + indices_to_delete.insert(j->second); + ++j; + } else if (*i < j->first) { + ++i; + } else { + ++j; + } + } + + // We delete the locations in descending order to avoid invalidating + // indices. + std::vector old_locations; + old_locations.resize(locations->size()); + locations->ExtractSubrange(0, locations->size(), old_locations.data()); + locations->Reserve(old_locations.size() - indices_to_delete.size()); + for (int i = 0; i < old_locations.size(); ++i) { + if (indices_to_delete.contains(i)) { + delete old_locations[i]; + } else { + locations->AddAllocated(old_locations[i]); + } + } +} + } // namespace -FileDescriptorProto StripSourceRetentionOptions(const FileDescriptor& file) { +FileDescriptorProto StripSourceRetentionOptions(const FileDescriptor& file, + bool include_source_code_info) { FileDescriptorProto file_proto; file.CopyTo(&file_proto); - ConvertToDynamicMessageAndStripOptions(file_proto, *file.pool()); + std::vector> stripped_paths; + ConvertToDynamicMessageAndStripOptions(file_proto, *file.pool(), + &stripped_paths); + if (include_source_code_info) { + file.CopySourceCodeInfoTo(&file_proto); + StripSourceCodeInfo(stripped_paths, *file_proto.mutable_source_code_info()); + } return file_proto; } diff --git a/src/google/protobuf/compiler/retention.h b/src/google/protobuf/compiler/retention.h index 0579b067e7..354f5f2126 100644 --- a/src/google/protobuf/compiler/retention.h +++ b/src/google/protobuf/compiler/retention.h @@ -42,9 +42,11 @@ namespace protobuf { namespace compiler { // Returns a FileDescriptorProto for this file, with all RETENTION_SOURCE -// options stripped out. -PROTOC_EXPORT FileDescriptorProto -StripSourceRetentionOptions(const FileDescriptor& file); +// options stripped out. If include_source_code_info is true, this function +// will also populate the source code info but strip out the parts of it +// corresponding to source-retention options. +PROTOC_EXPORT FileDescriptorProto StripSourceRetentionOptions( + const FileDescriptor& file, bool include_source_code_info = false); // The following functions take a descriptor and strip all source-retention // options from just the local entity (e.g. message, enum, field). Most code diff --git a/src/google/protobuf/retention_test.cc b/src/google/protobuf/retention_test.cc index 2b582d8fc3..cf92ed4bff 100644 --- a/src/google/protobuf/retention_test.cc +++ b/src/google/protobuf/retention_test.cc @@ -264,6 +264,66 @@ TEST(RetentionTest, StripSourceRetentionOptions) { expected_options)); } +TEST(RetentionTest, StripSourceRetentionOptionsWithSourceCodeInfo) { + // The tests above make assertions against the generated code, but this test + // case directly examines the result of the StripSourceRetentionOptions() + // function instead. + std::string proto_file = + absl::Substitute(R"( + syntax = "proto2"; + + package google.protobuf.internal; + + import "$0"; + + option (source_retention_option) = 123; + option (options) = { + i1: 123 + i2: 456 + c { s: "abc" } + rc { s: "abc" } + }; + option (repeated_options) = { + i1: 111 i2: 222 + }; + + message Options { + optional int32 i1 = 1 [retention = RETENTION_SOURCE]; + optional int32 i2 = 2; + message ChildMessage { + optional string s = 1 [retention = RETENTION_SOURCE]; + } + optional ChildMessage c = 3; + repeated ChildMessage rc = 4; + } + + extend google.protobuf.FileOptions { + optional int32 source_retention_option = 50000 [retention = RETENTION_SOURCE]; + optional Options options = 50001; + repeated Options repeated_options = 50002; + })", + FileDescriptorSet::descriptor()->file()->name()); + io::ArrayInputStream input_stream(proto_file.data(), + static_cast(proto_file.size())); + io::ErrorCollector error_collector; + io::Tokenizer tokenizer(&input_stream, &error_collector); + compiler::Parser parser; + FileDescriptorProto file_descriptor; + ASSERT_TRUE(parser.Parse(&tokenizer, &file_descriptor)); + file_descriptor.set_name("retention.proto"); + + DescriptorPool pool; + FileDescriptorProto descriptor_proto_descriptor; + FileDescriptorSet::descriptor()->file()->CopyTo(&descriptor_proto_descriptor); + pool.BuildFile(descriptor_proto_descriptor); + pool.BuildFile(file_descriptor); + + FileDescriptorProto stripped_file = compiler::StripSourceRetentionOptions( + *pool.FindFileByName("retention.proto"), + /*include_source_code_info=*/true); + EXPECT_EQ(stripped_file.source_code_info().location_size(), 63); +} + } // namespace } // namespace internal } // namespace protobuf