Update source code info when stripping source-retention options

When we invoke protoc with `--include_source_info --descriptor_set_out`, we
currently strip out source-retention options but without doing the
corresponding stripping of the source code info. This CL fixes that problem by
making sure to strip the same entities from source code info that we stripped
from the main part of the descriptors.

PiperOrigin-RevId: 519831383
pull/12346/head
Mike Kruskal 2 years ago committed by Copybara-Service
parent 0549e52188
commit dff36c7d02
  1. 2
      src/google/protobuf/compiler/BUILD.bazel
  2. 6
      src/google/protobuf/compiler/command_line_interface.cc
  3. 1
      src/google/protobuf/compiler/command_line_interface_unittest.cc
  4. 103
      src/google/protobuf/compiler/retention.cc
  5. 8
      src/google/protobuf/compiler/retention.h
  6. 60
      src/google/protobuf/retention_test.cc

@ -328,6 +328,8 @@ cc_library(
visibility = ["//src/google/protobuf:__subpackages__"], visibility = ["//src/google/protobuf:__subpackages__"],
deps = [ deps = [
"//src/google/protobuf:protobuf_nowkt", "//src/google/protobuf:protobuf_nowkt",
"@com_google_absl//absl/types:span",
"@com_google_absl//absl/container:flat_hash_set",
], ],
) )

@ -2635,13 +2635,11 @@ void CommandLineInterface::GetTransitiveDependencies(
// Add this file. // Add this file.
FileDescriptorProto* new_descriptor = output->Add(); FileDescriptorProto* new_descriptor = output->Add();
*new_descriptor = StripSourceRetentionOptions(*file); *new_descriptor =
StripSourceRetentionOptions(*file, include_source_code_info);
if (include_json_name) { if (include_json_name) {
file->CopyJsonNameTo(new_descriptor); file->CopyJsonNameTo(new_descriptor);
} }
if (include_source_code_info) {
file->CopySourceCodeInfoTo(new_descriptor);
}
} }
const CommandLineInterface::GeneratorInfo* const CommandLineInterface::GeneratorInfo*

@ -38,6 +38,7 @@
#include <cstdint> #include <cstdint>
#include <gmock/gmock.h>
#include "absl/log/absl_check.h" #include "absl/log/absl_check.h"
#include "absl/strings/str_cat.h" #include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h" #include "absl/strings/str_format.h"

@ -30,10 +30,14 @@
#include "google/protobuf/compiler/retention.h" #include "google/protobuf/compiler/retention.h"
#include <algorithm>
#include <memory> #include <memory>
#include <string> #include <string>
#include <utility>
#include <vector> #include <vector>
#include "absl/container/flat_hash_set.h"
#include "absl/types/span.h"
#include "google/protobuf/descriptor.h" #include "google/protobuf/descriptor.h"
#include "google/protobuf/dynamic_message.h" #include "google/protobuf/dynamic_message.h"
@ -43,24 +47,38 @@ namespace compiler {
namespace { namespace {
// Recursively strips any options with source retention from the message. // Recursively strips any options with source retention from the message. If
void StripMessage(Message& m) { // stripped_paths is not null, then this function will populate it with the
// paths that were stripped, using the path format from
// SourceCodeInfo.Location. The path parameter is used as a stack tracking the
// path to the current location.
void StripMessage(Message& m, std::vector<int>& path,
std::vector<std::vector<int>>* stripped_paths) {
const Reflection* reflection = m.GetReflection(); const Reflection* reflection = m.GetReflection();
std::vector<const FieldDescriptor*> fields; std::vector<const FieldDescriptor*> fields;
reflection->ListFields(m, &fields); reflection->ListFields(m, &fields);
for (const FieldDescriptor* field : fields) { for (const FieldDescriptor* field : fields) {
path.push_back(field->number());
if (field->options().retention() == FieldOptions::RETENTION_SOURCE) { if (field->options().retention() == FieldOptions::RETENTION_SOURCE) {
reflection->ClearField(&m, field); reflection->ClearField(&m, field);
if (stripped_paths != nullptr) {
stripped_paths->push_back(path);
}
} else if (field->type() == FieldDescriptor::TYPE_MESSAGE) { } else if (field->type() == FieldDescriptor::TYPE_MESSAGE) {
if (field->is_repeated()) { if (field->is_repeated()) {
int field_size = reflection->FieldSize(m, field); int field_size = reflection->FieldSize(m, field);
for (int i = 0; i < field_size; ++i) { for (int i = 0; i < field_size; ++i) {
StripMessage(*reflection->MutableRepeatedMessage(&m, field, i)); path.push_back(i);
StripMessage(*reflection->MutableRepeatedMessage(&m, field, i), path,
stripped_paths);
path.pop_back();
} }
} else { } else {
StripMessage(*reflection->MutableMessage(&m, field)); StripMessage(*reflection->MutableMessage(&m, field), path,
stripped_paths);
} }
} }
path.pop_back();
} }
} }
@ -72,18 +90,23 @@ void StripMessage(Message& m) {
// Using a dynamic message allows us to see these custom options. To convert // Using a dynamic message allows us to see these custom options. To convert
// back and forth between the generated type and the dynamic message, we have // back and forth between the generated type and the dynamic message, we have
// to serialize one and parse that into the other. // to serialize one and parse that into the other.
void ConvertToDynamicMessageAndStripOptions(Message& m, //
const DescriptorPool& pool) { // If stripped_paths is not null, it will be populated with the paths that were
// stripped, using the path format from SourceCodeInfo.Location.
void ConvertToDynamicMessageAndStripOptions(
Message& m, const DescriptorPool& pool,
std::vector<std::vector<int>>* stripped_paths = nullptr) {
// We need to look up the descriptor in the pool so that we can get a // We need to look up the descriptor in the pool so that we can get a
// descriptor which knows about any custom options that were used in the // descriptor which knows about any custom options that were used in the
// .proto file. // .proto file.
const Descriptor* descriptor = pool.FindMessageTypeByName(m.GetTypeName()); const Descriptor* descriptor = pool.FindMessageTypeByName(m.GetTypeName());
std::vector<int> path;
if (descriptor == nullptr) { if (descriptor == nullptr) {
// If the pool does not contain the descriptor, then this proto file does // If the pool does not contain the descriptor, then this proto file does
// not transitively depend on descriptor.proto, in which case we know there // not transitively depend on descriptor.proto, in which case we know there
// are no custom options to worry about. // are no custom options to worry about.
StripMessage(m); StripMessage(m, path, stripped_paths);
} else { } else {
DynamicMessageFactory factory; DynamicMessageFactory factory;
std::unique_ptr<Message> dynamic_message( std::unique_ptr<Message> dynamic_message(
@ -91,7 +114,7 @@ void ConvertToDynamicMessageAndStripOptions(Message& m,
std::string serialized; std::string serialized;
ABSL_CHECK(m.SerializeToString(&serialized)); ABSL_CHECK(m.SerializeToString(&serialized));
ABSL_CHECK(dynamic_message->ParseFromString(serialized)); ABSL_CHECK(dynamic_message->ParseFromString(serialized));
StripMessage(*dynamic_message); StripMessage(*dynamic_message, path, stripped_paths);
ABSL_CHECK(dynamic_message->SerializeToString(&serialized)); ABSL_CHECK(dynamic_message->SerializeToString(&serialized));
ABSL_CHECK(m.ParseFromString(serialized)); ABSL_CHECK(m.ParseFromString(serialized));
} }
@ -118,12 +141,72 @@ auto StripLocalOptions(const DescriptorType& descriptor) {
return options; return options;
} }
// Returns true if x is a prefix of y.
bool IsPrefix(absl::Span<const int> x, absl::Span<const int> y) {
return x == y.subspan(0, x.size());
}
// Strips the paths in stripped_paths from the SourceCodeInfo.
void StripSourceCodeInfo(std::vector<std::vector<int>>& stripped_paths,
SourceCodeInfo& source_code_info) {
RepeatedPtrField<SourceCodeInfo::Location>* locations =
source_code_info.mutable_location();
// We sort the locations lexicographically by their paths and include an
// index pointing back to the original location.
std::vector<std::pair<absl::Span<const int>, int>> sorted_locations;
sorted_locations.reserve(locations->size());
for (int i = 0; i < locations->size(); ++i) {
sorted_locations.emplace_back((*locations)[i].path(), i);
}
absl::c_sort(sorted_locations);
absl::c_sort(stripped_paths);
// With both arrays sorted, we can efficiently step through them in tandem.
// If a stripped path is a prefix of any location, then that is a location
// we need to delete from the SourceCodeInfo.
absl::flat_hash_set<int> indices_to_delete;
auto i = stripped_paths.cbegin();
auto j = sorted_locations.cbegin();
while (i != stripped_paths.cend() && j != sorted_locations.cend()) {
if (IsPrefix(*i, j->first)) {
indices_to_delete.insert(j->second);
++j;
} else if (*i < j->first) {
++i;
} else {
++j;
}
}
// We delete the locations in descending order to avoid invalidating
// indices.
std::vector<SourceCodeInfo::Location*> old_locations;
old_locations.resize(locations->size());
locations->ExtractSubrange(0, locations->size(), old_locations.data());
locations->Reserve(old_locations.size() - indices_to_delete.size());
for (int i = 0; i < old_locations.size(); ++i) {
if (indices_to_delete.contains(i)) {
delete old_locations[i];
} else {
locations->AddAllocated(old_locations[i]);
}
}
}
} // namespace } // namespace
FileDescriptorProto StripSourceRetentionOptions(const FileDescriptor& file) { FileDescriptorProto StripSourceRetentionOptions(const FileDescriptor& file,
bool include_source_code_info) {
FileDescriptorProto file_proto; FileDescriptorProto file_proto;
file.CopyTo(&file_proto); file.CopyTo(&file_proto);
ConvertToDynamicMessageAndStripOptions(file_proto, *file.pool()); std::vector<std::vector<int>> stripped_paths;
ConvertToDynamicMessageAndStripOptions(file_proto, *file.pool(),
&stripped_paths);
if (include_source_code_info) {
file.CopySourceCodeInfoTo(&file_proto);
StripSourceCodeInfo(stripped_paths, *file_proto.mutable_source_code_info());
}
return file_proto; return file_proto;
} }

@ -42,9 +42,11 @@ namespace protobuf {
namespace compiler { namespace compiler {
// Returns a FileDescriptorProto for this file, with all RETENTION_SOURCE // Returns a FileDescriptorProto for this file, with all RETENTION_SOURCE
// options stripped out. // options stripped out. If include_source_code_info is true, this function
PROTOC_EXPORT FileDescriptorProto // will also populate the source code info but strip out the parts of it
StripSourceRetentionOptions(const FileDescriptor& file); // corresponding to source-retention options.
PROTOC_EXPORT FileDescriptorProto StripSourceRetentionOptions(
const FileDescriptor& file, bool include_source_code_info = false);
// The following functions take a descriptor and strip all source-retention // The following functions take a descriptor and strip all source-retention
// options from just the local entity (e.g. message, enum, field). Most code // options from just the local entity (e.g. message, enum, field). Most code

@ -264,6 +264,66 @@ TEST(RetentionTest, StripSourceRetentionOptions) {
expected_options)); expected_options));
} }
TEST(RetentionTest, StripSourceRetentionOptionsWithSourceCodeInfo) {
// The tests above make assertions against the generated code, but this test
// case directly examines the result of the StripSourceRetentionOptions()
// function instead.
std::string proto_file =
absl::Substitute(R"(
syntax = "proto2";
package google.protobuf.internal;
import "$0";
option (source_retention_option) = 123;
option (options) = {
i1: 123
i2: 456
c { s: "abc" }
rc { s: "abc" }
};
option (repeated_options) = {
i1: 111 i2: 222
};
message Options {
optional int32 i1 = 1 [retention = RETENTION_SOURCE];
optional int32 i2 = 2;
message ChildMessage {
optional string s = 1 [retention = RETENTION_SOURCE];
}
optional ChildMessage c = 3;
repeated ChildMessage rc = 4;
}
extend google.protobuf.FileOptions {
optional int32 source_retention_option = 50000 [retention = RETENTION_SOURCE];
optional Options options = 50001;
repeated Options repeated_options = 50002;
})",
FileDescriptorSet::descriptor()->file()->name());
io::ArrayInputStream input_stream(proto_file.data(),
static_cast<int>(proto_file.size()));
io::ErrorCollector error_collector;
io::Tokenizer tokenizer(&input_stream, &error_collector);
compiler::Parser parser;
FileDescriptorProto file_descriptor;
ASSERT_TRUE(parser.Parse(&tokenizer, &file_descriptor));
file_descriptor.set_name("retention.proto");
DescriptorPool pool;
FileDescriptorProto descriptor_proto_descriptor;
FileDescriptorSet::descriptor()->file()->CopyTo(&descriptor_proto_descriptor);
pool.BuildFile(descriptor_proto_descriptor);
pool.BuildFile(file_descriptor);
FileDescriptorProto stripped_file = compiler::StripSourceRetentionOptions(
*pool.FindFileByName("retention.proto"),
/*include_source_code_info=*/true);
EXPECT_EQ(stripped_file.source_code_info().location_size(), 63);
}
} // namespace } // namespace
} // namespace internal } // namespace internal
} // namespace protobuf } // namespace protobuf

Loading…
Cancel
Save