Update source code info when stripping source-retention options

When we invoke protoc with `--include_source_info --descriptor_set_out`, we
currently strip out source-retention options but without doing the
corresponding stripping of the source code info. This CL fixes that problem by
making sure to strip the same entities from source code info that we stripped
from the main part of the descriptors.

PiperOrigin-RevId: 519831383
pull/12346/head
Mike Kruskal 2 years ago committed by Copybara-Service
parent 0549e52188
commit dff36c7d02
  1. 2
      src/google/protobuf/compiler/BUILD.bazel
  2. 6
      src/google/protobuf/compiler/command_line_interface.cc
  3. 1
      src/google/protobuf/compiler/command_line_interface_unittest.cc
  4. 103
      src/google/protobuf/compiler/retention.cc
  5. 8
      src/google/protobuf/compiler/retention.h
  6. 60
      src/google/protobuf/retention_test.cc

@ -328,6 +328,8 @@ cc_library(
visibility = ["//src/google/protobuf:__subpackages__"],
deps = [
"//src/google/protobuf:protobuf_nowkt",
"@com_google_absl//absl/types:span",
"@com_google_absl//absl/container:flat_hash_set",
],
)

@ -2635,13 +2635,11 @@ void CommandLineInterface::GetTransitiveDependencies(
// Add this file.
FileDescriptorProto* new_descriptor = output->Add();
*new_descriptor = StripSourceRetentionOptions(*file);
*new_descriptor =
StripSourceRetentionOptions(*file, include_source_code_info);
if (include_json_name) {
file->CopyJsonNameTo(new_descriptor);
}
if (include_source_code_info) {
file->CopySourceCodeInfoTo(new_descriptor);
}
}
const CommandLineInterface::GeneratorInfo*

@ -38,6 +38,7 @@
#include <cstdint>
#include <gmock/gmock.h>
#include "absl/log/absl_check.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h"

@ -30,10 +30,14 @@
#include "google/protobuf/compiler/retention.h"
#include <algorithm>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "absl/container/flat_hash_set.h"
#include "absl/types/span.h"
#include "google/protobuf/descriptor.h"
#include "google/protobuf/dynamic_message.h"
@ -43,24 +47,38 @@ namespace compiler {
namespace {
// Recursively strips any options with source retention from the message.
void StripMessage(Message& m) {
// Recursively strips any options with source retention from the message. If
// stripped_paths is not null, then this function will populate it with the
// paths that were stripped, using the path format from
// SourceCodeInfo.Location. The path parameter is used as a stack tracking the
// path to the current location.
void StripMessage(Message& m, std::vector<int>& path,
std::vector<std::vector<int>>* stripped_paths) {
const Reflection* reflection = m.GetReflection();
std::vector<const FieldDescriptor*> fields;
reflection->ListFields(m, &fields);
for (const FieldDescriptor* field : fields) {
path.push_back(field->number());
if (field->options().retention() == FieldOptions::RETENTION_SOURCE) {
reflection->ClearField(&m, field);
if (stripped_paths != nullptr) {
stripped_paths->push_back(path);
}
} else if (field->type() == FieldDescriptor::TYPE_MESSAGE) {
if (field->is_repeated()) {
int field_size = reflection->FieldSize(m, field);
for (int i = 0; i < field_size; ++i) {
StripMessage(*reflection->MutableRepeatedMessage(&m, field, i));
path.push_back(i);
StripMessage(*reflection->MutableRepeatedMessage(&m, field, i), path,
stripped_paths);
path.pop_back();
}
} else {
StripMessage(*reflection->MutableMessage(&m, field));
StripMessage(*reflection->MutableMessage(&m, field), path,
stripped_paths);
}
}
path.pop_back();
}
}
@ -72,18 +90,23 @@ void StripMessage(Message& m) {
// Using a dynamic message allows us to see these custom options. To convert
// back and forth between the generated type and the dynamic message, we have
// to serialize one and parse that into the other.
void ConvertToDynamicMessageAndStripOptions(Message& m,
const DescriptorPool& pool) {
//
// If stripped_paths is not null, it will be populated with the paths that were
// stripped, using the path format from SourceCodeInfo.Location.
void ConvertToDynamicMessageAndStripOptions(
Message& m, const DescriptorPool& pool,
std::vector<std::vector<int>>* stripped_paths = nullptr) {
// We need to look up the descriptor in the pool so that we can get a
// descriptor which knows about any custom options that were used in the
// .proto file.
const Descriptor* descriptor = pool.FindMessageTypeByName(m.GetTypeName());
std::vector<int> path;
if (descriptor == nullptr) {
// If the pool does not contain the descriptor, then this proto file does
// not transitively depend on descriptor.proto, in which case we know there
// are no custom options to worry about.
StripMessage(m);
StripMessage(m, path, stripped_paths);
} else {
DynamicMessageFactory factory;
std::unique_ptr<Message> dynamic_message(
@ -91,7 +114,7 @@ void ConvertToDynamicMessageAndStripOptions(Message& m,
std::string serialized;
ABSL_CHECK(m.SerializeToString(&serialized));
ABSL_CHECK(dynamic_message->ParseFromString(serialized));
StripMessage(*dynamic_message);
StripMessage(*dynamic_message, path, stripped_paths);
ABSL_CHECK(dynamic_message->SerializeToString(&serialized));
ABSL_CHECK(m.ParseFromString(serialized));
}
@ -118,12 +141,72 @@ auto StripLocalOptions(const DescriptorType& descriptor) {
return options;
}
// Returns true if x is a prefix of y.
bool IsPrefix(absl::Span<const int> x, absl::Span<const int> y) {
return x == y.subspan(0, x.size());
}
// Strips the paths in stripped_paths from the SourceCodeInfo.
void StripSourceCodeInfo(std::vector<std::vector<int>>& stripped_paths,
SourceCodeInfo& source_code_info) {
RepeatedPtrField<SourceCodeInfo::Location>* locations =
source_code_info.mutable_location();
// We sort the locations lexicographically by their paths and include an
// index pointing back to the original location.
std::vector<std::pair<absl::Span<const int>, int>> sorted_locations;
sorted_locations.reserve(locations->size());
for (int i = 0; i < locations->size(); ++i) {
sorted_locations.emplace_back((*locations)[i].path(), i);
}
absl::c_sort(sorted_locations);
absl::c_sort(stripped_paths);
// With both arrays sorted, we can efficiently step through them in tandem.
// If a stripped path is a prefix of any location, then that is a location
// we need to delete from the SourceCodeInfo.
absl::flat_hash_set<int> indices_to_delete;
auto i = stripped_paths.cbegin();
auto j = sorted_locations.cbegin();
while (i != stripped_paths.cend() && j != sorted_locations.cend()) {
if (IsPrefix(*i, j->first)) {
indices_to_delete.insert(j->second);
++j;
} else if (*i < j->first) {
++i;
} else {
++j;
}
}
// We delete the locations in descending order to avoid invalidating
// indices.
std::vector<SourceCodeInfo::Location*> old_locations;
old_locations.resize(locations->size());
locations->ExtractSubrange(0, locations->size(), old_locations.data());
locations->Reserve(old_locations.size() - indices_to_delete.size());
for (int i = 0; i < old_locations.size(); ++i) {
if (indices_to_delete.contains(i)) {
delete old_locations[i];
} else {
locations->AddAllocated(old_locations[i]);
}
}
}
} // namespace
FileDescriptorProto StripSourceRetentionOptions(const FileDescriptor& file) {
FileDescriptorProto StripSourceRetentionOptions(const FileDescriptor& file,
bool include_source_code_info) {
FileDescriptorProto file_proto;
file.CopyTo(&file_proto);
ConvertToDynamicMessageAndStripOptions(file_proto, *file.pool());
std::vector<std::vector<int>> stripped_paths;
ConvertToDynamicMessageAndStripOptions(file_proto, *file.pool(),
&stripped_paths);
if (include_source_code_info) {
file.CopySourceCodeInfoTo(&file_proto);
StripSourceCodeInfo(stripped_paths, *file_proto.mutable_source_code_info());
}
return file_proto;
}

@ -42,9 +42,11 @@ namespace protobuf {
namespace compiler {
// Returns a FileDescriptorProto for this file, with all RETENTION_SOURCE
// options stripped out.
PROTOC_EXPORT FileDescriptorProto
StripSourceRetentionOptions(const FileDescriptor& file);
// options stripped out. If include_source_code_info is true, this function
// will also populate the source code info but strip out the parts of it
// corresponding to source-retention options.
PROTOC_EXPORT FileDescriptorProto StripSourceRetentionOptions(
const FileDescriptor& file, bool include_source_code_info = false);
// The following functions take a descriptor and strip all source-retention
// options from just the local entity (e.g. message, enum, field). Most code

@ -264,6 +264,66 @@ TEST(RetentionTest, StripSourceRetentionOptions) {
expected_options));
}
TEST(RetentionTest, StripSourceRetentionOptionsWithSourceCodeInfo) {
// The tests above make assertions against the generated code, but this test
// case directly examines the result of the StripSourceRetentionOptions()
// function instead.
std::string proto_file =
absl::Substitute(R"(
syntax = "proto2";
package google.protobuf.internal;
import "$0";
option (source_retention_option) = 123;
option (options) = {
i1: 123
i2: 456
c { s: "abc" }
rc { s: "abc" }
};
option (repeated_options) = {
i1: 111 i2: 222
};
message Options {
optional int32 i1 = 1 [retention = RETENTION_SOURCE];
optional int32 i2 = 2;
message ChildMessage {
optional string s = 1 [retention = RETENTION_SOURCE];
}
optional ChildMessage c = 3;
repeated ChildMessage rc = 4;
}
extend google.protobuf.FileOptions {
optional int32 source_retention_option = 50000 [retention = RETENTION_SOURCE];
optional Options options = 50001;
repeated Options repeated_options = 50002;
})",
FileDescriptorSet::descriptor()->file()->name());
io::ArrayInputStream input_stream(proto_file.data(),
static_cast<int>(proto_file.size()));
io::ErrorCollector error_collector;
io::Tokenizer tokenizer(&input_stream, &error_collector);
compiler::Parser parser;
FileDescriptorProto file_descriptor;
ASSERT_TRUE(parser.Parse(&tokenizer, &file_descriptor));
file_descriptor.set_name("retention.proto");
DescriptorPool pool;
FileDescriptorProto descriptor_proto_descriptor;
FileDescriptorSet::descriptor()->file()->CopyTo(&descriptor_proto_descriptor);
pool.BuildFile(descriptor_proto_descriptor);
pool.BuildFile(file_descriptor);
FileDescriptorProto stripped_file = compiler::StripSourceRetentionOptions(
*pool.FindFileByName("retention.proto"),
/*include_source_code_info=*/true);
EXPECT_EQ(stripped_file.source_code_info().location_size(), 63);
}
} // namespace
} // namespace internal
} // namespace protobuf

Loading…
Cancel
Save