Change the way pinning and weak references are done in the Weak Descriptor feature.

The existing solution does not work well in the face of shared libraries when
section merging can't be done via linker script.

The new solution has two separate modes of action:
 - When sections are not merged, we directly put the default instance pointer
 in the file_default_instances array.
 - When sections are merged, the file_default_instances array is all `nullptr`
 and we inject the default instance pointers at runtime via `InitWeakDefaults`.
 All the surviving default instances live in the merged section, each with
 extra information that makes it possible to iterate the section.

PiperOrigin-RevId: 599558000
pull/15462/head
Protobuf Team Bot 1 year ago committed by Copybara-Service
parent adacf6d0e1
commit b161e2df00
  1. 143
      src/google/protobuf/compiler/cpp/file.cc
  2. 20
      src/google/protobuf/compiler/cpp/helpers.cc
  3. 34
      src/google/protobuf/compiler/cpp/helpers.h
  4. 18
      src/google/protobuf/compiler/cpp/message.cc
  5. 2
      src/google/protobuf/compiler/cpp/message.h
  6. 1
      src/google/protobuf/compiler/java/java_features.pb.cc
  7. 1
      src/google/protobuf/compiler/plugin.pb.cc
  8. 1
      src/google/protobuf/cpp_features.pb.cc
  9. 1
      src/google/protobuf/descriptor.pb.cc
  10. 28
      src/google/protobuf/generated_message_util.cc
  11. 8
      src/google/protobuf/generated_message_util.h

@ -617,6 +617,33 @@ void FileGenerator::GenerateSourceDefaultInstance(int idx, io::Printer* p) {
PROTOBUF_ATTRIBUTE_NO_DESTROY PROTOBUF_CONSTINIT$ dllexport_decl$
PROTOBUF_ATTRIBUTE_INIT_PRIORITY1 $type$ $name$;
)cc");
} else if (UsingImplicitWeakDescriptor(file_, options_)) {
p->Emit(
{
{"index", generator->index_in_file_messages()},
{"type", DefaultInstanceType(generator->descriptor(), options_)},
{"name", DefaultInstanceName(generator->descriptor(), options_)},
{"class", ClassName(generator->descriptor())},
{"section", WeakDefaultInstanceSection(
generator->descriptor(),
generator->index_in_file_messages(), options_)},
},
R"cc(
struct $type$ {
PROTOBUF_CONSTEXPR $type$() : _instance(::_pbi::ConstantInitialized{}) {}
~$type$() {}
//~ _instance must be the first member.
union {
$class$ _instance;
};
::_pbi::WeakDescriptorDefaultTail tail = {
file_default_instances + $index$, sizeof($type$)};
};
PROTOBUF_ATTRIBUTE_NO_DESTROY PROTOBUF_CONSTINIT$ dllexport_decl$
PROTOBUF_ATTRIBUTE_INIT_PRIORITY1 $type$ $name$
__attribute__((section("$section$")));
)cc");
} else {
p->Emit(
{
@ -735,6 +762,7 @@ void FileGenerator::GenerateInternalForwardDeclarations(
const CrossFileReferences& refs, io::Printer* p) {
{
NamespaceOpener ns(p);
for (auto instance : refs.weak_default_instances) {
ns.ChangeTo(Namespace(instance, options_));
@ -867,6 +895,50 @@ void FileGenerator::GenerateSource(io::Printer* p) {
GetCrossFileReferencesForFile(file_, &refs);
GenerateInternalForwardDeclarations(refs, p);
// When in weak descriptor mode, we generate the file_default_instances before
// the default instances.
if (UsingImplicitWeakDescriptor(file_, options_) &&
!message_generators_.empty()) {
p->Emit(
{
{"weak_defaults",
[&] {
for (auto& gen : message_generators_) {
p->Emit(
{
{"class", QualifiedClassName(gen->descriptor())},
{"section",
WeakDefaultInstanceSection(
gen->descriptor(), gen->index_in_file_messages(),
options_)},
},
R"cc(
extern const $class$ __start_$section$
__attribute__((weak));
)cc");
}
}},
{"defaults",
[&] {
for (auto& gen : message_generators_) {
p->Emit({{"section",
WeakDefaultInstanceSection(
gen->descriptor(), gen->index_in_file_messages(),
options_)}},
R"cc(
&__start_$section$,
)cc");
}
}},
},
R"cc(
$weak_defaults$;
static const ::_pb::Message* file_default_instances[] = {
$defaults$,
};
)cc");
}
if (IsAnyMessage(file_)) {
MuteWuninitialized(p);
}
@ -1004,8 +1076,6 @@ void FileGenerator::GenerateReflectionInitializationCode(io::Printer* p) {
if (!message_generators_.empty()) {
std::vector<std::pair<size_t, size_t>> offsets;
offsets.reserve(message_generators_.size());
bool has_implicit_weak_descriptors =
UsingImplicitWeakDescriptor(file_, options_);
p->Emit(
{
@ -1024,50 +1094,6 @@ void FileGenerator::GenerateReflectionInitializationCode(io::Printer* p) {
offset += offsets[i].first;
}
}},
{"weak_defaults",
[&] {
if (!has_implicit_weak_descriptors) return;
int index = 0;
for (auto& gen : message_generators_) {
p->Emit(
{
{"index", index++},
{"ns", Namespace(gen->descriptor(), options_)},
{"class", ClassName(gen->descriptor())},
{"section", WeakDefaultWriterSection(gen->descriptor(),
options_)},
},
R"cc(
constexpr ::_pbi::WeakDefaultWriter pb_$index$_weak_
__attribute__((__nodebug__))
__attribute__((section("$section$"))) = {
file_default_instances + $index$,
&$ns$::_$class$_default_instance_._instance};
)cc");
}
}},
{"defaults",
[&] {
for (auto& gen : message_generators_) {
if (has_implicit_weak_descriptors) {
p->Emit(R"cc(
nullptr,
)cc");
} else {
p->Emit(
{
{"ns", Namespace(gen->descriptor(), options_)},
{"class", ClassName(gen->descriptor())},
},
R"cc(
&$ns$::_$class$_default_instance_._instance,
)cc");
}
}
}},
// When we have implicit weak descriptors we make the array mutable
// for dynamic initialization.
{"const", has_implicit_weak_descriptors ? "" : "const"},
},
R"cc(
const ::uint32_t
@ -1080,12 +1106,27 @@ void FileGenerator::GenerateReflectionInitializationCode(io::Printer* p) {
schemas[] ABSL_ATTRIBUTE_SECTION_VARIABLE(protodesc_cold) = {
$schemas$,
};
static const ::_pb::Message* $const $file_default_instances[] = {
$defaults$,
};
$weak_defaults$;
)cc");
if (!UsingImplicitWeakDescriptor(file_, options_)) {
p->Emit({{"defaults",
[&] {
for (auto& gen : message_generators_) {
p->Emit(
{
{"ns", Namespace(gen->descriptor(), options_)},
{"class", ClassName(gen->descriptor())},
},
R"cc(
&$ns$::_$class$_default_instance_._instance,
)cc");
}
}}},
R"cc(
static const ::_pb::Message* const file_default_instances[] = {
$defaults$,
};
)cc");
}
} else {
// We still need these symbols to exist.
//

@ -1488,22 +1488,18 @@ bool UsingImplicitWeakDescriptor(const FileDescriptor* file,
!options.opensource_runtime;
}
std::string WeakDefaultWriterSection(const Descriptor* descriptor,
const Options& options) {
std::string WeakDefaultInstanceSection(const Descriptor* descriptor,
int index_in_file_messages,
const Options& options) {
const auto* file = descriptor->file();
// To make a compact name we use the index of the object in its parent instead
// of its name, recursively until we reach the root.
// So the name could be `pb_def_1_2_1_0_HASH` instead of
// To make a compact name we use the index of the object in its file instead
// of its name.
// So the name could be `pb_def_3_HASH` instead of
// `pb_def_VeryLongClassName_WithNesting_AndMoreNames_HASH`
// We need a known common prefix to merge the sections later on.
std::string prefix = "pb_def";
do {
absl::StrAppend(&prefix, "_", descriptor->index());
descriptor = descriptor->containing_type();
} while (descriptor != nullptr);
return UniqueName(prefix, file, options);
return UniqueName(absl::StrCat("pb_def_", index_in_file_messages), file,
options);
}
bool UsingImplicitWeakFields(const FileDescriptor* file,

@ -741,10 +741,16 @@ void ListAllTypesForServices(const FileDescriptor* fd,
// decoupling the messages from the TU-wide `file_default_instances` array.
// This way there are no static initializers in the TU pointing to any part of
// the generated classes and they can be GC'd by the linker.
// Instead, we inject the surviving messages by having `WeakDefaultWriter`
// objects in a special `pb_defaults` section. The runtime will iterate this
// section to see the list of all live objects and put them back into the
// `file_default_instances` array.
// Instead of direct use, we have two ways to weakly refer to the default
// instances:
// - Each default instance is located in its own section, and we use a
// `&__start_section_name` pointer to access it. This is a reference that
// allows GC to happen. This step is used with dynamic linking.
// - We also allow merging all these sections at link time into the
// `pb_defaults` section. All surviving messages will be injected back into
// the `file_default_instances` when the runtime is initialized. This is
// useful when doing static linking and you want to avoid having an unbounded
// number of sections.
//
// Any object that gets GC'd will have a `nullptr` in the respective slot in the
// `file_default_instances` array. The runtime will recognize this and will
@ -758,12 +764,10 @@ void ListAllTypesForServices(const FileDescriptor* fd,
// friends.
//
// A "pin" is the addition of a dependency edge in the graph for the GC.
// The `WeakDefaultWriter`, the default instance, and vtable of a message all
// pin each other. If anyone lives, they all do. This is important.
// The `WeakDefaultWriter` pins the default instance of the message by using it.
// The default instance of the message pins the vtable trivially by using it.
// The vtable pins the `WeakDefaultWriter` by having a StrongPointer into it
// from any of the virtual functions.
// The default instance and vtable of a message pin each other. If any one
// lives, they both do. This is important. The default instance of the message
// pins the vtable trivially by using it. The vtable pins the default instance
// by having a StrongPointer into it from any of the virtual functions.
//
// All parent messages pin their children.
// SPEED messages do this implicitly via the TcParseTable, which contain
@ -786,11 +790,11 @@ void ListAllTypesForServices(const FileDescriptor* fd,
bool UsingImplicitWeakDescriptor(const FileDescriptor* file,
const Options& options);
// Section name to be used for the DefaultWriter object for implicit weak
// descriptor objects.
// See `UsingImplicitWeakDescriptor` above.
std::string WeakDefaultWriterSection(const Descriptor* descriptor,
const Options& options);
// Section name to be used for the default instance for implicit weak descriptor
// objects. See `UsingImplicitWeakDescriptor` above.
std::string WeakDefaultInstanceSection(const Descriptor* descriptor,
int index_in_file_messages,
const Options& options);
// Indicates whether we should use implicit weak fields for this file.
bool UsingImplicitWeakFields(const FileDescriptor* file,

@ -2162,12 +2162,12 @@ void MessageGenerator::GenerateClassMethods(io::Printer* p) {
auto t = p->WithVars(MakeTrackerCalls(descriptor_, options_));
Formatter format(p);
const auto pin_weak_writer = [&] {
const auto pin_weak_descriptor = [&] {
if (!UsingImplicitWeakDescriptor(descriptor_->file(), options_)) return;
p->Emit({{"index", index_in_file_messages_}},
R"cc(
::_pbi::StrongPointer(&pb_$index$_weak_);
)cc");
p->Emit(
R"cc(
::_pbi::StrongPointer(&_$classname$_default_instance_);
)cc");
// For CODE_SIZE types, we need to pin the submessages too.
// SPEED types will pin them via the TcParse table automatically.
@ -2194,12 +2194,12 @@ void MessageGenerator::GenerateClassMethods(io::Printer* p) {
if (HasDescriptorMethods(descriptor_->file(), options_)) {
p->Emit(
{
{"pin_weak_writer", pin_weak_writer},
{"pin_weak_descriptor", pin_weak_descriptor},
{"index", index_in_file_messages_},
},
R"cc(
::$proto_ns$::Metadata $classname$::GetMetadata() const {
$pin_weak_writer$;
$pin_weak_descriptor$;
return ::_pbi::AssignDescriptors(&$desc_table$_getter,
&$desc_table$_once,
$file_level_metadata$[$index$]);
@ -2329,13 +2329,13 @@ void MessageGenerator::GenerateClassMethods(io::Printer* p) {
if (HasDescriptorMethods(descriptor_->file(), options_)) {
p->Emit(
{
{"pin_weak_writer", pin_weak_writer},
{"pin_weak_descriptor", pin_weak_descriptor},
{"index", index_in_file_messages_},
},
R"cc(
::$proto_ns$::Metadata $classname$::GetMetadata() const {
$annotate_reflection$;
$pin_weak_writer$;
$pin_weak_descriptor$;
return ::_pbi::AssignDescriptors(&$desc_table$_getter,
&$desc_table$_once,
$file_level_metadata$[$index$]);

@ -48,6 +48,8 @@ class MessageGenerator {
~MessageGenerator() = default;
int index_in_file_messages() const { return index_in_file_messages_; }
// Append the two types of nested generators to the corresponding vector.
void AddGenerators(
std::vector<std::unique_ptr<EnumGenerator>>* enum_generators,

@ -68,7 +68,6 @@ static const ::_pbi::MigrationSchema
schemas[] ABSL_ATTRIBUTE_SECTION_VARIABLE(protodesc_cold) = {
{0, 10, -1, sizeof(::pb::JavaFeatures)},
};
static const ::_pb::Message* const file_default_instances[] = {
&::pb::_JavaFeatures_default_instance_._instance,
};

@ -213,7 +213,6 @@ static const ::_pbi::MigrationSchema
{34, 46, -1, sizeof(::google::protobuf::compiler::CodeGeneratorResponse_File)},
{50, 63, -1, sizeof(::google::protobuf::compiler::CodeGeneratorResponse)},
};
static const ::_pb::Message* const file_default_instances[] = {
&::google::protobuf::compiler::_Version_default_instance_._instance,
&::google::protobuf::compiler::_CodeGeneratorRequest_default_instance_._instance,

@ -66,7 +66,6 @@ static const ::_pbi::MigrationSchema
schemas[] ABSL_ATTRIBUTE_SECTION_VARIABLE(protodesc_cold) = {
{0, 9, -1, sizeof(::pb::CppFeatures)},
};
static const ::_pb::Message* const file_default_instances[] = {
&::pb::_CppFeatures_default_instance_._instance,
};

@ -1622,7 +1622,6 @@ static const ::_pbi::MigrationSchema
{552, 565, -1, sizeof(::google::protobuf::GeneratedCodeInfo_Annotation)},
{570, -1, -1, sizeof(::google::protobuf::GeneratedCodeInfo)},
};
static const ::_pb::Message* const file_default_instances[] = {
&::google::protobuf::_FileDescriptorSet_default_instance_._instance,
&::google::protobuf::_FileDescriptorProto_default_instance_._instance,

@ -12,8 +12,8 @@
#include "google/protobuf/generated_message_util.h"
#include <atomic>
#include <cstdint>
#include <limits>
#include <vector>
#include "google/protobuf/arenastring.h"
#include "google/protobuf/extension_set.h"
@ -51,19 +51,25 @@ PROTOBUF_ATTRIBUTE_NO_DESTROY PROTOBUF_CONSTINIT const EmptyCord empty_cord_;
#if defined(PROTOBUF_DESCRIPTOR_WEAK_MESSAGES_ALLOWED)
extern "C" {
// We add a single dummy writer to guarantee the section is never empty.
WeakDefaultWriter dummy_writer
__attribute__((section("pb_defaults"))) = {&dummy_writer.source, nullptr};
// When using --descriptor_implicit_weak_messages we expect the writer objects
// to live in the `pb_defaults` section. We load them all using the
// When using --descriptor_implicit_weak_messages we expect the default instance
// objects to live in the `pb_defaults` section. We load them all using the
// __start/__end symbols provided by the linker.
extern const WeakDefaultWriter __start_pb_defaults;
extern const WeakDefaultWriter __stop_pb_defaults;
// Each object is its own type and size, so we use a `char` to load them
// appropriately. These are weak because the section might not exist at all.
__attribute__((weak)) extern const char __start_pb_defaults;
__attribute__((weak)) extern const char __stop_pb_defaults;
}
static void InitWeakDefaults() {
StrongPointer(&dummy_writer); // force link the dummy writer.
for (auto it = &__start_pb_defaults; it != &__stop_pb_defaults; ++it) {
*it->destination = it->source;
// We don't know the size of each object, but we know the layout of the tail.
// It contains a WeakDescriptorDefaultTail object.
// As such, we iterate the section backwards.
const char* start = &__start_pb_defaults;
const char* end = &__stop_pb_defaults;
while (start != end) {
auto* tail = reinterpret_cast<const WeakDescriptorDefaultTail*>(end) - 1;
end -= tail->size;
const Message* instance = reinterpret_cast<const Message*>(end);
*tail->target = instance;
}
}
#else

@ -316,11 +316,9 @@ class MapSorterPtr {
std::unique_ptr<storage_type[]> items_;
};
// Single message link for implicit weak descriptor messages.
// The runtime will register all the instances that are linked in.
struct WeakDefaultWriter {
const Message** destination;
const Message* source;
struct WeakDescriptorDefaultTail {
const Message** target;
size_t size;
};
} // namespace internal

Loading…
Cancel
Save