From 79ccb8fac990555b9c598eeeb1afff3ed7f90928 Mon Sep 17 00:00:00 2001 From: Tony Liao Date: Thu, 31 Oct 2024 15:18:31 -0700 Subject: [PATCH] Maintain an invariant that hasbit is set iff string is nondefault. As an optimization, string fields are initialized with a pointer to a global immutable std::string instance and create a local std::string only when "set". If a field has hasbits, it presents a possibility that the hasbit is set but the string field is still pointing to the global empty string instance. This can happen, for example, when the field is implicit-presence but hasbit has been generated for it. Maintaining an invariant that hasbit is set iff string is nondefault can simplify the implementation of destructors and message.Clear(). The code would not need to branch further after scanning hasbits, instead it can always assume that a local std::string object exists as soon as it sees that the hasbit is set. However, this does require an else block in the merge implementation of implicit-presence string fields. When hasbits are implemented for implicit-presence string fields, merging from a non-present (i.e. empty) string field requires a nondefault std::string instance to be created. On the other hand, branches in Clear() can be eliminated. We think this is the right tradeoff because: 1. The allocation of nondefault string instance can only happen when the source proto has hasbit set but the field is empty. This is a relatively rare scenario. 2. Clear() is called every time a protobuf object is "overwritten" via an assignment operator or ParseFrom(). This happens probably more frequently than 1. PiperOrigin-RevId: 691951661 --- .../cpp/field_generators/string_field.cc | 23 +----- src/google/protobuf/compiler/cpp/message.cc | 78 +++++++++++++++++-- 2 files changed, 74 insertions(+), 27 deletions(-) diff --git a/src/google/protobuf/compiler/cpp/field_generators/string_field.cc b/src/google/protobuf/compiler/cpp/field_generators/string_field.cc index bd1aa7f2b7..1fc3951d49 100644 --- a/src/google/protobuf/compiler/cpp/field_generators/string_field.cc +++ b/src/google/protobuf/compiler/cpp/field_generators/string_field.cc @@ -16,7 +16,6 @@ #include "absl/log/absl_check.h" #include "absl/memory/memory.h" #include "absl/strings/str_cat.h" -#include "absl/strings/string_view.h" #include "google/protobuf/compiler/cpp/field.h" #include "google/protobuf/compiler/cpp/field_generators/generators.h" #include "google/protobuf/compiler/cpp/helpers.h" @@ -24,15 +23,12 @@ #include "google/protobuf/descriptor.h" #include "google/protobuf/descriptor.pb.h" #include "google/protobuf/io/printer.h" -#include "google/protobuf/port.h" namespace google { namespace protobuf { namespace compiler { namespace cpp { namespace { -using ::google::protobuf::internal::cpp::GetFieldHasbitMode; -using ::google::protobuf::internal::cpp::HasbitMode; using ::google::protobuf::internal::cpp::HasHasbit; using ::google::protobuf::io::AnnotationCollector; using Sub = ::google::protobuf::io::Printer::Sub; @@ -537,22 +533,6 @@ void SingularString::GenerateClearingCode(io::Printer* p) const { )cc"); } -// Returns "ClearNonDefaultToEmpty" or "ClearToEmpty" depending on whether the -// field might still point to the default string instance. -absl::string_view GetClearFunctionForField(const FieldDescriptor* field) { - switch (GetFieldHasbitMode(field)) { - case HasbitMode::kNoHasbit: - case HasbitMode::kHintHasbit: - // TODO: b/376149315 - Would be nice to call ClearNonDefaultToEmpty for - // hint hasbits too. - return "ClearToEmpty"; - case HasbitMode::kTrueHasbit: - return "ClearNonDefaultToEmpty"; - default: - internal::Unreachable(); - } -} - void SingularString::GenerateMessageClearingCode(io::Printer* p) const { if (is_oneof()) { p->Emit(R"cc( @@ -593,7 +573,8 @@ void SingularString::GenerateMessageClearingCode(io::Printer* p) const { return; } - p->Emit({{"Clear", GetClearFunctionForField(field_)}}, + p->Emit({{"Clear", + HasHasbit(field_) ? "ClearNonDefaultToEmpty" : "ClearToEmpty"}}, R"cc( $field_$.$Clear$(); )cc"); diff --git a/src/google/protobuf/compiler/cpp/message.cc b/src/google/protobuf/compiler/cpp/message.cc index 903331515a..a7078256da 100644 --- a/src/google/protobuf/compiler/cpp/message.cc +++ b/src/google/protobuf/compiler/cpp/message.cc @@ -187,7 +187,7 @@ RunMap FindRuns(const std::vector& fields, return runs; } -void EmitNonDefaultCheck(io::Printer* p, const std::string& prefix, +void EmitNonDefaultCheck(io::Printer* p, absl::string_view prefix, const FieldDescriptor* field) { ABSL_CHECK(GetFieldHasbitMode(field) != HasbitMode::kTrueHasbit); ABSL_CHECK(!field->is_repeated()); @@ -224,6 +224,49 @@ bool ShouldEmitNonDefaultCheck(const FieldDescriptor* field) { return !field->is_repeated(); } +void EmitNonDefaultCheckForString(io::Printer* p, absl::string_view prefix, + const FieldDescriptor* field, bool split, + absl::AnyInvocable emit_body) { + ABSL_DCHECK(field->cpp_type() == FieldDescriptor::CPPTYPE_STRING); + ABSL_DCHECK(field->cpp_string_type() == + FieldDescriptor::CppStringType::kString); + p->Emit( + { + {"condition", [&] { EmitNonDefaultCheck(p, prefix, field); }}, + {"emit_body", [&] { emit_body(); }}, + {"set_empty_string", + [&] { + p->Emit( + { + {"prefix", prefix}, + {"name", FieldName(field)}, + {"field", FieldMemberName(field, split)}, + }, + // The merge semantic is "overwrite if present". This statement + // is emitted when hasbit is set and src proto field is + // nonpresent (i.e. an empty string). Now, the destination + // string can be either empty or nonempty. + // - If dst is empty and pointing to the default instance, + // allocate a new empty instance. + // - If dst is already pointing to a nondefault instance, + // do nothing. + // This will allow destructors and Clear() to be simpler. + R"cc( + if (_this->$field$.IsDefault()) { + _this->_internal_set_$name$(""); + } + )cc"); + }}, + }, + R"cc( + if ($condition$) { + $emit_body$; + } else { + $set_empty_string$; + } + )cc"); +} + // Emits an if-statement with a condition that evaluates to true if |field| is // considered non-default (will be sent over the wire), for message types // without true field presence. Should only be called if @@ -235,7 +278,7 @@ bool ShouldEmitNonDefaultCheck(const FieldDescriptor* field) { // } // If |with_enclosing_braces_always| is set to false, enclosing braces will not // be generated if nondefault check is not emitted. -void MayEmitIfNonDefaultCheck(io::Printer* p, const std::string& prefix, +void MayEmitIfNonDefaultCheck(io::Printer* p, absl::string_view prefix, const FieldDescriptor* field, absl::AnyInvocable emit_body, bool with_enclosing_braces_always) { @@ -279,6 +322,29 @@ void MayEmitIfNonDefaultCheck(io::Printer* p, const std::string& prefix, emit_body(); } +void MayEmitMutableIfNonDefaultCheck(io::Printer* p, absl::string_view prefix, + const FieldDescriptor* field, bool split, + absl::AnyInvocable emit_body, + bool with_enclosing_braces_always) { + if (ShouldEmitNonDefaultCheck(field)) { + if (field->cpp_type() == FieldDescriptor::CPPTYPE_STRING && + field->cpp_string_type() == FieldDescriptor::CppStringType::kString) { + // If a field is backed by std::string, when default initialized it will + // point to a global empty std::string instance. We prefer to spend some + // extra cycles here to create a local string instance in the else branch, + // so that we can get rid of a branch when Clear() is called (if we do + // this, Clear() can always assume string instance is nonglobal). + EmitNonDefaultCheckForString(p, prefix, field, split, + std::move(emit_body)); + return; + } + } + + // Fall back to the default implementation. + return MayEmitIfNonDefaultCheck(p, prefix, field, std::move(emit_body), + with_enclosing_braces_always); +} + bool HasInternalHasMethod(const FieldDescriptor* field) { return !HasHasbit(field) && field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE; @@ -4252,8 +4318,8 @@ void MessageGenerator::GenerateClassSpecificMergeImpl(io::Printer* p) { } else if (field->is_optional() && !HasHasbit(field)) { // Merge semantics without true field presence: primitive fields are // merged only if non-zero (numeric) or non-empty (string). - MayEmitIfNonDefaultCheck( - p, "from.", field, + MayEmitMutableIfNonDefaultCheck( + p, "from.", field, ShouldSplit(field, options_), /*emit_body=*/[&]() { generator.GenerateMergingCode(p); }, /*with_enclosing_braces_always=*/true); } else if (field->options().weak() || @@ -4279,8 +4345,8 @@ void MessageGenerator::GenerateClassSpecificMergeImpl(io::Printer* p) { if (GetFieldHasbitMode(field) == HasbitMode::kHintHasbit) { // Merge semantics without true field presence: primitive fields are // merged only if non-zero (numeric) or non-empty (string). - MayEmitIfNonDefaultCheck( - p, "from.", field, + MayEmitMutableIfNonDefaultCheck( + p, "from.", field, ShouldSplit(field, options_), /*emit_body=*/[&]() { generator.GenerateMergingCode(p); }, /*with_enclosing_braces_always=*/false); } else {