Maintain an invariant that hasbit is set iff string is nondefault.

As an optimization, string fields are initialized with a pointer to a global
immutable std::string instance and create a local std::string only when
"set". If a field has hasbits, it presents a possibility that the hasbit is set
but the string field is still pointing to the global empty string instance.
This can happen, for example, when the field is implicit-presence but hasbit
has been generated for it.

Maintaining an invariant that hasbit is set iff string is nondefault can
simplify the implementation of destructors and message.Clear(). The code would
not need to branch further after scanning hasbits, instead it can always assume
that a local std::string object exists as soon as it sees that the hasbit is
set.

However, this does require an else block in the merge implementation of
implicit-presence string fields. When hasbits are implemented for
implicit-presence string fields, merging from a non-present (i.e. empty) string
field requires a nondefault std::string instance to be created. On the other
hand, branches in Clear() can be eliminated. We think this is the right
tradeoff because:

1. The allocation of nondefault string instance can only happen when the source
   proto has hasbit set but the field is empty. This is a relatively rare
   scenario.
2. Clear() is called every time a protobuf object is "overwritten" via an
   assignment operator or ParseFrom(). This happens probably more frequently than 1.

PiperOrigin-RevId: 691951661
pull/19052/head
Tony Liao 5 months ago committed by Copybara-Service
parent 7bfe237b45
commit 79ccb8fac9
  1. 23
      src/google/protobuf/compiler/cpp/field_generators/string_field.cc
  2. 78
      src/google/protobuf/compiler/cpp/message.cc

@ -16,7 +16,6 @@
#include "absl/log/absl_check.h"
#include "absl/memory/memory.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
#include "google/protobuf/compiler/cpp/field.h"
#include "google/protobuf/compiler/cpp/field_generators/generators.h"
#include "google/protobuf/compiler/cpp/helpers.h"
@ -24,15 +23,12 @@
#include "google/protobuf/descriptor.h"
#include "google/protobuf/descriptor.pb.h"
#include "google/protobuf/io/printer.h"
#include "google/protobuf/port.h"
namespace google {
namespace protobuf {
namespace compiler {
namespace cpp {
namespace {
using ::google::protobuf::internal::cpp::GetFieldHasbitMode;
using ::google::protobuf::internal::cpp::HasbitMode;
using ::google::protobuf::internal::cpp::HasHasbit;
using ::google::protobuf::io::AnnotationCollector;
using Sub = ::google::protobuf::io::Printer::Sub;
@ -537,22 +533,6 @@ void SingularString::GenerateClearingCode(io::Printer* p) const {
)cc");
}
// Returns "ClearNonDefaultToEmpty" or "ClearToEmpty" depending on whether the
// field might still point to the default string instance.
absl::string_view GetClearFunctionForField(const FieldDescriptor* field) {
switch (GetFieldHasbitMode(field)) {
case HasbitMode::kNoHasbit:
case HasbitMode::kHintHasbit:
// TODO: b/376149315 - Would be nice to call ClearNonDefaultToEmpty for
// hint hasbits too.
return "ClearToEmpty";
case HasbitMode::kTrueHasbit:
return "ClearNonDefaultToEmpty";
default:
internal::Unreachable();
}
}
void SingularString::GenerateMessageClearingCode(io::Printer* p) const {
if (is_oneof()) {
p->Emit(R"cc(
@ -593,7 +573,8 @@ void SingularString::GenerateMessageClearingCode(io::Printer* p) const {
return;
}
p->Emit({{"Clear", GetClearFunctionForField(field_)}},
p->Emit({{"Clear",
HasHasbit(field_) ? "ClearNonDefaultToEmpty" : "ClearToEmpty"}},
R"cc(
$field_$.$Clear$();
)cc");

@ -187,7 +187,7 @@ RunMap FindRuns(const std::vector<const FieldDescriptor*>& fields,
return runs;
}
void EmitNonDefaultCheck(io::Printer* p, const std::string& prefix,
void EmitNonDefaultCheck(io::Printer* p, absl::string_view prefix,
const FieldDescriptor* field) {
ABSL_CHECK(GetFieldHasbitMode(field) != HasbitMode::kTrueHasbit);
ABSL_CHECK(!field->is_repeated());
@ -224,6 +224,49 @@ bool ShouldEmitNonDefaultCheck(const FieldDescriptor* field) {
return !field->is_repeated();
}
void EmitNonDefaultCheckForString(io::Printer* p, absl::string_view prefix,
const FieldDescriptor* field, bool split,
absl::AnyInvocable<void()> emit_body) {
ABSL_DCHECK(field->cpp_type() == FieldDescriptor::CPPTYPE_STRING);
ABSL_DCHECK(field->cpp_string_type() ==
FieldDescriptor::CppStringType::kString);
p->Emit(
{
{"condition", [&] { EmitNonDefaultCheck(p, prefix, field); }},
{"emit_body", [&] { emit_body(); }},
{"set_empty_string",
[&] {
p->Emit(
{
{"prefix", prefix},
{"name", FieldName(field)},
{"field", FieldMemberName(field, split)},
},
// The merge semantic is "overwrite if present". This statement
// is emitted when hasbit is set and src proto field is
// nonpresent (i.e. an empty string). Now, the destination
// string can be either empty or nonempty.
// - If dst is empty and pointing to the default instance,
// allocate a new empty instance.
// - If dst is already pointing to a nondefault instance,
// do nothing.
// This will allow destructors and Clear() to be simpler.
R"cc(
if (_this->$field$.IsDefault()) {
_this->_internal_set_$name$("");
}
)cc");
}},
},
R"cc(
if ($condition$) {
$emit_body$;
} else {
$set_empty_string$;
}
)cc");
}
// Emits an if-statement with a condition that evaluates to true if |field| is
// considered non-default (will be sent over the wire), for message types
// without true field presence. Should only be called if
@ -235,7 +278,7 @@ bool ShouldEmitNonDefaultCheck(const FieldDescriptor* field) {
// }
// If |with_enclosing_braces_always| is set to false, enclosing braces will not
// be generated if nondefault check is not emitted.
void MayEmitIfNonDefaultCheck(io::Printer* p, const std::string& prefix,
void MayEmitIfNonDefaultCheck(io::Printer* p, absl::string_view prefix,
const FieldDescriptor* field,
absl::AnyInvocable<void()> emit_body,
bool with_enclosing_braces_always) {
@ -279,6 +322,29 @@ void MayEmitIfNonDefaultCheck(io::Printer* p, const std::string& prefix,
emit_body();
}
void MayEmitMutableIfNonDefaultCheck(io::Printer* p, absl::string_view prefix,
const FieldDescriptor* field, bool split,
absl::AnyInvocable<void()> emit_body,
bool with_enclosing_braces_always) {
if (ShouldEmitNonDefaultCheck(field)) {
if (field->cpp_type() == FieldDescriptor::CPPTYPE_STRING &&
field->cpp_string_type() == FieldDescriptor::CppStringType::kString) {
// If a field is backed by std::string, when default initialized it will
// point to a global empty std::string instance. We prefer to spend some
// extra cycles here to create a local string instance in the else branch,
// so that we can get rid of a branch when Clear() is called (if we do
// this, Clear() can always assume string instance is nonglobal).
EmitNonDefaultCheckForString(p, prefix, field, split,
std::move(emit_body));
return;
}
}
// Fall back to the default implementation.
return MayEmitIfNonDefaultCheck(p, prefix, field, std::move(emit_body),
with_enclosing_braces_always);
}
bool HasInternalHasMethod(const FieldDescriptor* field) {
return !HasHasbit(field) &&
field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE;
@ -4252,8 +4318,8 @@ void MessageGenerator::GenerateClassSpecificMergeImpl(io::Printer* p) {
} else if (field->is_optional() && !HasHasbit(field)) {
// Merge semantics without true field presence: primitive fields are
// merged only if non-zero (numeric) or non-empty (string).
MayEmitIfNonDefaultCheck(
p, "from.", field,
MayEmitMutableIfNonDefaultCheck(
p, "from.", field, ShouldSplit(field, options_),
/*emit_body=*/[&]() { generator.GenerateMergingCode(p); },
/*with_enclosing_braces_always=*/true);
} else if (field->options().weak() ||
@ -4279,8 +4345,8 @@ void MessageGenerator::GenerateClassSpecificMergeImpl(io::Printer* p) {
if (GetFieldHasbitMode(field) == HasbitMode::kHintHasbit) {
// Merge semantics without true field presence: primitive fields are
// merged only if non-zero (numeric) or non-empty (string).
MayEmitIfNonDefaultCheck(
p, "from.", field,
MayEmitMutableIfNonDefaultCheck(
p, "from.", field, ShouldSplit(field, options_),
/*emit_body=*/[&]() { generator.GenerateMergingCode(p); },
/*with_enclosing_braces_always=*/false);
} else {

Loading…
Cancel
Save