Improve ByteSize calculation for fields with fixed wire size.

For singular fields whose serialized size is fixed, use popcount with a mask on
the hasbits to count how many of them are present, instead of checking each
field one by one with a conditional.

PiperOrigin-RevId: 716692546
pull/20019/head
Protobuf Team Bot 1 month ago committed by Copybara-Service
parent 3423d7c451
commit c58621b6ff
  1. 104
      src/google/protobuf/compiler/cpp/message.cc
  2. 11
      src/google/protobuf/compiler/java/java_features.pb.cc
  3. 11
      src/google/protobuf/cpp_features.pb.cc
  4. 174
      src/google/protobuf/descriptor.pb.cc

@ -19,10 +19,12 @@
#include <iterator>
#include <limits>
#include <memory>
#include <optional>
#include <string>
#include <utility>
#include <vector>
#include "absl/algorithm/container.h"
#include "absl/container/flat_hash_map.h"
#include "absl/container/flat_hash_set.h"
#include "absl/functional/any_invocable.h"
@ -5010,6 +5012,34 @@ std::vector<uint32_t> MessageGenerator::RequiredFieldsBitMask() const {
return masks;
}
// Returns the serialized size of `field` (tag size plus payload size) when
// that size is the same for every value of the field, and std::nullopt
// otherwise.
//
// A field qualifies only if it is not repeated, is not a member of a real
// oneof, and has presence; its type must additionally be one of the
// fixed-width types handled in the switch below (fixed32/64, sfixed32/64,
// float, double, bool).
static std::optional<int> FixedSize(const FieldDescriptor* field) {
  const bool eligible = !field->is_repeated() &&
                        !field->real_containing_oneof() &&
                        field->has_presence();
  if (!eligible) return std::nullopt;

  const size_t tag_bytes = WireFormat::TagSize(field->number(), field->type());

  // Payload width for the fixed-width types; any other type bails out.
  size_t payload_bytes = 0;
  switch (field->type()) {
    case FieldDescriptor::TYPE_FIXED32:
      payload_bytes = WireFormatLite::kFixed32Size;
      break;
    case FieldDescriptor::TYPE_FIXED64:
      payload_bytes = WireFormatLite::kFixed64Size;
      break;
    case FieldDescriptor::TYPE_SFIXED32:
      payload_bytes = WireFormatLite::kSFixed32Size;
      break;
    case FieldDescriptor::TYPE_SFIXED64:
      payload_bytes = WireFormatLite::kSFixed64Size;
      break;
    case FieldDescriptor::TYPE_FLOAT:
      payload_bytes = WireFormatLite::kFloatSize;
      break;
    case FieldDescriptor::TYPE_DOUBLE:
      payload_bytes = WireFormatLite::kDoubleSize;
      break;
    case FieldDescriptor::TYPE_BOOL:
      payload_bytes = WireFormatLite::kBoolSize;
      break;
    default:
      return std::nullopt;
  }
  return static_cast<int>(tag_bytes + payload_bytes);
}
void MessageGenerator::GenerateByteSize(io::Printer* p) {
if (HasSimpleBaseClass(descriptor_, options_)) return;
@ -5039,14 +5069,45 @@ void MessageGenerator::GenerateByteSize(io::Printer* p) {
return;
}
std::vector<FieldChunk> chunks = CollectFields(
optimized_order_, options_,
[&](const FieldDescriptor* a, const FieldDescriptor* b) -> bool {
std::vector<const FieldDescriptor*> fixed;
std::vector<const FieldDescriptor*> rest;
for (auto* f : optimized_order_) {
if (FixedSize(f).has_value()) {
fixed.push_back(f);
} else {
rest.push_back(f);
}
}
// Sort the fixed fields to ensure maximum grouping.
// The layout of the fields is irrelevant because we are not going to read
// them. We only look at the hasbits.
const auto fixed_tuple = [&](auto* f) {
return std::make_tuple(HasWordIndex(f), FixedSize(f));
};
absl::c_sort(
fixed, [&](auto* a, auto* b) { return fixed_tuple(a) < fixed_tuple(b); });
std::vector<FieldChunk> fixed_chunks =
CollectFields(fixed, options_, [&](const auto* a, const auto* b) {
return fixed_tuple(a) == fixed_tuple(b);
});
std::vector<FieldChunk> chunks =
CollectFields(rest, options_, [&](const auto* a, const auto* b) {
return a->label() == b->label() && HasByteIndex(a) == HasByteIndex(b) &&
IsLikelyPresent(a, options_) == IsLikelyPresent(b, options_) &&
ShouldSplit(a, options_) == ShouldSplit(b, options_);
});
// Interleave the fixed chunks in the right place to be able to reuse
// cached_has_bits if available. Otherwise, add them to the end.
for (auto& chunk : fixed_chunks) {
auto it = std::find_if(chunks.begin(), chunks.end(), [&](auto& c) {
return HasWordIndex(c.fields[0]) == HasWordIndex(chunk.fields[0]);
});
chunks.insert(it, std::move(chunk));
}
p->Emit(
{{"handle_extension_set",
[&] {
@ -5086,6 +5147,15 @@ void MessageGenerator::GenerateByteSize(io::Printer* p) {
auto it = chunks.begin();
auto end = chunks.end();
int cached_has_word_index = -1;
const auto update_cached_has_bits = [&](auto& fields) {
if (cached_has_word_index == HasWordIndex(fields.front())) return;
cached_has_word_index = HasWordIndex(fields.front());
p->Emit({{"index", cached_has_word_index}},
R"cc(
cached_has_bits = this_.$has_bits$[$index$];
)cc");
};
while (it != end) {
auto next =
@ -5096,6 +5166,25 @@ void MessageGenerator::GenerateByteSize(io::Printer* p) {
while (it != next) {
const auto& fields = it->fields;
// If the chunk is a fixed size singular chunk, use a branchless
// approach for it.
if (std::optional<int> fsize = FixedSize(fields[0])) {
update_cached_has_bits(fields);
uint32_t mask = GenChunkMask(fields, has_bit_indices_);
p->Emit({{"mask", absl::StrFormat("0x%08xu", mask)},
{"popcount", absl::has_single_bit(mask)
? "static_cast<bool>"
: "::absl::popcount"},
{"fsize", *fsize}},
R"cc(
//~
total_size += $popcount$($mask$ & cached_has_bits) * $fsize$;
)cc");
++it;
continue;
}
const bool check_has_byte =
fields.size() > 1 && HasWordIndex(fields[0]) != kNoHasbit &&
!IsLikelyPresent(fields.back(), options_);
@ -5112,14 +5201,7 @@ void MessageGenerator::GenerateByteSize(io::Printer* p) {
{"may_update_cached_has_word_index",
[&] {
if (!check_has_byte) return;
if (cached_has_word_index == HasWordIndex(fields.front()))
return;
cached_has_word_index = HasWordIndex(fields.front());
p->Emit({{"index", cached_has_word_index}},
R"cc(
cached_has_bits = this_.$has_bits$[$index$];
)cc");
update_cached_has_bits(fields);
}},
{"check_if_chunk_present",
[&] {

@ -362,20 +362,13 @@ PROTOBUF_NOINLINE void JavaFeatures::Clear() {
::_pbi::Prefetch5LinesFrom7Lines(&this_);
cached_has_bits = this_._impl_._has_bits_[0];
if (cached_has_bits & 0x00000007u) {
total_size += ::absl::popcount(0x00000006u & cached_has_bits) * 2;
{
// optional .pb.JavaFeatures.Utf8Validation utf8_validation = 2 [retention = RETENTION_RUNTIME, targets = TARGET_TYPE_FIELD, targets = TARGET_TYPE_FILE, edition_defaults = {
if (cached_has_bits & 0x00000001u) {
total_size += 1 +
::_pbi::WireFormatLite::EnumSize(this_._internal_utf8_validation());
}
// optional bool legacy_closed_enum = 1 [retention = RETENTION_RUNTIME, targets = TARGET_TYPE_FIELD, targets = TARGET_TYPE_FILE, edition_defaults = {
if (cached_has_bits & 0x00000002u) {
total_size += 2;
}
// optional bool use_old_outer_classname_default = 4 [retention = RETENTION_RUNTIME, targets = TARGET_TYPE_FILE, edition_defaults = {
if (cached_has_bits & 0x00000004u) {
total_size += 2;
}
}
return this_.MaybeComputeUnknownFieldsSize(total_size,
&this_._impl_._cached_size_);

@ -356,20 +356,13 @@ PROTOBUF_NOINLINE void CppFeatures::Clear() {
::_pbi::Prefetch5LinesFrom7Lines(&this_);
cached_has_bits = this_._impl_._has_bits_[0];
if (cached_has_bits & 0x00000007u) {
total_size += ::absl::popcount(0x00000006u & cached_has_bits) * 2;
{
// optional .pb.CppFeatures.StringType string_type = 2 [retention = RETENTION_RUNTIME, targets = TARGET_TYPE_FIELD, targets = TARGET_TYPE_FILE, edition_defaults = {
if (cached_has_bits & 0x00000001u) {
total_size += 1 +
::_pbi::WireFormatLite::EnumSize(this_._internal_string_type());
}
// optional bool legacy_closed_enum = 1 [retention = RETENTION_RUNTIME, targets = TARGET_TYPE_FIELD, targets = TARGET_TYPE_FILE, edition_defaults = {
if (cached_has_bits & 0x00000002u) {
total_size += 2;
}
// optional bool enum_name_uses_string_view = 3 [retention = RETENTION_SOURCE, targets = TARGET_TYPE_ENUM, targets = TARGET_TYPE_FILE, edition_defaults = {
if (cached_has_bits & 0x00000004u) {
total_size += 2;
}
}
return this_.MaybeComputeUnknownFieldsSize(total_size,
&this_._impl_._cached_size_);

@ -4818,7 +4818,8 @@ PROTOBUF_NOINLINE void ExtensionRangeOptions_Declaration::Clear() {
::_pbi::Prefetch5LinesFrom7Lines(&this_);
cached_has_bits = this_._impl_._has_bits_[0];
if (cached_has_bits & 0x0000001fu) {
total_size += ::absl::popcount(0x00000018u & cached_has_bits) * 2;
if (cached_has_bits & 0x00000007u) {
// optional string full_name = 2;
if (cached_has_bits & 0x00000001u) {
total_size += 1 + ::google::protobuf::internal::WireFormatLite::StringSize(
@ -4834,14 +4835,6 @@ PROTOBUF_NOINLINE void ExtensionRangeOptions_Declaration::Clear() {
total_size += ::_pbi::WireFormatLite::Int32SizePlusOne(
this_._internal_number());
}
// optional bool reserved = 5;
if (cached_has_bits & 0x00000008u) {
total_size += 2;
}
// optional bool repeated = 6;
if (cached_has_bits & 0x00000010u) {
total_size += 2;
}
}
return this_.MaybeComputeUnknownFieldsSize(total_size,
&this_._impl_._cached_size_);
@ -5719,6 +5712,7 @@ PROTOBUF_NOINLINE void FieldDescriptorProto::Clear() {
::_pbi::Prefetch5LinesFrom7Lines(&this_);
cached_has_bits = this_._impl_._has_bits_[0];
total_size += static_cast<bool>(0x00000100u & cached_has_bits) * 3;
if (cached_has_bits & 0x000000ffu) {
// optional string name = 1;
if (cached_has_bits & 0x00000001u) {
@ -5761,11 +5755,7 @@ PROTOBUF_NOINLINE void FieldDescriptorProto::Clear() {
this_._internal_oneof_index());
}
}
if (cached_has_bits & 0x00000700u) {
// optional bool proto3_optional = 17;
if (cached_has_bits & 0x00000100u) {
total_size += 3;
}
if (cached_has_bits & 0x00000600u) {
// optional .google.protobuf.FieldDescriptorProto.Label label = 4;
if (cached_has_bits & 0x00000200u) {
total_size += 1 +
@ -7881,7 +7871,8 @@ PROTOBUF_NOINLINE void MethodDescriptorProto::Clear() {
::_pbi::Prefetch5LinesFrom7Lines(&this_);
cached_has_bits = this_._impl_._has_bits_[0];
if (cached_has_bits & 0x0000003fu) {
total_size += ::absl::popcount(0x00000030u & cached_has_bits) * 2;
if (cached_has_bits & 0x0000000fu) {
// optional string name = 1;
if (cached_has_bits & 0x00000001u) {
total_size += 1 + ::google::protobuf::internal::WireFormatLite::StringSize(
@ -7902,14 +7893,6 @@ PROTOBUF_NOINLINE void MethodDescriptorProto::Clear() {
total_size += 1 +
::google::protobuf::internal::WireFormatLite::MessageSize(*this_._impl_.options_);
}
// optional bool client_streaming = 5 [default = false];
if (cached_has_bits & 0x00000010u) {
total_size += 2;
}
// optional bool server_streaming = 6 [default = false];
if (cached_has_bits & 0x00000020u) {
total_size += 2;
}
}
return this_.MaybeComputeUnknownFieldsSize(total_size,
&this_._impl_._cached_size_);
@ -8617,6 +8600,8 @@ PROTOBUF_NOINLINE void FileOptions::Clear() {
}
}
cached_has_bits = this_._impl_._has_bits_[0];
total_size += ::absl::popcount(0x000bf000u & cached_has_bits) * 3;
total_size += static_cast<bool>(0x00000800u & cached_has_bits) * 2;
if (cached_has_bits & 0x000000ffu) {
// optional string java_package = 1;
if (cached_has_bits & 0x00000001u) {
@ -8659,7 +8644,7 @@ PROTOBUF_NOINLINE void FileOptions::Clear() {
this_._internal_php_namespace());
}
}
if (cached_has_bits & 0x0000ff00u) {
if (cached_has_bits & 0x00000700u) {
// optional string php_metadata_namespace = 44;
if (cached_has_bits & 0x00000100u) {
total_size += 2 + ::google::protobuf::internal::WireFormatLite::StringSize(
@ -8675,45 +8660,13 @@ PROTOBUF_NOINLINE void FileOptions::Clear() {
total_size += 2 +
::google::protobuf::internal::WireFormatLite::MessageSize(*this_._impl_.features_);
}
// optional bool java_multiple_files = 10 [default = false];
if (cached_has_bits & 0x00000800u) {
total_size += 2;
}
// optional bool java_generate_equals_and_hash = 20 [deprecated = true];
if (cached_has_bits & 0x00001000u) {
total_size += 3;
}
// optional bool java_string_check_utf8 = 27 [default = false];
if (cached_has_bits & 0x00002000u) {
total_size += 3;
}
// optional bool cc_generic_services = 16 [default = false];
if (cached_has_bits & 0x00004000u) {
total_size += 3;
}
// optional bool java_generic_services = 17 [default = false];
if (cached_has_bits & 0x00008000u) {
total_size += 3;
}
}
if (cached_has_bits & 0x000f0000u) {
// optional bool py_generic_services = 18 [default = false];
if (cached_has_bits & 0x00010000u) {
total_size += 3;
}
// optional bool deprecated = 23 [default = false];
if (cached_has_bits & 0x00020000u) {
total_size += 3;
}
{
// optional .google.protobuf.FileOptions.OptimizeMode optimize_for = 9 [default = SPEED];
if (cached_has_bits & 0x00040000u) {
total_size += 1 +
::_pbi::WireFormatLite::EnumSize(this_._internal_optimize_for());
}
// optional bool cc_enable_arenas = 31 [default = true];
if (cached_has_bits & 0x00080000u) {
total_size += 3;
}
}
return this_.MaybeComputeUnknownFieldsSize(total_size,
&this_._impl_._cached_size_);
@ -9199,32 +9152,13 @@ PROTOBUF_NOINLINE void MessageOptions::Clear() {
}
}
cached_has_bits = this_._impl_._has_bits_[0];
if (cached_has_bits & 0x0000003fu) {
total_size += ::absl::popcount(0x0000003eu & cached_has_bits) * 2;
{
// optional .google.protobuf.FeatureSet features = 12;
if (cached_has_bits & 0x00000001u) {
total_size += 1 +
::google::protobuf::internal::WireFormatLite::MessageSize(*this_._impl_.features_);
}
// optional bool message_set_wire_format = 1 [default = false];
if (cached_has_bits & 0x00000002u) {
total_size += 2;
}
// optional bool no_standard_descriptor_accessor = 2 [default = false];
if (cached_has_bits & 0x00000004u) {
total_size += 2;
}
// optional bool deprecated = 3 [default = false];
if (cached_has_bits & 0x00000008u) {
total_size += 2;
}
// optional bool map_entry = 7;
if (cached_has_bits & 0x00000010u) {
total_size += 2;
}
// optional bool deprecated_legacy_json_field_conflicts = 11 [deprecated = true];
if (cached_has_bits & 0x00000020u) {
total_size += 2;
}
}
return this_.MaybeComputeUnknownFieldsSize(total_size,
&this_._impl_._cached_size_);
@ -10423,7 +10357,9 @@ PROTOBUF_NOINLINE void FieldOptions::Clear() {
}
}
cached_has_bits = this_._impl_._has_bits_[0];
if (cached_has_bits & 0x000000ffu) {
total_size += static_cast<bool>(0x00000200u & cached_has_bits) * 3;
total_size += ::absl::popcount(0x000001f0u & cached_has_bits) * 2;
if (cached_has_bits & 0x0000000fu) {
// optional .google.protobuf.FeatureSet features = 21;
if (cached_has_bits & 0x00000001u) {
total_size += 2 +
@ -10444,32 +10380,8 @@ PROTOBUF_NOINLINE void FieldOptions::Clear() {
total_size += 1 +
::_pbi::WireFormatLite::EnumSize(this_._internal_jstype());
}
// optional bool packed = 2;
if (cached_has_bits & 0x00000010u) {
total_size += 2;
}
// optional bool lazy = 5 [default = false];
if (cached_has_bits & 0x00000020u) {
total_size += 2;
}
// optional bool unverified_lazy = 15 [default = false];
if (cached_has_bits & 0x00000040u) {
total_size += 2;
}
// optional bool deprecated = 3 [default = false];
if (cached_has_bits & 0x00000080u) {
total_size += 2;
}
}
if (cached_has_bits & 0x00000700u) {
// optional bool weak = 10 [default = false];
if (cached_has_bits & 0x00000100u) {
total_size += 2;
}
// optional bool debug_redact = 16 [default = false];
if (cached_has_bits & 0x00000200u) {
total_size += 3;
}
{
// optional .google.protobuf.FieldOptions.OptionRetention retention = 17;
if (cached_has_bits & 0x00000400u) {
total_size += 2 +
@ -11239,24 +11151,13 @@ PROTOBUF_NOINLINE void EnumOptions::Clear() {
}
}
cached_has_bits = this_._impl_._has_bits_[0];
if (cached_has_bits & 0x0000000fu) {
total_size += ::absl::popcount(0x0000000eu & cached_has_bits) * 2;
{
// optional .google.protobuf.FeatureSet features = 7;
if (cached_has_bits & 0x00000001u) {
total_size += 1 +
::google::protobuf::internal::WireFormatLite::MessageSize(*this_._impl_.features_);
}
// optional bool allow_alias = 2;
if (cached_has_bits & 0x00000002u) {
total_size += 2;
}
// optional bool deprecated = 3 [default = false];
if (cached_has_bits & 0x00000004u) {
total_size += 2;
}
// optional bool deprecated_legacy_json_field_conflicts = 6 [deprecated = true];
if (cached_has_bits & 0x00000008u) {
total_size += 2;
}
}
return this_.MaybeComputeUnknownFieldsSize(total_size,
&this_._impl_._cached_size_);
@ -11667,7 +11568,8 @@ PROTOBUF_NOINLINE void EnumValueOptions::Clear() {
}
}
cached_has_bits = this_._impl_._has_bits_[0];
if (cached_has_bits & 0x0000000fu) {
total_size += ::absl::popcount(0x0000000cu & cached_has_bits) * 2;
if (cached_has_bits & 0x00000003u) {
// optional .google.protobuf.FeatureSet features = 2;
if (cached_has_bits & 0x00000001u) {
total_size += 1 +
@ -11678,14 +11580,6 @@ PROTOBUF_NOINLINE void EnumValueOptions::Clear() {
total_size += 1 +
::google::protobuf::internal::WireFormatLite::MessageSize(*this_._impl_.feature_support_);
}
// optional bool deprecated = 1 [default = false];
if (cached_has_bits & 0x00000004u) {
total_size += 2;
}
// optional bool debug_redact = 3 [default = false];
if (cached_has_bits & 0x00000008u) {
total_size += 2;
}
}
return this_.MaybeComputeUnknownFieldsSize(total_size,
&this_._impl_._cached_size_);
@ -12055,16 +11949,13 @@ PROTOBUF_NOINLINE void ServiceOptions::Clear() {
}
}
cached_has_bits = this_._impl_._has_bits_[0];
if (cached_has_bits & 0x00000003u) {
total_size += static_cast<bool>(0x00000002u & cached_has_bits) * 3;
{
// optional .google.protobuf.FeatureSet features = 34;
if (cached_has_bits & 0x00000001u) {
total_size += 2 +
::google::protobuf::internal::WireFormatLite::MessageSize(*this_._impl_.features_);
}
// optional bool deprecated = 33 [default = false];
if (cached_has_bits & 0x00000002u) {
total_size += 3;
}
}
return this_.MaybeComputeUnknownFieldsSize(total_size,
&this_._impl_._cached_size_);
@ -12450,16 +12341,13 @@ PROTOBUF_NOINLINE void MethodOptions::Clear() {
}
}
cached_has_bits = this_._impl_._has_bits_[0];
if (cached_has_bits & 0x00000007u) {
total_size += static_cast<bool>(0x00000002u & cached_has_bits) * 3;
if (cached_has_bits & 0x00000005u) {
// optional .google.protobuf.FeatureSet features = 35;
if (cached_has_bits & 0x00000001u) {
total_size += 2 +
::google::protobuf::internal::WireFormatLite::MessageSize(*this_._impl_.features_);
}
// optional bool deprecated = 33 [default = false];
if (cached_has_bits & 0x00000002u) {
total_size += 3;
}
// optional .google.protobuf.MethodOptions.IdempotencyLevel idempotency_level = 34 [default = IDEMPOTENCY_UNKNOWN];
if (cached_has_bits & 0x00000004u) {
total_size += 2 +
@ -12766,16 +12654,13 @@ PROTOBUF_NOINLINE void UninterpretedOption_NamePart::Clear() {
::_pbi::Prefetch5LinesFrom7Lines(&this_);
cached_has_bits = this_._impl_._has_bits_[0];
if (cached_has_bits & 0x00000003u) {
total_size += static_cast<bool>(0x00000002u & cached_has_bits) * 2;
{
// required string name_part = 1;
if (cached_has_bits & 0x00000001u) {
total_size += 1 + ::google::protobuf::internal::WireFormatLite::StringSize(
this_._internal_name_part());
}
// required bool is_extension = 2;
if (cached_has_bits & 0x00000002u) {
total_size += 2;
}
}
return this_.MaybeComputeUnknownFieldsSize(total_size,
&this_._impl_._cached_size_);
@ -13179,7 +13064,8 @@ PROTOBUF_NOINLINE void UninterpretedOption::Clear() {
}
}
cached_has_bits = this_._impl_._has_bits_[0];
if (cached_has_bits & 0x0000003fu) {
total_size += static_cast<bool>(0x00000020u & cached_has_bits) * 9;
if (cached_has_bits & 0x0000001fu) {
// optional string identifier_value = 3;
if (cached_has_bits & 0x00000001u) {
total_size += 1 + ::google::protobuf::internal::WireFormatLite::StringSize(
@ -13205,10 +13091,6 @@ PROTOBUF_NOINLINE void UninterpretedOption::Clear() {
total_size += ::_pbi::WireFormatLite::Int64SizePlusOne(
this_._internal_negative_int_value());
}
// optional double double_value = 6;
if (cached_has_bits & 0x00000020u) {
total_size += 9;
}
}
return this_.MaybeComputeUnknownFieldsSize(total_size,
&this_._impl_._cached_size_);

Loading…
Cancel
Save