diff --git a/src/google/protobuf/compiler/cpp/message.cc b/src/google/protobuf/compiler/cpp/message.cc index 2e35aefe88..7506f897df 100644 --- a/src/google/protobuf/compiler/cpp/message.cc +++ b/src/google/protobuf/compiler/cpp/message.cc @@ -4513,6 +4513,15 @@ void MessageGenerator::GenerateByteSize(io::Printer* p) { "// Prevent compiler warnings about cached_has_bits being unused\n" "(void) cached_has_bits;\n\n"); + // See the comment in src/google/protobuf/port.h for details + // on how much we are prefetching. Only insert prefetch once per + // function, since advancing is actually slower. We sometimes + // prefetch more than sizeof(message), because it helps with + // the next message on the arena. + p->Emit(R"cc( + ::_pbi::Prefetch5LinesFrom7Lines(reinterpret_cast<const void*>(this)); + )cc"); + while (it != end) { auto next = FindNextUnequalChunk(it, end, MayGroupChunksForHaswordsCheck); bool has_haswords_check = MaybeEmitHaswordsCheck( diff --git a/src/google/protobuf/compiler/java/java_features.pb.cc b/src/google/protobuf/compiler/java/java_features.pb.cc index c2bd61a8e1..c3f4705945 100644 --- a/src/google/protobuf/compiler/java/java_features.pb.cc +++ b/src/google/protobuf/compiler/java/java_features.pb.cc @@ -289,6 +289,7 @@ const char* JavaFeatures::_InternalParse( // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; + ::_pbi::Prefetch5LinesFrom7Lines(reinterpret_cast<const void*>(this)); cached_has_bits = _impl_._has_bits_[0]; if (cached_has_bits & 0x00000003u) { // optional bool legacy_closed_enum = 1 [retention = RETENTION_RUNTIME, targets = TARGET_TYPE_FIELD, targets = TARGET_TYPE_FILE, edition_defaults = { diff --git a/src/google/protobuf/compiler/plugin.pb.cc b/src/google/protobuf/compiler/plugin.pb.cc index 71bfbf2364..ca0938ee4c 100644 --- a/src/google/protobuf/compiler/plugin.pb.cc +++ b/src/google/protobuf/compiler/plugin.pb.cc @@ -508,6 +508,7 @@ const char* Version::_InternalParse( // Prevent compiler warnings about cached_has_bits being 
unused (void) cached_has_bits; + ::_pbi::Prefetch5LinesFrom7Lines(reinterpret_cast<const void*>(this)); cached_has_bits = _impl_._has_bits_[0]; if (cached_has_bits & 0x0000000fu) { // optional string suffix = 4; @@ -849,6 +850,7 @@ const char* CodeGeneratorRequest::_InternalParse( // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; + ::_pbi::Prefetch5LinesFrom7Lines(reinterpret_cast<const void*>(this)); // repeated string file_to_generate = 1; total_size += 1 * ::google::protobuf::internal::FromIntSize(_internal_file_to_generate().size()); for (int i = 0, n = _internal_file_to_generate().size(); i < n; ++i) { @@ -1178,6 +1180,7 @@ const char* CodeGeneratorResponse_File::_InternalParse( // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; + ::_pbi::Prefetch5LinesFrom7Lines(reinterpret_cast<const void*>(this)); cached_has_bits = _impl_._has_bits_[0]; if (cached_has_bits & 0x0000000fu) { // optional string name = 1; @@ -1510,6 +1513,7 @@ const char* CodeGeneratorResponse::_InternalParse( // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; + ::_pbi::Prefetch5LinesFrom7Lines(reinterpret_cast<const void*>(this)); // repeated .google.protobuf.compiler.CodeGeneratorResponse.File file = 15; total_size += 1UL * this->_internal_file_size(); for (const auto& msg : this->_internal_file()) { diff --git a/src/google/protobuf/cpp_features.pb.cc b/src/google/protobuf/cpp_features.pb.cc index 061e13d3a7..69cad9978b 100644 --- a/src/google/protobuf/cpp_features.pb.cc +++ b/src/google/protobuf/cpp_features.pb.cc @@ -289,6 +289,7 @@ const char* CppFeatures::_InternalParse( // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; + ::_pbi::Prefetch5LinesFrom7Lines(reinterpret_cast<const void*>(this)); cached_has_bits = _impl_._has_bits_[0]; if (cached_has_bits & 0x00000003u) { // optional bool legacy_closed_enum = 1 [retention = RETENTION_RUNTIME, targets = TARGET_TYPE_FIELD, targets = TARGET_TYPE_FILE, 
edition_defaults = { diff --git a/src/google/protobuf/descriptor.pb.cc b/src/google/protobuf/descriptor.pb.cc index ec78e65879..b6e3185ae3 100644 --- a/src/google/protobuf/descriptor.pb.cc +++ b/src/google/protobuf/descriptor.pb.cc @@ -2479,6 +2479,7 @@ const char* FileDescriptorSet::_InternalParse( // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; + ::_pbi::Prefetch5LinesFrom7Lines(reinterpret_cast<const void*>(this)); // repeated .google.protobuf.FileDescriptorProto file = 1; total_size += 1UL * this->_internal_file_size(); for (const auto& msg : this->_internal_file()) { @@ -2927,6 +2928,7 @@ const char* FileDescriptorProto::_InternalParse( // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; + ::_pbi::Prefetch5LinesFrom7Lines(reinterpret_cast<const void*>(this)); // repeated string dependency = 3; total_size += 1 * ::google::protobuf::internal::FromIntSize(_internal_dependency().size()); for (int i = 0, n = _internal_dependency().size(); i < n; ++i) { @@ -3321,6 +3323,7 @@ const char* DescriptorProto_ExtensionRange::_InternalParse( // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; + ::_pbi::Prefetch5LinesFrom7Lines(reinterpret_cast<const void*>(this)); cached_has_bits = _impl_._has_bits_[0]; if (cached_has_bits & 0x00000007u) { // optional .google.protobuf.ExtensionRangeOptions options = 3; @@ -3567,6 +3570,7 @@ const char* DescriptorProto_ReservedRange::_InternalParse( // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; + ::_pbi::Prefetch5LinesFrom7Lines(reinterpret_cast<const void*>(this)); cached_has_bits = _impl_._has_bits_[0]; if (cached_has_bits & 0x00000003u) { // optional int32 start = 1; @@ -3988,6 +3992,7 @@ const char* DescriptorProto::_InternalParse( // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; + ::_pbi::Prefetch5LinesFrom7Lines(reinterpret_cast<const void*>(this)); // repeated .google.protobuf.FieldDescriptorProto field = 2; 
total_size += 1UL * this->_internal_field_size(); for (const auto& msg : this->_internal_field()) { @@ -4375,6 +4380,7 @@ const char* ExtensionRangeOptions_Declaration::_InternalParse( // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; + ::_pbi::Prefetch5LinesFrom7Lines(reinterpret_cast<const void*>(this)); cached_has_bits = _impl_._has_bits_[0]; if (cached_has_bits & 0x0000001fu) { // optional string full_name = 2; @@ -4704,6 +4710,7 @@ const char* ExtensionRangeOptions::_InternalParse( // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; + ::_pbi::Prefetch5LinesFrom7Lines(reinterpret_cast<const void*>(this)); // repeated .google.protobuf.ExtensionRangeOptions.Declaration declaration = 2 [retention = RETENTION_SOURCE]; total_size += 1UL * this->_internal_declaration_size(); for (const auto& msg : this->_internal_declaration()) { @@ -5164,6 +5171,7 @@ const char* FieldDescriptorProto::_InternalParse( // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; + ::_pbi::Prefetch5LinesFrom7Lines(reinterpret_cast<const void*>(this)); cached_has_bits = _impl_._has_bits_[0]; if (cached_has_bits & 0x000000ffu) { // optional string name = 1; @@ -5515,6 +5523,7 @@ const char* OneofDescriptorProto::_InternalParse( // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; + ::_pbi::Prefetch5LinesFrom7Lines(reinterpret_cast<const void*>(this)); cached_has_bits = _impl_._has_bits_[0]; if (cached_has_bits & 0x00000003u) { // optional string name = 1; @@ -5750,6 +5759,7 @@ const char* EnumDescriptorProto_EnumReservedRange::_InternalParse( // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; + ::_pbi::Prefetch5LinesFrom7Lines(reinterpret_cast<const void*>(this)); cached_has_bits = _impl_._has_bits_[0]; if (cached_has_bits & 0x00000003u) { // optional int32 start = 1; @@ -6063,6 +6073,7 @@ const char* EnumDescriptorProto::_InternalParse( // Prevent compiler warnings about 
cached_has_bits being unused (void) cached_has_bits; + ::_pbi::Prefetch5LinesFrom7Lines(reinterpret_cast<const void*>(this)); // repeated .google.protobuf.EnumValueDescriptorProto value = 2; total_size += 1UL * this->_internal_value_size(); for (const auto& msg : this->_internal_value()) { @@ -6367,6 +6378,7 @@ const char* EnumValueDescriptorProto::_InternalParse( // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; + ::_pbi::Prefetch5LinesFrom7Lines(reinterpret_cast<const void*>(this)); cached_has_bits = _impl_._has_bits_[0]; if (cached_has_bits & 0x00000007u) { // optional string name = 1; @@ -6661,6 +6673,7 @@ const char* ServiceDescriptorProto::_InternalParse( // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; + ::_pbi::Prefetch5LinesFrom7Lines(reinterpret_cast<const void*>(this)); // repeated .google.protobuf.MethodDescriptorProto method = 2; total_size += 1UL * this->_internal_method_size(); for (const auto& msg : this->_internal_method()) { @@ -7013,6 +7026,7 @@ const char* MethodDescriptorProto::_InternalParse( // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; + ::_pbi::Prefetch5LinesFrom7Lines(reinterpret_cast<const void*>(this)); cached_has_bits = _impl_._has_bits_[0]; if (cached_has_bits & 0x0000003fu) { // optional string name = 1; @@ -7683,6 +7697,7 @@ const char* FileOptions::_InternalParse( // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; + ::_pbi::Prefetch5LinesFrom7Lines(reinterpret_cast<const void*>(this)); // repeated .google.protobuf.UninterpretedOption uninterpreted_option = 999; total_size += 2UL * this->_internal_uninterpreted_option_size(); for (const auto& msg : this->_internal_uninterpreted_option()) { @@ -8217,6 +8232,7 @@ const char* MessageOptions::_InternalParse( // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; + ::_pbi::Prefetch5LinesFrom7Lines(reinterpret_cast<const void*>(this)); // repeated 
.google.protobuf.UninterpretedOption uninterpreted_option = 999; total_size += 2UL * this->_internal_uninterpreted_option_size(); for (const auto& msg : this->_internal_uninterpreted_option()) { @@ -8514,6 +8530,7 @@ const char* FieldOptions_EditionDefault::_InternalParse( // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; + ::_pbi::Prefetch5LinesFrom7Lines(reinterpret_cast<const void*>(this)); cached_has_bits = _impl_._has_bits_[0]; if (cached_has_bits & 0x00000003u) { // optional string value = 2; @@ -8947,6 +8964,7 @@ const char* FieldOptions::_InternalParse( // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; + ::_pbi::Prefetch5LinesFrom7Lines(reinterpret_cast<const void*>(this)); // repeated .google.protobuf.FieldOptions.OptionTargetType targets = 19; { std::size_t data_size = 0; @@ -9322,6 +9340,7 @@ const char* OneofOptions::_InternalParse( // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; + ::_pbi::Prefetch5LinesFrom7Lines(reinterpret_cast<const void*>(this)); // repeated .google.protobuf.UninterpretedOption uninterpreted_option = 999; total_size += 2UL * this->_internal_uninterpreted_option_size(); for (const auto& msg : this->_internal_uninterpreted_option()) { @@ -9640,6 +9659,7 @@ const char* EnumOptions::_InternalParse( // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; + ::_pbi::Prefetch5LinesFrom7Lines(reinterpret_cast<const void*>(this)); // repeated .google.protobuf.UninterpretedOption uninterpreted_option = 999; total_size += 2UL * this->_internal_uninterpreted_option_size(); for (const auto& msg : this->_internal_uninterpreted_option()) { @@ -9981,6 +10001,7 @@ const char* EnumValueOptions::_InternalParse( // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; + ::_pbi::Prefetch5LinesFrom7Lines(reinterpret_cast<const void*>(this)); // repeated .google.protobuf.UninterpretedOption uninterpreted_option = 999; total_size += 2UL * 
this->_internal_uninterpreted_option_size(); for (const auto& msg : this->_internal_uninterpreted_option()) { @@ -10291,6 +10312,7 @@ const char* ServiceOptions::_InternalParse( // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; + ::_pbi::Prefetch5LinesFrom7Lines(reinterpret_cast<const void*>(this)); // repeated .google.protobuf.UninterpretedOption uninterpreted_option = 999; total_size += 2UL * this->_internal_uninterpreted_option_size(); for (const auto& msg : this->_internal_uninterpreted_option()) { @@ -10620,6 +10642,7 @@ const char* MethodOptions::_InternalParse( // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; + ::_pbi::Prefetch5LinesFrom7Lines(reinterpret_cast<const void*>(this)); // repeated .google.protobuf.UninterpretedOption uninterpreted_option = 999; total_size += 2UL * this->_internal_uninterpreted_option_size(); for (const auto& msg : this->_internal_uninterpreted_option()) { @@ -10897,6 +10920,7 @@ const char* UninterpretedOption_NamePart::_InternalParse( // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; + ::_pbi::Prefetch5LinesFrom7Lines(reinterpret_cast<const void*>(this)); cached_has_bits = _impl_._has_bits_[0]; if (cached_has_bits & 0x00000003u) { // required string name_part = 1; @@ -11242,6 +11266,7 @@ const char* UninterpretedOption::_InternalParse( // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; + ::_pbi::Prefetch5LinesFrom7Lines(reinterpret_cast<const void*>(this)); // repeated .google.protobuf.UninterpretedOption.NamePart name = 2; total_size += 1UL * this->_internal_name_size(); for (const auto& msg : this->_internal_name()) { @@ -11606,6 +11631,7 @@ const char* FeatureSet::_InternalParse( // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; + ::_pbi::Prefetch5LinesFrom7Lines(reinterpret_cast<const void*>(this)); cached_has_bits = _impl_._has_bits_[0]; if (cached_has_bits & 0x0000003fu) { // optional 
.google.protobuf.FeatureSet.FieldPresence field_presence = 1 [retention = RETENTION_RUNTIME, targets = TARGET_TYPE_FIELD, targets = TARGET_TYPE_FILE, edition_defaults = { @@ -11893,6 +11919,7 @@ const char* FeatureSetDefaults_FeatureSetEditionDefault::_InternalParse( // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; + ::_pbi::Prefetch5LinesFrom7Lines(reinterpret_cast<const void*>(this)); cached_has_bits = _impl_._has_bits_[0]; if (cached_has_bits & 0x00000003u) { // optional .google.protobuf.FeatureSet features = 2; @@ -12169,6 +12196,7 @@ const char* FeatureSetDefaults::_InternalParse( // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; + ::_pbi::Prefetch5LinesFrom7Lines(reinterpret_cast<const void*>(this)); // repeated .google.protobuf.FeatureSetDefaults.FeatureSetEditionDefault defaults = 1; total_size += 1UL * this->_internal_defaults_size(); for (const auto& msg : this->_internal_defaults()) { @@ -12488,6 +12516,7 @@ const char* SourceCodeInfo_Location::_InternalParse( // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; + ::_pbi::Prefetch5LinesFrom7Lines(reinterpret_cast<const void*>(this)); // repeated int32 path = 1 [packed = true]; { std::size_t data_size = ::_pbi::WireFormatLite::Int32Size( @@ -12740,6 +12769,7 @@ const char* SourceCodeInfo::_InternalParse( // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; + ::_pbi::Prefetch5LinesFrom7Lines(reinterpret_cast<const void*>(this)); // repeated .google.protobuf.SourceCodeInfo.Location location = 1; total_size += 1UL * this->_internal_location_size(); for (const auto& msg : this->_internal_location()) { @@ -13022,6 +13052,7 @@ const char* GeneratedCodeInfo_Annotation::_InternalParse( // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; + ::_pbi::Prefetch5LinesFrom7Lines(reinterpret_cast<const void*>(this)); // repeated int32 path = 1 [packed = true]; { std::size_t data_size = 
::_pbi::WireFormatLite::Int32Size( @@ -13274,6 +13305,7 @@ const char* GeneratedCodeInfo::_InternalParse( // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; + ::_pbi::Prefetch5LinesFrom7Lines(reinterpret_cast<const void*>(this)); // repeated .google.protobuf.GeneratedCodeInfo.Annotation annotation = 1; total_size += 1UL * this->_internal_annotation_size(); for (const auto& msg : this->_internal_annotation()) { diff --git a/src/google/protobuf/editions/golden/compare_cpp_codegen_failure.txt b/src/google/protobuf/editions/golden/compare_cpp_codegen_failure.txt index 03be36fe32..b6048b1fa5 100644 --- a/src/google/protobuf/editions/golden/compare_cpp_codegen_failure.txt +++ b/src/google/protobuf/editions/golden/compare_cpp_codegen_failure.txt @@ -25,9 +25,9 @@ target = ::proto2::internal::WireFormatLite:: WriteInt32ToArrayWithField<1>( @@ @@ - // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; + ::_pbi::Prefetch5LinesFrom7Lines(reinterpret_cast<const void*>(this)); - // optional int32 int32_field = 1; + // int32 int32_field = 1; cached_has_bits = _impl_._has_bits_[0]; diff --git a/src/google/protobuf/editions/golden/compare_cpp_codegen_failure.xml b/src/google/protobuf/editions/golden/compare_cpp_codegen_failure.xml index 34a5543e6e..d5ad450307 100644 --- a/src/google/protobuf/editions/golden/compare_cpp_codegen_failure.xml +++ b/src/google/protobuf/editions/golden/compare_cpp_codegen_failure.xml @@ -2,7 +2,7 @@ - + diff --git a/src/google/protobuf/port.h b/src/google/protobuf/port.h index a5f3b98b12..907050597e 100644 --- a/src/google/protobuf/port.h +++ b/src/google/protobuf/port.h @@ -212,6 +212,23 @@ inline constexpr bool DebugHardenStringValues() { #endif } +// Prefetch 5 64-byte cache lines starting from 7 cache lines ahead. +// Constants are somewhat arbitrary and pretty aggressive, but were +// chosen to give better benchmark results. E.g. 
this is ~20% +// faster, single cache line prefetch is ~12% faster, increasing or +// decreasing distance makes results 2-4% worse. Important note: +// prefetch doesn't require a valid address, so it is ok to prefetch +// past the end of message/valid memory; however, we are doing this +// inside an inline asm block, since computing the invalid pointer +// is potential UB. Only insert prefetch once per function. +PROTOBUF_ALWAYS_INLINE inline void Prefetch5LinesFrom7Lines(const void* ptr) { + PROTOBUF_PREFETCH_WITH_OFFSET(ptr, 448); + PROTOBUF_PREFETCH_WITH_OFFSET(ptr, 512); + PROTOBUF_PREFETCH_WITH_OFFSET(ptr, 576); + PROTOBUF_PREFETCH_WITH_OFFSET(ptr, 640); + PROTOBUF_PREFETCH_WITH_OFFSET(ptr, 704); +} + #if defined(NDEBUG) && ABSL_HAVE_BUILTIN(__builtin_unreachable) [[noreturn]] ABSL_ATTRIBUTE_COLD PROTOBUF_ALWAYS_INLINE inline void Unreachable() { diff --git a/src/google/protobuf/port_def.inc b/src/google/protobuf/port_def.inc index 34ece99ac3..720b44615f 100644 --- a/src/google/protobuf/port_def.inc +++ b/src/google/protobuf/port_def.inc @@ -746,6 +746,16 @@ static_assert(PROTOBUF_ABSL_MIN(20230125, 3), #error PROTOBUF_PREFETCH_PARSE_TABLE was previously defined #endif +#ifdef PROTOBUF_PREFETCH_WITH_OFFSET +#error PROTOBUF_PREFETCH_WITH_OFFSET was previously defined +#endif +#if defined(__x86_64__) && defined(__GNUC__) +#define PROTOBUF_PREFETCH_WITH_OFFSET(base, offset) \ + asm("prefetcht0 " #offset "(%0)" : : "r"(base)); +#else +#define PROTOBUF_PREFETCH_WITH_OFFSET(base, offset) +#endif + // ThreadSafeArenaz is turned off completely in opensource builds. 
// noreturn is defined as a macro in C's stdnoreturn.h diff --git a/src/google/protobuf/port_undef.inc b/src/google/protobuf/port_undef.inc index 4e210c5c1e..f5654ea838 100644 --- a/src/google/protobuf/port_undef.inc +++ b/src/google/protobuf/port_undef.inc @@ -76,6 +76,7 @@ #undef PROTOBUF_BUILTIN_CONSTANT_P #undef PROTOBUF_DESCRIPTOR_WEAK_MESSAGES_ALLOWED #undef PROTOBUF_PREFETCH_PARSE_TABLE +#undef PROTOBUF_PREFETCH_WITH_OFFSET #undef PROTOBUF_TC_PARAM_DECL #undef PROTOBUF_DEBUG #undef PROTO2_IS_OSS