Merge pull request #10653 from mkruskal-google/sync-stage

Integrate from Piper for C++, Java, and Python
pull/10649/head
Mike Kruskal 2 years ago committed by GitHub
commit 391cc7b355
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 3
      CHANGES.txt
  2. 2
      cmake/install.cmake
  3. 17
      java/lite/src/test/java/com/google/protobuf/LiteTest.java
  4. 5
      python/google/protobuf/text_format.py
  5. 4
      src/google/protobuf/arena.cc
  6. 46
      src/google/protobuf/compiler/cpp/message.cc
  7. 1
      src/google/protobuf/compiler/java/enum_field_lite.cc
  8. 32
      src/google/protobuf/generated_message_tctable_gen.cc
  9. 43
      src/google/protobuf/generated_message_tctable_gen.h
  10. 54
      src/google/protobuf/generated_message_tctable_impl.h
  11. 26
      src/google/protobuf/generated_message_tctable_lite.cc

@ -39,6 +39,9 @@
constructed FieldMaskTrees. constructed FieldMaskTrees.
* Optimized Java proto serialization gencode for protos having many extension ranges with few fields in between. * Optimized Java proto serialization gencode for protos having many extension ranges with few fields in between.
* More thoroughly annotate public generated code in Java lite protocol buffers. * More thoroughly annotate public generated code in Java lite protocol buffers.
* Fixed Bug in proto3 java lite repeated enum fields. Failed to call copyOnWrite before modifying previously built message. Causes modification to already "built" messages that should be immutable.
* Refactoring java full runtime to reuse sub-message builders and prepare to migrate parsing logic from parse constructor to builder.
* Fix Java reflection serialization of empty packed fields.
Python Python
* Changes ordering of printed fields in .pyi files from lexicographic to the same ordering found in the proto descriptor. * Changes ordering of printed fields in .pyi files from lexicographic to the same ordering found in the proto descriptor.

@ -47,7 +47,7 @@ include(${protobuf_SOURCE_DIR}/src/file_lists.cmake)
set(protobuf_HEADERS set(protobuf_HEADERS
${libprotobuf_hdrs} ${libprotobuf_hdrs}
${libprotoc_hdrs} ${libprotoc_hdrs}
${wkt_protos_files} ${wkt_protos_proto_srcs}
${descriptor_proto_proto_srcs} ${descriptor_proto_proto_srcs}
${plugin_proto_proto_srcs} ${plugin_proto_proto_srcs}
) )

@ -51,6 +51,7 @@ import com.google.protobuf.UnittestLite.TestAllTypesLite.RepeatedGroup;
import com.google.protobuf.UnittestLite.TestAllTypesLiteOrBuilder; import com.google.protobuf.UnittestLite.TestAllTypesLiteOrBuilder;
import com.google.protobuf.UnittestLite.TestHugeFieldNumbersLite; import com.google.protobuf.UnittestLite.TestHugeFieldNumbersLite;
import com.google.protobuf.UnittestLite.TestNestedExtensionLite; import com.google.protobuf.UnittestLite.TestNestedExtensionLite;
import com.google.protobuf.testing.Proto3TestingLite.Proto3MessageLite;
import map_lite_test.MapTestProto.TestMap; import map_lite_test.MapTestProto.TestMap;
import map_lite_test.MapTestProto.TestMap.MessageValue; import map_lite_test.MapTestProto.TestMap.MessageValue;
import protobuf_unittest.NestedExtensionLite; import protobuf_unittest.NestedExtensionLite;
@ -215,6 +216,22 @@ public class LiteTest {
assertThat(((Byte) memo.get(message)).intValue()).isEqualTo(1); assertThat(((Byte) memo.get(message)).intValue()).isEqualTo(1);
} }
@Test
public void testProto3EnumListValueCopyOnWrite() {
Proto3MessageLite.Builder builder = Proto3MessageLite.newBuilder();
Proto3MessageLite message = builder.build();
builder.addFieldEnumList30Value(Proto3MessageLite.TestEnum.ONE_VALUE);
assertThat(message.getFieldEnumList30List()).isEmpty();
assertThat(builder.getFieldEnumList30List()).containsExactly(Proto3MessageLite.TestEnum.ONE);
assertThat(message.getFieldEnumList30List()).isEmpty();
Proto3MessageLite messageAfterBuild = builder.build();
builder.clearFieldEnumList30();
assertThat(builder.getFieldEnumList30List()).isEmpty();
assertThat(messageAfterBuild.getFieldEnumList30List())
.containsExactly(Proto3MessageLite.TestEnum.ONE);
}
@Test @Test
public void testSanityCopyOnWrite() throws InvalidProtocolBufferException { public void testSanityCopyOnWrite() throws InvalidProtocolBufferException {
// Since builders are implemented as a thin wrapper around a message // Since builders are implemented as a thin wrapper around a message

@ -557,7 +557,7 @@ class _Printer(object):
# For groups, use the capitalized name. # For groups, use the capitalized name.
out.write(field.message_type.name) out.write(field.message_type.name)
else: else:
out.write(field.name) out.write(field.name)
if (self.force_colon or if (self.force_colon or
field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_MESSAGE): field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_MESSAGE):
@ -923,8 +923,6 @@ class _Parser(object):
# pylint: disable=protected-access # pylint: disable=protected-access
field = message.Extensions._FindExtensionByName(name) field = message.Extensions._FindExtensionByName(name)
# pylint: enable=protected-access # pylint: enable=protected-access
if not field: if not field:
if self.allow_unknown_extension: if self.allow_unknown_extension:
field = None field = None
@ -1013,7 +1011,6 @@ class _Parser(object):
if not tokenizer.TryConsume(','): if not tokenizer.TryConsume(','):
tokenizer.TryConsume(';') tokenizer.TryConsume(';')
def _LogSilentMarker(self, field_name): def _LogSilentMarker(self, field_name):
pass pass

@ -56,14 +56,14 @@ namespace internal {
namespace { namespace {
PROTOBUF_ATTRIBUTE_NO_DESTROY PROTOBUF_CONSTINIT ArenaBlock PROTOBUF_ATTRIBUTE_NO_DESTROY PROTOBUF_CONSTINIT ArenaBlock
kSentryArenaBlock = {}; kSentryArenaBlock = {};
ArenaBlock* SentryArenaBlock() { ArenaBlock* SentryArenaBlock() {
// const_cast<> is okay as kSentryArenaBlock will never be mutated. // const_cast<> is okay as kSentryArenaBlock will never be mutated.
return const_cast<ArenaBlock*>(&kSentryArenaBlock); return const_cast<ArenaBlock*>(&kSentryArenaBlock);
} }
} } // namespace
static SerialArena::Memory AllocateMemory(const AllocationPolicy* policy_ptr, static SerialArena::Memory AllocateMemory(const AllocationPolicy* policy_ptr,
size_t last_size, size_t min_bytes) { size_t last_size, size_t min_bytes) {

@ -67,6 +67,7 @@
#include "google/protobuf/compiler/cpp/padding_optimizer.h" #include "google/protobuf/compiler/cpp/padding_optimizer.h"
#include "google/protobuf/compiler/cpp/parse_function_generator.h" #include "google/protobuf/compiler/cpp/parse_function_generator.h"
#include "google/protobuf/descriptor.pb.h" #include "google/protobuf/descriptor.pb.h"
#include "google/protobuf/generated_message_tctable_gen.h"
// Must be included last. // Must be included last.
@ -473,8 +474,7 @@ void ColdChunkSkipper::OnStartChunk(int chunk, int cached_has_word_index,
} }
// Emit has_bit check for each has_bit_dword index. // Emit has_bit check for each has_bit_dword index.
format("if (PROTOBUF_PREDICT_FALSE("); format("if (PROTOBUF_PREDICT_FALSE(false");
int first_word = HasbitWord(chunk, 0);
while (chunk < limit_chunk_) { while (chunk < limit_chunk_) {
uint32_t mask = 0; uint32_t mask = 0;
int this_word = HasbitWord(chunk, 0); int this_word = HasbitWord(chunk, 0);
@ -488,9 +488,7 @@ void ColdChunkSkipper::OnStartChunk(int chunk, int cached_has_word_index,
} }
} }
if (this_word != first_word) { format(" ||\n ");
format(" ||\n ");
}
format.Set("mask", absl::Hex(mask, absl::kZeroPad8)); format.Set("mask", absl::Hex(mask, absl::kZeroPad8));
if (this_word == cached_has_word_index) { if (this_word == cached_has_word_index) {
format("(cached_has_bits & 0x$mask$u) != 0"); format("(cached_has_bits & 0x$mask$u) != 0");
@ -669,13 +667,46 @@ MessageGenerator::MessageGenerator(
message_layout_helper_->OptimizeLayout(&optimized_order_, options_, message_layout_helper_->OptimizeLayout(&optimized_order_, options_,
scc_analyzer_); scc_analyzer_);
// This message has hasbits iff one or more fields need one. // Allocate has_bit_indices_ iff the message has a field that needs hasbits.
int has_bit_fields = 0;
for (auto field : optimized_order_) { for (auto field : optimized_order_) {
if (HasHasbit(field)) { if (HasHasbit(field)) {
++has_bit_fields;
if (has_bit_indices_.empty()) { if (has_bit_indices_.empty()) {
has_bit_indices_.resize(descriptor_->field_count(), kNoHasbit); has_bit_indices_.resize(descriptor_->field_count(), kNoHasbit);
} }
has_bit_indices_[field->index()] = max_has_bit_index_++; }
}
// For messages with more than 32 has bits, do a first pass which only
// allocates has bits to fields that might be in the fast table.
bool allocated_fast[32] = {};
if (has_bit_fields > 32) {
for (const auto* field : optimized_order_) {
if (HasHasbit(field)) {
if (!IsDescriptorEligibleForFastParsing(field)) continue;
// A fast-table with > 16 entries ends up incorporating
// the high-bit for VarInt encoding into the index for the table,
// so we have to incorporate it here also, when computing the
// index likely to be used for that table.
int fast_index = field->number();
if (fast_index >= 16) fast_index = 16 + (fast_index % 16);
GOOGLE_CHECK_GE(fast_index, 1);
GOOGLE_CHECK_LT(fast_index, 32);
if (allocated_fast[fast_index]) continue;
allocated_fast[fast_index] = true;
has_bit_indices_[field->index()] = max_has_bit_index_++;
}
}
}
// Then do a second pass which allocates all remaining has bits
for (const auto* field : optimized_order_) {
if (HasHasbit(field)) {
if (has_bit_indices_[field->index()] == kNoHasbit) {
has_bit_indices_[field->index()] = max_has_bit_index_++;
}
} }
if (IsStringInlined(field, options_)) { if (IsStringInlined(field, options_)) {
if (inlined_string_indices_.empty()) { if (inlined_string_indices_.empty()) {
@ -687,6 +718,7 @@ MessageGenerator::MessageGenerator(
inlined_string_indices_[field->index()] = max_inlined_string_index_++; inlined_string_indices_[field->index()] = max_inlined_string_index_++;
} }
} }
GOOGLE_CHECK_EQ(max_has_bit_index_, has_bit_fields);
if (!has_bit_indices_.empty()) { if (!has_bit_indices_.empty()) {
field_generators_.SetHasBitIndices(has_bit_indices_); field_generators_.SetHasBitIndices(has_bit_indices_);

@ -850,6 +850,7 @@ void RepeatedImmutableEnumFieldLiteGenerator::GenerateBuilderMembers(
printer->Print(variables_, printer->Print(variables_,
"$deprecation$public Builder " "$deprecation$public Builder "
"${$add$capitalized_name$Value$}$(int value) {\n" "${$add$capitalized_name$Value$}$(int value) {\n"
" copyOnWrite();\n"
" instance.add$capitalized_name$Value(value);\n" " instance.add$capitalized_name$Value(value);\n"
" return this;\n" " return this;\n"
"}\n"); "}\n");

@ -179,30 +179,21 @@ bool IsFieldEligibleForFastParsing(
const TailCallTableInfo::FieldEntryInfo& entry, const TailCallTableInfo::FieldEntryInfo& entry,
const TailCallTableInfo::OptionProvider& option_provider) { const TailCallTableInfo::OptionProvider& option_provider) {
const auto* field = entry.field; const auto* field = entry.field;
// Conditions which exclude a field from being handled by fast parsing
// regardless of options should be considered in IsExcludedFromFastParsing.
if (!IsDescriptorEligibleForFastParsing(field)) return false;
const auto options = option_provider.GetForField(field); const auto options = option_provider.GetForField(field);
// Map, oneof, weak, and lazy fields are not handled on the fast path. // Weak, lazy, and split fields are not handled on the fast path.
if (field->is_map() || field->real_containing_oneof() || if (options.is_implicitly_weak) return false;
field->options().weak() || options.is_implicitly_weak || if (options.is_lazy) return false;
options.is_lazy || options.should_split) { if (options.should_split) return false;
return false;
}
// We will check for a valid auxiliary index range later. However, we might // We will check for a valid auxiliary index range later. However, we might
// want to change the value we check for inlined string fields. // want to change the value we check for inlined string fields.
int aux_idx = entry.aux_idx; int aux_idx = entry.aux_idx;
switch (field->type()) { switch (field->type()) {
case FieldDescriptor::TYPE_ENUM:
// If enum values are not validated at parse time, then this field can be
// handled on the fast path like an int32.
if (cpp::HasPreservingUnknownEnumSemantics(field)) {
break;
}
if (field->is_repeated() && field->is_packed()) {
return false;
}
break;
// Some bytes fields can be handled on fast path. // Some bytes fields can be handled on fast path.
case FieldDescriptor::TYPE_STRING: case FieldDescriptor::TYPE_STRING:
case FieldDescriptor::TYPE_BYTES: case FieldDescriptor::TYPE_BYTES:
@ -235,13 +226,6 @@ bool IsFieldEligibleForFastParsing(
return false; return false;
} }
// The largest tag that can be read by the tailcall parser is two bytes
// when varint-coded. This allows 14 bits for the numeric tag value:
// byte 0 byte 1
// 1nnnnttt 0nnnnnnn
// ^^^^^^^ ^^^^^^^
if (field->number() >= 1 << 11) return false;
return true; return true;
} }

@ -154,6 +154,49 @@ struct PROTOBUF_EXPORT TailCallTableInfo {
bool use_generated_fallback; bool use_generated_fallback;
}; };
// A quick check to see if a field can be placed into the fast-table.
// This is meant to be fast but not exhaustive; options that apply to some
// fields may also exclude them.
inline bool IsDescriptorEligibleForFastParsing(const FieldDescriptor* field) {
// Map, oneof, weak, and lazy fields are not handled on the fast path.
if (field->is_map()) return false;
if (field->real_containing_oneof()) return false;
if (field->options().weak()) return false;
switch (field->type()) {
case FieldDescriptor::TYPE_ENUM:
// If enum values are not validated at parse time, then this field can be
// handled on the fast path like an int32.
if (cpp::HasPreservingUnknownEnumSemantics(field)) {
break;
}
if (field->is_repeated() && field->is_packed()) {
return false;
}
break;
// Some bytes fields can be handled on fast path.
case FieldDescriptor::TYPE_STRING:
case FieldDescriptor::TYPE_BYTES:
if (field->options().ctype() != FieldOptions::STRING) {
return false;
}
break;
default:
break;
}
// The largest tag that can be read by the tailcall parser is two bytes
// when varint-coded. This allows 14 bits for the numeric tag value:
// byte 0 byte 1
// 1nnnnttt 0nnnnnnn
// ^^^^^^^ ^^^^^^^
if (field->number() >= 1 << 11) return false;
return true;
}
} // namespace internal } // namespace internal
} // namespace protobuf } // namespace protobuf
} // namespace google } // namespace google

@ -345,10 +345,15 @@ class PROTOBUF_EXPORT TcParser final {
// Manually unrolled and specialized Varint parsing. // Manually unrolled and specialized Varint parsing.
template <typename FieldType, int data_offset, int hasbit_idx> template <typename FieldType, int data_offset, int hasbit_idx>
static const char* SpecializedUnrolledVImpl1(PROTOBUF_TC_PARAM_DECL); static const char* SpecializedUnrolledVImpl1(PROTOBUF_TC_PARAM_DECL);
template <int data_offset, int hasbit_idx>
static const char* SpecializedFastV8S1(PROTOBUF_TC_PARAM_DECL);
template <typename FieldType, int data_offset, int hasbit_idx> template <typename FieldType, int data_offset, int hasbit_idx>
static constexpr TailCallParseFunc SingularVarintNoZag1() { static constexpr TailCallParseFunc SingularVarintNoZag1() {
if (data_offset < 100) { if (data_offset < 100) {
if (sizeof(FieldType) == 1) {
return &SpecializedFastV8S1<data_offset, hasbit_idx>;
}
return &SpecializedUnrolledVImpl1<FieldType, data_offset, hasbit_idx>; return &SpecializedUnrolledVImpl1<FieldType, data_offset, hasbit_idx>;
} else if (sizeof(FieldType) == 1) { } else if (sizeof(FieldType) == 1) {
return &FastV8S1; return &FastV8S1;
@ -625,6 +630,55 @@ class PROTOBUF_EXPORT TcParser final {
static const char* MpMap(PROTOBUF_TC_PARAM_DECL); static const char* MpMap(PROTOBUF_TC_PARAM_DECL);
}; };
// Notes:
// 1) if data_offset is negative, it's read from data.offset()
// 2) if hasbit_idx is negative, it's read from data.hasbit_idx()
template <int data_offset, int hasbit_idx>
const char* TcParser::SpecializedFastV8S1(PROTOBUF_TC_PARAM_DECL) {
using TagType = uint8_t;
// Special case for a varint bool field with a tag of 1 byte:
// The coded_tag() field will actually contain the value too and we can check
// both at the same time.
auto coded_tag = data.coded_tag<uint16_t>();
if (PROTOBUF_PREDICT_TRUE(coded_tag == 0x0000 || coded_tag == 0x0100)) {
auto& field =
RefAt<bool>(msg, data_offset >= 0 ? data_offset : data.offset());
// Note: we use `data.data` because Clang generates suboptimal code when
// using coded_tag.
// In x86_64 this uses the CH register to read the second byte out of
// `data`.
uint8_t value = data.data >> 8;
// The assume allows using a mov instead of test+setne.
PROTOBUF_ASSUME(value <= 1);
field = static_cast<bool>(value);
ptr += sizeof(TagType) + 1; // Consume the tag and the value.
if (hasbit_idx < 0) {
hasbits |= (uint64_t{1} << data.hasbit_idx());
} else {
if (hasbit_idx < 32) {
hasbits |= (uint64_t{1} << hasbit_idx);
} else {
static_assert(hasbit_idx == 63 || (hasbit_idx < 32),
"hard-coded hasbit_idx should be 0-31, or the special"
"value 63, which indicates the field has no has-bit.");
// TODO(jorg): investigate whether higher hasbit indices are worth
// supporting. Something like:
// auto& hasblock = TcParser::RefAt<uint32_t>(msg, hasbit_idx / 32 * 4);
// hasblock |= uint32_t{1} << (hasbit_idx % 32);
}
}
PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS);
}
// If it didn't match above either the tag is wrong, or the value is encoded
// non-canonically.
// Jump to MiniParse as wrong tag is the most probable reason.
PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS);
}
template <typename FieldType, int data_offset, int hasbit_idx> template <typename FieldType, int data_offset, int hasbit_idx>
const char* TcParser::SpecializedUnrolledVImpl1(PROTOBUF_TC_PARAM_DECL) { const char* TcParser::SpecializedUnrolledVImpl1(PROTOBUF_TC_PARAM_DECL) {
using TagType = uint8_t; using TagType = uint8_t;

@ -850,31 +850,7 @@ PROTOBUF_NOINLINE const char* TcParser::SingularVarBigint(
} }
const char* TcParser::FastV8S1(PROTOBUF_TC_PARAM_DECL) { const char* TcParser::FastV8S1(PROTOBUF_TC_PARAM_DECL) {
// Special case for a varint bool field with a tag of 1 byte: return SpecializedFastV8S1<-1, -1>(PROTOBUF_TC_PARAM_PASS);
// The coded_tag() field will actually contain the value too and we can check
// both at the same time.
auto coded_tag = data.coded_tag<uint16_t>();
if (PROTOBUF_PREDICT_TRUE(coded_tag == 0x0000 || coded_tag == 0x0100)) {
auto& field = RefAt<bool>(msg, data.offset());
// Note: we use `data.data` because Clang generates suboptimal code when
// using coded_tag.
// In x86_64 this uses the CH register to read the second byte out of
// `data`.
uint8_t value = data.data >> 8;
// The assume allows using a mov instead of test+setne.
PROTOBUF_ASSUME(value <= 1);
field = static_cast<bool>(value);
ptr += 2; // Consume the tag and the value.
hasbits |= (uint64_t{1} << data.hasbit_idx());
PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS);
}
// If it didn't match above either the tag is wrong, or the value is encoded
// non-canonically.
// Jump to MiniParse as wrong tag is the most probable reason.
PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS);
} }
const char* TcParser::FastV8S2(PROTOBUF_TC_PARAM_DECL) { const char* TcParser::FastV8S2(PROTOBUF_TC_PARAM_DECL) {
PROTOBUF_MUSTTAIL return SingularVarint<bool, uint16_t>( PROTOBUF_MUSTTAIL return SingularVarint<bool, uint16_t>(

Loading…
Cancel
Save