Merge tag 'refs/tags/sync-piper' into sync-stage

# Conflicts:
#	src/google/protobuf/arena.cc
#	src/google/protobuf/compiler/java/enum_field_lite.cc
pull/10653/head
Mike Kruskal 2 years ago
commit 38da0b3d42
  1. 2
      cmake/install.cmake
  2. 17
      java/lite/src/test/java/com/google/protobuf/LiteTest.java
  3. 5
      python/google/protobuf/text_format.py
  4. 4
      src/google/protobuf/arena.cc
  5. 46
      src/google/protobuf/compiler/cpp/message.cc
  6. 1
      src/google/protobuf/compiler/java/enum_field_lite.cc
  7. 32
      src/google/protobuf/generated_message_tctable_gen.cc
  8. 43
      src/google/protobuf/generated_message_tctable_gen.h
  9. 54
      src/google/protobuf/generated_message_tctable_impl.h
  10. 26
      src/google/protobuf/generated_message_tctable_lite.cc

@ -47,7 +47,7 @@ include(${protobuf_SOURCE_DIR}/src/file_lists.cmake)
set(protobuf_HEADERS
${libprotobuf_hdrs}
${libprotoc_hdrs}
${wkt_protos_files}
${wkt_protos_proto_srcs}
${descriptor_proto_proto_srcs}
${plugin_proto_proto_srcs}
)

@ -51,6 +51,7 @@ import com.google.protobuf.UnittestLite.TestAllTypesLite.RepeatedGroup;
import com.google.protobuf.UnittestLite.TestAllTypesLiteOrBuilder;
import com.google.protobuf.UnittestLite.TestHugeFieldNumbersLite;
import com.google.protobuf.UnittestLite.TestNestedExtensionLite;
import com.google.protobuf.testing.Proto3TestingLite.Proto3MessageLite;
import map_lite_test.MapTestProto.TestMap;
import map_lite_test.MapTestProto.TestMap.MessageValue;
import protobuf_unittest.NestedExtensionLite;
@ -215,6 +216,22 @@ public class LiteTest {
assertThat(((Byte) memo.get(message)).intValue()).isEqualTo(1);
}
@Test
public void testProto3EnumListValueCopyOnWrite() {
Proto3MessageLite.Builder builder = Proto3MessageLite.newBuilder();
Proto3MessageLite message = builder.build();
builder.addFieldEnumList30Value(Proto3MessageLite.TestEnum.ONE_VALUE);
assertThat(message.getFieldEnumList30List()).isEmpty();
assertThat(builder.getFieldEnumList30List()).containsExactly(Proto3MessageLite.TestEnum.ONE);
assertThat(message.getFieldEnumList30List()).isEmpty();
Proto3MessageLite messageAfterBuild = builder.build();
builder.clearFieldEnumList30();
assertThat(builder.getFieldEnumList30List()).isEmpty();
assertThat(messageAfterBuild.getFieldEnumList30List())
.containsExactly(Proto3MessageLite.TestEnum.ONE);
}
@Test
public void testSanityCopyOnWrite() throws InvalidProtocolBufferException {
// Since builders are implemented as a thin wrapper around a message

@ -557,7 +557,7 @@ class _Printer(object):
# For groups, use the capitalized name.
out.write(field.message_type.name)
else:
out.write(field.name)
out.write(field.name)
if (self.force_colon or
field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_MESSAGE):
@ -923,8 +923,6 @@ class _Parser(object):
# pylint: disable=protected-access
field = message.Extensions._FindExtensionByName(name)
# pylint: enable=protected-access
if not field:
if self.allow_unknown_extension:
field = None
@ -1013,7 +1011,6 @@ class _Parser(object):
if not tokenizer.TryConsume(','):
tokenizer.TryConsume(';')
def _LogSilentMarker(self, field_name):
pass

@ -56,14 +56,14 @@ namespace internal {
namespace {
PROTOBUF_ATTRIBUTE_NO_DESTROY PROTOBUF_CONSTINIT ArenaBlock
kSentryArenaBlock = {};
kSentryArenaBlock = {};
ArenaBlock* SentryArenaBlock() {
// const_cast<> is okay as kSentryArenaBlock will never be mutated.
return const_cast<ArenaBlock*>(&kSentryArenaBlock);
}
}
} // namespace
static SerialArena::Memory AllocateMemory(const AllocationPolicy* policy_ptr,
size_t last_size, size_t min_bytes) {

@ -67,6 +67,7 @@
#include "google/protobuf/compiler/cpp/padding_optimizer.h"
#include "google/protobuf/compiler/cpp/parse_function_generator.h"
#include "google/protobuf/descriptor.pb.h"
#include "google/protobuf/generated_message_tctable_gen.h"
// Must be included last.
@ -473,8 +474,7 @@ void ColdChunkSkipper::OnStartChunk(int chunk, int cached_has_word_index,
}
// Emit has_bit check for each has_bit_dword index.
format("if (PROTOBUF_PREDICT_FALSE(");
int first_word = HasbitWord(chunk, 0);
format("if (PROTOBUF_PREDICT_FALSE(false");
while (chunk < limit_chunk_) {
uint32_t mask = 0;
int this_word = HasbitWord(chunk, 0);
@ -488,9 +488,7 @@ void ColdChunkSkipper::OnStartChunk(int chunk, int cached_has_word_index,
}
}
if (this_word != first_word) {
format(" ||\n ");
}
format(" ||\n ");
format.Set("mask", absl::Hex(mask, absl::kZeroPad8));
if (this_word == cached_has_word_index) {
format("(cached_has_bits & 0x$mask$u) != 0");
@ -669,13 +667,46 @@ MessageGenerator::MessageGenerator(
message_layout_helper_->OptimizeLayout(&optimized_order_, options_,
scc_analyzer_);
// This message has hasbits iff one or more fields need one.
// Allocate has_bit_indices_ iff the message has a field that needs hasbits.
int has_bit_fields = 0;
for (auto field : optimized_order_) {
if (HasHasbit(field)) {
++has_bit_fields;
if (has_bit_indices_.empty()) {
has_bit_indices_.resize(descriptor_->field_count(), kNoHasbit);
}
has_bit_indices_[field->index()] = max_has_bit_index_++;
}
}
// For messages with more than 32 has bits, do a first pass which only
// allocates has bits to fields that might be in the fast table.
bool allocated_fast[32] = {};
if (has_bit_fields > 32) {
for (const auto* field : optimized_order_) {
if (HasHasbit(field)) {
if (!IsDescriptorEligibleForFastParsing(field)) continue;
// A fast-table with > 16 entries ends up incorporating
// the high-bit for VarInt encoding into the index for the table,
// so we have to incorporate it here also, when computing the
// index likely to be used for that table.
int fast_index = field->number();
if (fast_index >= 16) fast_index = 16 + (fast_index % 16);
GOOGLE_CHECK_GE(fast_index, 1);
GOOGLE_CHECK_LT(fast_index, 32);
if (allocated_fast[fast_index]) continue;
allocated_fast[fast_index] = true;
has_bit_indices_[field->index()] = max_has_bit_index_++;
}
}
}
// Then do a second pass which allocates all remaining has bits
for (const auto* field : optimized_order_) {
if (HasHasbit(field)) {
if (has_bit_indices_[field->index()] == kNoHasbit) {
has_bit_indices_[field->index()] = max_has_bit_index_++;
}
}
if (IsStringInlined(field, options_)) {
if (inlined_string_indices_.empty()) {
@ -687,6 +718,7 @@ MessageGenerator::MessageGenerator(
inlined_string_indices_[field->index()] = max_inlined_string_index_++;
}
}
GOOGLE_CHECK_EQ(max_has_bit_index_, has_bit_fields);
if (!has_bit_indices_.empty()) {
field_generators_.SetHasBitIndices(has_bit_indices_);

@ -850,6 +850,7 @@ void RepeatedImmutableEnumFieldLiteGenerator::GenerateBuilderMembers(
printer->Print(variables_,
"$deprecation$public Builder "
"${$add$capitalized_name$Value$}$(int value) {\n"
" copyOnWrite();\n"
" instance.add$capitalized_name$Value(value);\n"
" return this;\n"
"}\n");

@ -179,30 +179,21 @@ bool IsFieldEligibleForFastParsing(
const TailCallTableInfo::FieldEntryInfo& entry,
const TailCallTableInfo::OptionProvider& option_provider) {
const auto* field = entry.field;
// Conditions which exclude a field from being handled by fast parsing
// regardless of options should be considered in IsExcludedFromFastParsing.
if (!IsDescriptorEligibleForFastParsing(field)) return false;
const auto options = option_provider.GetForField(field);
// Map, oneof, weak, and lazy fields are not handled on the fast path.
if (field->is_map() || field->real_containing_oneof() ||
field->options().weak() || options.is_implicitly_weak ||
options.is_lazy || options.should_split) {
return false;
}
// Weak, lazy, and split fields are not handled on the fast path.
if (options.is_implicitly_weak) return false;
if (options.is_lazy) return false;
if (options.should_split) return false;
// We will check for a valid auxiliary index range later. However, we might
// want to change the value we check for inlined string fields.
int aux_idx = entry.aux_idx;
switch (field->type()) {
case FieldDescriptor::TYPE_ENUM:
// If enum values are not validated at parse time, then this field can be
// handled on the fast path like an int32.
if (cpp::HasPreservingUnknownEnumSemantics(field)) {
break;
}
if (field->is_repeated() && field->is_packed()) {
return false;
}
break;
// Some bytes fields can be handled on fast path.
case FieldDescriptor::TYPE_STRING:
case FieldDescriptor::TYPE_BYTES:
@ -235,13 +226,6 @@ bool IsFieldEligibleForFastParsing(
return false;
}
// The largest tag that can be read by the tailcall parser is two bytes
// when varint-coded. This allows 14 bits for the numeric tag value:
// byte 0 byte 1
// 1nnnnttt 0nnnnnnn
// ^^^^^^^ ^^^^^^^
if (field->number() >= 1 << 11) return false;
return true;
}

@ -154,6 +154,49 @@ struct PROTOBUF_EXPORT TailCallTableInfo {
bool use_generated_fallback;
};
// A quick check to see if a field can be placed into the fast-table.
// This is meant to be fast but not exhaustive; options that apply to some
// fields may also exclude them.
inline bool IsDescriptorEligibleForFastParsing(const FieldDescriptor* field) {
// Map, oneof, weak, and lazy fields are not handled on the fast path.
if (field->is_map()) return false;
if (field->real_containing_oneof()) return false;
if (field->options().weak()) return false;
switch (field->type()) {
case FieldDescriptor::TYPE_ENUM:
// If enum values are not validated at parse time, then this field can be
// handled on the fast path like an int32.
if (cpp::HasPreservingUnknownEnumSemantics(field)) {
break;
}
if (field->is_repeated() && field->is_packed()) {
return false;
}
break;
// Some bytes fields can be handled on fast path.
case FieldDescriptor::TYPE_STRING:
case FieldDescriptor::TYPE_BYTES:
if (field->options().ctype() != FieldOptions::STRING) {
return false;
}
break;
default:
break;
}
// The largest tag that can be read by the tailcall parser is two bytes
// when varint-coded. This allows 14 bits for the numeric tag value:
// byte 0 byte 1
// 1nnnnttt 0nnnnnnn
// ^^^^^^^ ^^^^^^^
if (field->number() >= 1 << 11) return false;
return true;
}
} // namespace internal
} // namespace protobuf
} // namespace google

@ -345,10 +345,15 @@ class PROTOBUF_EXPORT TcParser final {
// Manually unrolled and specialized Varint parsing.
template <typename FieldType, int data_offset, int hasbit_idx>
static const char* SpecializedUnrolledVImpl1(PROTOBUF_TC_PARAM_DECL);
template <int data_offset, int hasbit_idx>
static const char* SpecializedFastV8S1(PROTOBUF_TC_PARAM_DECL);
template <typename FieldType, int data_offset, int hasbit_idx>
static constexpr TailCallParseFunc SingularVarintNoZag1() {
if (data_offset < 100) {
if (sizeof(FieldType) == 1) {
return &SpecializedFastV8S1<data_offset, hasbit_idx>;
}
return &SpecializedUnrolledVImpl1<FieldType, data_offset, hasbit_idx>;
} else if (sizeof(FieldType) == 1) {
return &FastV8S1;
@ -625,6 +630,55 @@ class PROTOBUF_EXPORT TcParser final {
static const char* MpMap(PROTOBUF_TC_PARAM_DECL);
};
// Notes:
// 1) if data_offset is negative, it's read from data.offset()
// 2) if hasbit_idx is negative, it's read from data.hasbit_idx()
template <int data_offset, int hasbit_idx>
const char* TcParser::SpecializedFastV8S1(PROTOBUF_TC_PARAM_DECL) {
using TagType = uint8_t;
// Special case for a varint bool field with a tag of 1 byte:
// The coded_tag() field will actually contain the value too and we can check
// both at the same time.
auto coded_tag = data.coded_tag<uint16_t>();
if (PROTOBUF_PREDICT_TRUE(coded_tag == 0x0000 || coded_tag == 0x0100)) {
auto& field =
RefAt<bool>(msg, data_offset >= 0 ? data_offset : data.offset());
// Note: we use `data.data` because Clang generates suboptimal code when
// using coded_tag.
// In x86_64 this uses the CH register to read the second byte out of
// `data`.
uint8_t value = data.data >> 8;
// The assume allows using a mov instead of test+setne.
PROTOBUF_ASSUME(value <= 1);
field = static_cast<bool>(value);
ptr += sizeof(TagType) + 1; // Consume the tag and the value.
if (hasbit_idx < 0) {
hasbits |= (uint64_t{1} << data.hasbit_idx());
} else {
if (hasbit_idx < 32) {
hasbits |= (uint64_t{1} << hasbit_idx);
} else {
static_assert(hasbit_idx == 63 || (hasbit_idx < 32),
"hard-coded hasbit_idx should be 0-31, or the special"
"value 63, which indicates the field has no has-bit.");
// TODO(jorg): investigate whether higher hasbit indices are worth
// supporting. Something like:
// auto& hasblock = TcParser::RefAt<uint32_t>(msg, hasbit_idx / 32 * 4);
// hasblock |= uint32_t{1} << (hasbit_idx % 32);
}
}
PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS);
}
// If it didn't match above either the tag is wrong, or the value is encoded
// non-canonically.
// Jump to MiniParse as wrong tag is the most probable reason.
PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS);
}
template <typename FieldType, int data_offset, int hasbit_idx>
const char* TcParser::SpecializedUnrolledVImpl1(PROTOBUF_TC_PARAM_DECL) {
using TagType = uint8_t;

@ -850,31 +850,7 @@ PROTOBUF_NOINLINE const char* TcParser::SingularVarBigint(
}
const char* TcParser::FastV8S1(PROTOBUF_TC_PARAM_DECL) {
// Special case for a varint bool field with a tag of 1 byte:
// The coded_tag() field will actually contain the value too and we can check
// both at the same time.
auto coded_tag = data.coded_tag<uint16_t>();
if (PROTOBUF_PREDICT_TRUE(coded_tag == 0x0000 || coded_tag == 0x0100)) {
auto& field = RefAt<bool>(msg, data.offset());
// Note: we use `data.data` because Clang generates suboptimal code when
// using coded_tag.
// In x86_64 this uses the CH register to read the second byte out of
// `data`.
uint8_t value = data.data >> 8;
// The assume allows using a mov instead of test+setne.
PROTOBUF_ASSUME(value <= 1);
field = static_cast<bool>(value);
ptr += 2; // Consume the tag and the value.
hasbits |= (uint64_t{1} << data.hasbit_idx());
PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS);
}
// If it didn't match above either the tag is wrong, or the value is encoded
// non-canonically.
// Jump to MiniParse as wrong tag is the most probable reason.
PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS);
return SpecializedFastV8S1<-1, -1>(PROTOBUF_TC_PARAM_PASS);
}
const char* TcParser::FastV8S2(PROTOBUF_TC_PARAM_DECL) {
PROTOBUF_MUSTTAIL return SingularVarint<bool, uint16_t>(

Loading…
Cancel
Save