Merge pull request #10653 from mkruskal-google/sync-stage

Integrate from Piper for C++, Java, and Python
2 years ago · 391cc7b355
parent bc4cbf9a05 12bedd20c3
commit 391cc7b355
11 changed files with 170 additions and 63 deletions
--- a/CHANGES.txt
+++ b/CHANGES.txt
@ -39,6 +39,9 @@
    constructed FieldMaskTrees.
  * Optimized Java proto serialization gencode for protos having many extension ranges with few fields in between.
  * More thoroughly annotate public generated code in Java lite protocol buffers.
  * Fixed Bug in proto3 java lite repeated enum fields. Failed to call copyOnWrite before modifying previously built message. Causes modification to already "built" messages that should be immutable.
  * Refactoring java full runtime to reuse sub-message builders and prepare to migrate parsing logic from parse constructor to builder.
  * Fix Java reflection serialization of empty packed fields.
  Python
  * Changes ordering of printed fields in .pyi files from lexicographic to the same ordering found in the proto descriptor.
--- a/cmake/install.cmake
+++ b/cmake/install.cmake
@ -47,7 +47,7 @@ include(${protobuf_SOURCE_DIR}/src/file_lists.cmake)
 set(protobuf_HEADERS
  ${libprotobuf_hdrs}
  ${libprotoc_hdrs}
-  ${wkt_protos_files}
+  ${wkt_protos_proto_srcs}
  ${descriptor_proto_proto_srcs}
  ${plugin_proto_proto_srcs}
 )
--- a/java/lite/src/test/java/com/google/protobuf/LiteTest.java
+++ b/java/lite/src/test/java/com/google/protobuf/LiteTest.java
@ -51,6 +51,7 @@ import com.google.protobuf.UnittestLite.TestAllTypesLite.RepeatedGroup;
 import com.google.protobuf.UnittestLite.TestAllTypesLiteOrBuilder;
 import com.google.protobuf.UnittestLite.TestHugeFieldNumbersLite;
 import com.google.protobuf.UnittestLite.TestNestedExtensionLite;
 import com.google.protobuf.testing.Proto3TestingLite.Proto3MessageLite;
 import map_lite_test.MapTestProto.TestMap;
 import map_lite_test.MapTestProto.TestMap.MessageValue;
 import protobuf_unittest.NestedExtensionLite;
@ -215,6 +216,22 @@ public class LiteTest {
    assertThat(((Byte) memo.get(message)).intValue()).isEqualTo(1);
  }
  @Test
  public void testProto3EnumListValueCopyOnWrite() {
    Proto3MessageLite.Builder builder = Proto3MessageLite.newBuilder();
    Proto3MessageLite message = builder.build();
    builder.addFieldEnumList30Value(Proto3MessageLite.TestEnum.ONE_VALUE);
    assertThat(message.getFieldEnumList30List()).isEmpty();
    assertThat(builder.getFieldEnumList30List()).containsExactly(Proto3MessageLite.TestEnum.ONE);
    assertThat(message.getFieldEnumList30List()).isEmpty();
    Proto3MessageLite messageAfterBuild = builder.build();
    builder.clearFieldEnumList30();
    assertThat(builder.getFieldEnumList30List()).isEmpty();
    assertThat(messageAfterBuild.getFieldEnumList30List())
        .containsExactly(Proto3MessageLite.TestEnum.ONE);
  }
  @Test
  public void testSanityCopyOnWrite() throws InvalidProtocolBufferException {
    // Since builders are implemented as a thin wrapper around a message
--- a/python/google/protobuf/text_format.py
+++ b/python/google/protobuf/text_format.py
@ -557,7 +557,7 @@ class _Printer(object):
        # For groups, use the capitalized name.
        out.write(field.message_type.name)
      else:
-          out.write(field.name)
+        out.write(field.name)
    if (self.force_colon or
        field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_MESSAGE):
@ -923,8 +923,6 @@ class _Parser(object):
      # pylint: disable=protected-access
      field = message.Extensions._FindExtensionByName(name)
      # pylint: enable=protected-access
      if not field:
        if self.allow_unknown_extension:
          field = None
@ -1013,7 +1011,6 @@ class _Parser(object):
    if not tokenizer.TryConsume(','):
      tokenizer.TryConsume(';')
  def _LogSilentMarker(self, field_name):
    pass
--- a/src/google/protobuf/arena.cc
+++ b/src/google/protobuf/arena.cc
@ -56,14 +56,14 @@ namespace internal {
 namespace {
 PROTOBUF_ATTRIBUTE_NO_DESTROY PROTOBUF_CONSTINIT ArenaBlock
-kSentryArenaBlock = {};
+    kSentryArenaBlock = {};
 ArenaBlock* SentryArenaBlock() {
  // const_cast<> is okay as kSentryArenaBlock will never be mutated.
  return const_cast<ArenaBlock*>(&kSentryArenaBlock);
 }
-}
+}  // namespace
 static SerialArena::Memory AllocateMemory(const AllocationPolicy* policy_ptr,
                                          size_t last_size, size_t min_bytes) {
--- a/src/google/protobuf/compiler/cpp/message.cc
+++ b/src/google/protobuf/compiler/cpp/message.cc
@ -67,6 +67,7 @@
 #include "google/protobuf/compiler/cpp/padding_optimizer.h"
 #include "google/protobuf/compiler/cpp/parse_function_generator.h"
 #include "google/protobuf/descriptor.pb.h"
 #include "google/protobuf/generated_message_tctable_gen.h"
 // Must be included last.
@ -473,8 +474,7 @@ void ColdChunkSkipper::OnStartChunk(int chunk, int cached_has_word_index,
  }
  // Emit has_bit check for each has_bit_dword index.
-  format("if (PROTOBUF_PREDICT_FALSE(");
+  format("if (PROTOBUF_PREDICT_FALSE(false");
  int first_word = HasbitWord(chunk, 0);
  while (chunk < limit_chunk_) {
    uint32_t mask = 0;
    int this_word = HasbitWord(chunk, 0);
@ -488,9 +488,7 @@ void ColdChunkSkipper::OnStartChunk(int chunk, int cached_has_word_index,
      }
    }
-    if (this_word != first_word) {
+    format(" ||\n    ");
      format(" ||\n    ");
    }
    format.Set("mask", absl::Hex(mask, absl::kZeroPad8));
    if (this_word == cached_has_word_index) {
      format("(cached_has_bits & 0x$mask$u) != 0");
@ -669,13 +667,46 @@ MessageGenerator::MessageGenerator(
  message_layout_helper_->OptimizeLayout(&optimized_order_, options_,
                                         scc_analyzer_);
-  // This message has hasbits iff one or more fields need one.
+  // Allocate has_bit_indices_ iff the message has a field that needs hasbits.
  int has_bit_fields = 0;
  for (auto field : optimized_order_) {
    if (HasHasbit(field)) {
      ++has_bit_fields;
      if (has_bit_indices_.empty()) {
        has_bit_indices_.resize(descriptor_->field_count(), kNoHasbit);
      }
-      has_bit_indices_[field->index()] = max_has_bit_index_++;
+    }
  }
  // For messages with more than 32 has bits, do a first pass which only
  // allocates has bits to fields that might be in the fast table.
  bool allocated_fast[32] = {};
  if (has_bit_fields > 32) {
    for (const auto* field : optimized_order_) {
      if (HasHasbit(field)) {
        if (!IsDescriptorEligibleForFastParsing(field)) continue;
        // A fast-table with > 16 entries ends up incorporating
        // the high-bit for VarInt encoding into the index for the table,
        // so we have to incorporate it here also, when computing the
        // index likely to be used for that table.
        int fast_index = field->number();
        if (fast_index >= 16) fast_index = 16 + (fast_index % 16);
        GOOGLE_CHECK_GE(fast_index, 1);
        GOOGLE_CHECK_LT(fast_index, 32);
        if (allocated_fast[fast_index]) continue;
        allocated_fast[fast_index] = true;
        has_bit_indices_[field->index()] = max_has_bit_index_++;
      }
    }
  }
  // Then do a second pass which allocates all remaining has bits
  for (const auto* field : optimized_order_) {
    if (HasHasbit(field)) {
      if (has_bit_indices_[field->index()] == kNoHasbit) {
        has_bit_indices_[field->index()] = max_has_bit_index_++;
      }
    }
    if (IsStringInlined(field, options_)) {
      if (inlined_string_indices_.empty()) {
@ -687,6 +718,7 @@ MessageGenerator::MessageGenerator(
      inlined_string_indices_[field->index()] = max_inlined_string_index_++;
    }
  }
  GOOGLE_CHECK_EQ(max_has_bit_index_, has_bit_fields);
  if (!has_bit_indices_.empty()) {
    field_generators_.SetHasBitIndices(has_bit_indices_);
--- a/src/google/protobuf/compiler/java/enum_field_lite.cc
+++ b/src/google/protobuf/compiler/java/enum_field_lite.cc
@ -850,6 +850,7 @@ void RepeatedImmutableEnumFieldLiteGenerator::GenerateBuilderMembers(
    printer->Print(variables_,
                   "$deprecation$public Builder "
                   "${$add$capitalized_name$Value$}$(int value) {\n"
                   "  copyOnWrite();\n"
                   "  instance.add$capitalized_name$Value(value);\n"
                   "  return this;\n"
                   "}\n");
--- a/src/google/protobuf/generated_message_tctable_gen.cc
+++ b/src/google/protobuf/generated_message_tctable_gen.cc
@ -179,30 +179,21 @@ bool IsFieldEligibleForFastParsing(
    const TailCallTableInfo::FieldEntryInfo& entry,
    const TailCallTableInfo::OptionProvider& option_provider) {
  const auto* field = entry.field;
  // Conditions which exclude a field from being handled by fast parsing
  // regardless of options should be considered in IsExcludedFromFastParsing.
  if (!IsDescriptorEligibleForFastParsing(field)) return false;
  const auto options = option_provider.GetForField(field);
-  // Map, oneof, weak, and lazy fields are not handled on the fast path.
+  // Weak, lazy, and split fields are not handled on the fast path.
-  if (field->is_map() || field->real_containing_oneof() ||
+  if (options.is_implicitly_weak) return false;
-      field->options().weak() || options.is_implicitly_weak ||
+  if (options.is_lazy) return false;
-      options.is_lazy || options.should_split) {
+  if (options.should_split) return false;
    return false;
  }
  // We will check for a valid auxiliary index range later. However, we might
  // want to change the value we check for inlined string fields.
  int aux_idx = entry.aux_idx;
  switch (field->type()) {
    case FieldDescriptor::TYPE_ENUM:
      // If enum values are not validated at parse time, then this field can be
      // handled on the fast path like an int32.
      if (cpp::HasPreservingUnknownEnumSemantics(field)) {
        break;
      }
      if (field->is_repeated() && field->is_packed()) {
        return false;
      }
      break;
      // Some bytes fields can be handled on fast path.
    case FieldDescriptor::TYPE_STRING:
    case FieldDescriptor::TYPE_BYTES:
@ -235,13 +226,6 @@ bool IsFieldEligibleForFastParsing(
    return false;
  }
  // The largest tag that can be read by the tailcall parser is two bytes
  // when varint-coded. This allows 14 bits for the numeric tag value:
  //   byte 0   byte 1
  //   1nnnnttt 0nnnnnnn
  //    ^^^^^^^  ^^^^^^^
  if (field->number() >= 1 << 11) return false;
  return true;
 }
--- a/src/google/protobuf/generated_message_tctable_gen.h
+++ b/src/google/protobuf/generated_message_tctable_gen.h
@ -154,6 +154,49 @@ struct PROTOBUF_EXPORT TailCallTableInfo {
  bool use_generated_fallback;
 };
 // A quick check to see if a field can be placed into the fast-table.
 // This is meant to be fast but not exhaustive; options that apply to some
 // fields may also exclude them.
 inline bool IsDescriptorEligibleForFastParsing(const FieldDescriptor* field) {
  // Map, oneof, weak, and lazy fields are not handled on the fast path.
  if (field->is_map()) return false;
  if (field->real_containing_oneof()) return false;
  if (field->options().weak()) return false;
  switch (field->type()) {
    case FieldDescriptor::TYPE_ENUM:
      // If enum values are not validated at parse time, then this field can be
      // handled on the fast path like an int32.
      if (cpp::HasPreservingUnknownEnumSemantics(field)) {
        break;
      }
      if (field->is_repeated() && field->is_packed()) {
        return false;
      }
      break;
      // Some bytes fields can be handled on fast path.
    case FieldDescriptor::TYPE_STRING:
    case FieldDescriptor::TYPE_BYTES:
      if (field->options().ctype() != FieldOptions::STRING) {
        return false;
      }
      break;
    default:
      break;
  }
  // The largest tag that can be read by the tailcall parser is two bytes
  // when varint-coded. This allows 14 bits for the numeric tag value:
  //   byte 0   byte 1
  //   1nnnnttt 0nnnnnnn
  //    ^^^^^^^  ^^^^^^^
  if (field->number() >= 1 << 11) return false;
  return true;
 }
 }  // namespace internal
 }  // namespace protobuf
 }  // namespace google
--- a/src/google/protobuf/generated_message_tctable_impl.h
+++ b/src/google/protobuf/generated_message_tctable_impl.h
@ -345,10 +345,15 @@ class PROTOBUF_EXPORT TcParser final {
  // Manually unrolled and specialized Varint parsing.
  template <typename FieldType, int data_offset, int hasbit_idx>
  static const char* SpecializedUnrolledVImpl1(PROTOBUF_TC_PARAM_DECL);
  template <int data_offset, int hasbit_idx>
  static const char* SpecializedFastV8S1(PROTOBUF_TC_PARAM_DECL);
  template <typename FieldType, int data_offset, int hasbit_idx>
  static constexpr TailCallParseFunc SingularVarintNoZag1() {
    if (data_offset < 100) {
      if (sizeof(FieldType) == 1) {
        return &SpecializedFastV8S1<data_offset, hasbit_idx>;
      }
      return &SpecializedUnrolledVImpl1<FieldType, data_offset, hasbit_idx>;
    } else if (sizeof(FieldType) == 1) {
      return &FastV8S1;
@ -625,6 +630,55 @@ class PROTOBUF_EXPORT TcParser final {
  static const char* MpMap(PROTOBUF_TC_PARAM_DECL);
 };
 // Notes:
 // 1) if data_offset is negative, it's read from data.offset()
 // 2) if hasbit_idx is negative, it's read from data.hasbit_idx()
 template <int data_offset, int hasbit_idx>
 const char* TcParser::SpecializedFastV8S1(PROTOBUF_TC_PARAM_DECL) {
  using TagType = uint8_t;
  // Special case for a varint bool field with a tag of 1 byte:
  // The coded_tag() field will actually contain the value too and we can check
  // both at the same time.
  auto coded_tag = data.coded_tag<uint16_t>();
  if (PROTOBUF_PREDICT_TRUE(coded_tag == 0x0000 || coded_tag == 0x0100)) {
    auto& field =
        RefAt<bool>(msg, data_offset >= 0 ? data_offset : data.offset());
    // Note: we use `data.data` because Clang generates suboptimal code when
    // using coded_tag.
    // In x86_64 this uses the CH register to read the second byte out of
    // `data`.
    uint8_t value = data.data >> 8;
    // The assume allows using a mov instead of test+setne.
    PROTOBUF_ASSUME(value <= 1);
    field = static_cast<bool>(value);
    ptr += sizeof(TagType) + 1;  // Consume the tag and the value.
    if (hasbit_idx < 0) {
      hasbits |= (uint64_t{1} << data.hasbit_idx());
    } else {
      if (hasbit_idx < 32) {
        hasbits |= (uint64_t{1} << hasbit_idx);
      } else {
        static_assert(hasbit_idx == 63 || (hasbit_idx < 32),
                      "hard-coded hasbit_idx should be 0-31, or the special"
                      "value 63, which indicates the field has no has-bit.");
        // TODO(jorg): investigate whether higher hasbit indices are worth
        // supporting. Something like:
        // auto& hasblock = TcParser::RefAt<uint32_t>(msg, hasbit_idx / 32 * 4);
        // hasblock |= uint32_t{1} << (hasbit_idx % 32);
      }
    }
    PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS);
  }
  // If it didn't match above either the tag is wrong, or the value is encoded
  // non-canonically.
  // Jump to MiniParse as wrong tag is the most probable reason.
  PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS);
 }
 template <typename FieldType, int data_offset, int hasbit_idx>
 const char* TcParser::SpecializedUnrolledVImpl1(PROTOBUF_TC_PARAM_DECL) {
  using TagType = uint8_t;
--- a/src/google/protobuf/generated_message_tctable_lite.cc
+++ b/src/google/protobuf/generated_message_tctable_lite.cc
@ -850,31 +850,7 @@ PROTOBUF_NOINLINE const char* TcParser::SingularVarBigint(
 }
 const char* TcParser::FastV8S1(PROTOBUF_TC_PARAM_DECL) {
-  // Special case for a varint bool field with a tag of 1 byte:
+  return SpecializedFastV8S1<-1, -1>(PROTOBUF_TC_PARAM_PASS);
  // The coded_tag() field will actually contain the value too and we can check
  // both at the same time.
  auto coded_tag = data.coded_tag<uint16_t>();
  if (PROTOBUF_PREDICT_TRUE(coded_tag == 0x0000 || coded_tag == 0x0100)) {
    auto& field = RefAt<bool>(msg, data.offset());
    // Note: we use `data.data` because Clang generates suboptimal code when
    // using coded_tag.
    // In x86_64 this uses the CH register to read the second byte out of
    // `data`.
    uint8_t value = data.data >> 8;
    // The assume allows using a mov instead of test+setne.
    PROTOBUF_ASSUME(value <= 1);
    field = static_cast<bool>(value);
    ptr += 2;  // Consume the tag and the value.
    hasbits |= (uint64_t{1} << data.hasbit_idx());
    PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS);
  }
  // If it didn't match above either the tag is wrong, or the value is encoded
  // non-canonically.
  // Jump to MiniParse as wrong tag is the most probable reason.
  PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS);
 }
 const char* TcParser::FastV8S2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return SingularVarint<bool, uint16_t>(