Merge tag 'refs/tags/sync-piper' into sync-stage

# Conflicts: # src/google/protobuf/arena.cc # src/google/protobuf/compiler/java/enum_field_lite.cc
2 years ago · 38da0b3d42
parent bc4cbf9a05 e99b2819ee
commit 38da0b3d42
10 changed files with 167 additions and 63 deletions
--- a/cmake/install.cmake
+++ b/cmake/install.cmake
@ -47,7 +47,7 @@ include(${protobuf_SOURCE_DIR}/src/file_lists.cmake)
 set(protobuf_HEADERS
  ${libprotobuf_hdrs}
  ${libprotoc_hdrs}
-  ${wkt_protos_files}
+  ${wkt_protos_proto_srcs}
  ${descriptor_proto_proto_srcs}
  ${plugin_proto_proto_srcs}
 )
--- a/java/lite/src/test/java/com/google/protobuf/LiteTest.java
+++ b/java/lite/src/test/java/com/google/protobuf/LiteTest.java
@ -51,6 +51,7 @@ import com.google.protobuf.UnittestLite.TestAllTypesLite.RepeatedGroup;
 import com.google.protobuf.UnittestLite.TestAllTypesLiteOrBuilder;
 import com.google.protobuf.UnittestLite.TestHugeFieldNumbersLite;
 import com.google.protobuf.UnittestLite.TestNestedExtensionLite;
+import com.google.protobuf.testing.Proto3TestingLite.Proto3MessageLite;
 import map_lite_test.MapTestProto.TestMap;
 import map_lite_test.MapTestProto.TestMap.MessageValue;
 import protobuf_unittest.NestedExtensionLite;
@ -215,6 +216,22 @@ public class LiteTest {
    assertThat(((Byte) memo.get(message)).intValue()).isEqualTo(1);
  }

+  @Test
+  public void testProto3EnumListValueCopyOnWrite() {
+    Proto3MessageLite.Builder builder = Proto3MessageLite.newBuilder();
+
+    Proto3MessageLite message = builder.build();
+    builder.addFieldEnumList30Value(Proto3MessageLite.TestEnum.ONE_VALUE);
+    assertThat(message.getFieldEnumList30List()).isEmpty();
+    assertThat(builder.getFieldEnumList30List()).containsExactly(Proto3MessageLite.TestEnum.ONE);
+    assertThat(message.getFieldEnumList30List()).isEmpty();
+    Proto3MessageLite messageAfterBuild = builder.build();
+    builder.clearFieldEnumList30();
+    assertThat(builder.getFieldEnumList30List()).isEmpty();
+    assertThat(messageAfterBuild.getFieldEnumList30List())
+        .containsExactly(Proto3MessageLite.TestEnum.ONE);
+  }
+
  @Test
  public void testSanityCopyOnWrite() throws InvalidProtocolBufferException {
    // Since builders are implemented as a thin wrapper around a message
--- a/python/google/protobuf/text_format.py
+++ b/python/google/protobuf/text_format.py
@ -557,7 +557,7 @@ class _Printer(object):
        # For groups, use the capitalized name.
        out.write(field.message_type.name)
      else:
-          out.write(field.name)
+        out.write(field.name)

    if (self.force_colon or
        field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_MESSAGE):
@ -923,8 +923,6 @@ class _Parser(object):
      # pylint: disable=protected-access
      field = message.Extensions._FindExtensionByName(name)
      # pylint: enable=protected-access
-
-
      if not field:
        if self.allow_unknown_extension:
          field = None
@ -1013,7 +1011,6 @@ class _Parser(object):
    if not tokenizer.TryConsume(','):
      tokenizer.TryConsume(';')

-
  def _LogSilentMarker(self, field_name):
    pass

--- a/src/google/protobuf/arena.cc
+++ b/src/google/protobuf/arena.cc
@ -56,14 +56,14 @@ namespace internal {
 namespace {

 PROTOBUF_ATTRIBUTE_NO_DESTROY PROTOBUF_CONSTINIT ArenaBlock
-kSentryArenaBlock = {};
+    kSentryArenaBlock = {};

 ArenaBlock* SentryArenaBlock() {
  // const_cast<> is okay as kSentryArenaBlock will never be mutated.
  return const_cast<ArenaBlock*>(&kSentryArenaBlock);
 }

-}
+}  // namespace

 static SerialArena::Memory AllocateMemory(const AllocationPolicy* policy_ptr,
                                          size_t last_size, size_t min_bytes) {
--- a/src/google/protobuf/compiler/cpp/message.cc
+++ b/src/google/protobuf/compiler/cpp/message.cc
@ -67,6 +67,7 @@
 #include "google/protobuf/compiler/cpp/padding_optimizer.h"
 #include "google/protobuf/compiler/cpp/parse_function_generator.h"
 #include "google/protobuf/descriptor.pb.h"
+#include "google/protobuf/generated_message_tctable_gen.h"


 // Must be included last.
@ -473,8 +474,7 @@ void ColdChunkSkipper::OnStartChunk(int chunk, int cached_has_word_index,
  }

  // Emit has_bit check for each has_bit_dword index.
-  format("if (PROTOBUF_PREDICT_FALSE(");
-  int first_word = HasbitWord(chunk, 0);
+  format("if (PROTOBUF_PREDICT_FALSE(false");
  while (chunk < limit_chunk_) {
    uint32_t mask = 0;
    int this_word = HasbitWord(chunk, 0);
@ -488,9 +488,7 @@ void ColdChunkSkipper::OnStartChunk(int chunk, int cached_has_word_index,
      }
    }

-    if (this_word != first_word) {
-      format(" ||\n    ");
-    }
+    format(" ||\n    ");
    format.Set("mask", absl::Hex(mask, absl::kZeroPad8));
    if (this_word == cached_has_word_index) {
      format("(cached_has_bits & 0x$mask$u) != 0");
@ -669,13 +667,46 @@ MessageGenerator::MessageGenerator(
  message_layout_helper_->OptimizeLayout(&optimized_order_, options_,
                                         scc_analyzer_);

-  // This message has hasbits iff one or more fields need one.
+  // Allocate has_bit_indices_ iff the message has a field that needs hasbits.
+  int has_bit_fields = 0;
  for (auto field : optimized_order_) {
    if (HasHasbit(field)) {
+      ++has_bit_fields;
      if (has_bit_indices_.empty()) {
        has_bit_indices_.resize(descriptor_->field_count(), kNoHasbit);
      }
-      has_bit_indices_[field->index()] = max_has_bit_index_++;
+    }
+  }
+  // For messages with more than 32 has bits, do a first pass which only
+  // allocates has bits to fields that might be in the fast table.
+  bool allocated_fast[32] = {};
+  if (has_bit_fields > 32) {
+    for (const auto* field : optimized_order_) {
+      if (HasHasbit(field)) {
+        if (!IsDescriptorEligibleForFastParsing(field)) continue;
+
+        // A fast-table with > 16 entries ends up incorporating
+        // the high-bit for VarInt encoding into the index for the table,
+        // so we have to incorporate it here also, when computing the
+        // index likely to be used for that table.
+        int fast_index = field->number();
+        if (fast_index >= 16) fast_index = 16 + (fast_index % 16);
+        GOOGLE_CHECK_GE(fast_index, 1);
+        GOOGLE_CHECK_LT(fast_index, 32);
+
+        if (allocated_fast[fast_index]) continue;
+        allocated_fast[fast_index] = true;
+
+        has_bit_indices_[field->index()] = max_has_bit_index_++;
+      }
+    }
+  }
+  // Then do a second pass which allocates all remaining has bits
+  for (const auto* field : optimized_order_) {
+    if (HasHasbit(field)) {
+      if (has_bit_indices_[field->index()] == kNoHasbit) {
+        has_bit_indices_[field->index()] = max_has_bit_index_++;
+      }
    }
    if (IsStringInlined(field, options_)) {
      if (inlined_string_indices_.empty()) {
@ -687,6 +718,7 @@ MessageGenerator::MessageGenerator(
      inlined_string_indices_[field->index()] = max_inlined_string_index_++;
    }
  }
+  GOOGLE_CHECK_EQ(max_has_bit_index_, has_bit_fields);

  if (!has_bit_indices_.empty()) {
    field_generators_.SetHasBitIndices(has_bit_indices_);
--- a/src/google/protobuf/compiler/java/enum_field_lite.cc
+++ b/src/google/protobuf/compiler/java/enum_field_lite.cc
@ -850,6 +850,7 @@ void RepeatedImmutableEnumFieldLiteGenerator::GenerateBuilderMembers(
    printer->Print(variables_,
                   "$deprecation$public Builder "
                   "${$add$capitalized_name$Value$}$(int value) {\n"
+                   "  copyOnWrite();\n"
                   "  instance.add$capitalized_name$Value(value);\n"
                   "  return this;\n"
                   "}\n");
--- a/src/google/protobuf/generated_message_tctable_gen.cc
+++ b/src/google/protobuf/generated_message_tctable_gen.cc
@ -179,30 +179,21 @@ bool IsFieldEligibleForFastParsing(
    const TailCallTableInfo::FieldEntryInfo& entry,
    const TailCallTableInfo::OptionProvider& option_provider) {
  const auto* field = entry.field;
+  // Conditions which exclude a field from being handled by fast parsing
+  // regardless of options should be considered in IsExcludedFromFastParsing.
+  if (!IsDescriptorEligibleForFastParsing(field)) return false;
+
  const auto options = option_provider.GetForField(field);
-  // Map, oneof, weak, and lazy fields are not handled on the fast path.
-  if (field->is_map() || field->real_containing_oneof() ||
-      field->options().weak() || options.is_implicitly_weak ||
-      options.is_lazy || options.should_split) {
-    return false;
-  }
+  // Weak, lazy, and split fields are not handled on the fast path.
+  if (options.is_implicitly_weak) return false;
+  if (options.is_lazy) return false;
+  if (options.should_split) return false;

  // We will check for a valid auxiliary index range later. However, we might
  // want to change the value we check for inlined string fields.
  int aux_idx = entry.aux_idx;

  switch (field->type()) {
-    case FieldDescriptor::TYPE_ENUM:
-      // If enum values are not validated at parse time, then this field can be
-      // handled on the fast path like an int32.
-      if (cpp::HasPreservingUnknownEnumSemantics(field)) {
-        break;
-      }
-      if (field->is_repeated() && field->is_packed()) {
-        return false;
-      }
-      break;
-
      // Some bytes fields can be handled on fast path.
    case FieldDescriptor::TYPE_STRING:
    case FieldDescriptor::TYPE_BYTES:
@ -235,13 +226,6 @@ bool IsFieldEligibleForFastParsing(
    return false;
  }

-  // The largest tag that can be read by the tailcall parser is two bytes
-  // when varint-coded. This allows 14 bits for the numeric tag value:
-  //   byte 0   byte 1
-  //   1nnnnttt 0nnnnnnn
-  //    ^^^^^^^  ^^^^^^^
-  if (field->number() >= 1 << 11) return false;
-
  return true;
 }

--- a/src/google/protobuf/generated_message_tctable_gen.h
+++ b/src/google/protobuf/generated_message_tctable_gen.h
@ -154,6 +154,49 @@ struct PROTOBUF_EXPORT TailCallTableInfo {
  bool use_generated_fallback;
 };

+// A quick check to see if a field can be placed into the fast-table.
+// This is meant to be fast but not exhaustive; options that apply to some
+// fields may also exclude them.
+inline bool IsDescriptorEligibleForFastParsing(const FieldDescriptor* field) {
+  // Map, oneof, weak, and lazy fields are not handled on the fast path.
+  if (field->is_map()) return false;
+  if (field->real_containing_oneof()) return false;
+  if (field->options().weak()) return false;
+
+  switch (field->type()) {
+    case FieldDescriptor::TYPE_ENUM:
+      // If enum values are not validated at parse time, then this field can be
+      // handled on the fast path like an int32.
+      if (cpp::HasPreservingUnknownEnumSemantics(field)) {
+        break;
+      }
+      if (field->is_repeated() && field->is_packed()) {
+        return false;
+      }
+      break;
+
+      // Some bytes fields can be handled on fast path.
+    case FieldDescriptor::TYPE_STRING:
+    case FieldDescriptor::TYPE_BYTES:
+      if (field->options().ctype() != FieldOptions::STRING) {
+        return false;
+      }
+      break;
+
+    default:
+      break;
+  }
+
+  // The largest tag that can be read by the tailcall parser is two bytes
+  // when varint-coded. This allows 14 bits for the numeric tag value:
+  //   byte 0   byte 1
+  //   1nnnnttt 0nnnnnnn
+  //    ^^^^^^^  ^^^^^^^
+  if (field->number() >= 1 << 11) return false;
+
+  return true;
+}
+
 }  // namespace internal
 }  // namespace protobuf
 }  // namespace google
--- a/src/google/protobuf/generated_message_tctable_impl.h
+++ b/src/google/protobuf/generated_message_tctable_impl.h
@ -345,10 +345,15 @@ class PROTOBUF_EXPORT TcParser final {
  // Manually unrolled and specialized Varint parsing.
  template <typename FieldType, int data_offset, int hasbit_idx>
  static const char* SpecializedUnrolledVImpl1(PROTOBUF_TC_PARAM_DECL);
+  template <int data_offset, int hasbit_idx>
+  static const char* SpecializedFastV8S1(PROTOBUF_TC_PARAM_DECL);

  template <typename FieldType, int data_offset, int hasbit_idx>
  static constexpr TailCallParseFunc SingularVarintNoZag1() {
    if (data_offset < 100) {
+      if (sizeof(FieldType) == 1) {
+        return &SpecializedFastV8S1<data_offset, hasbit_idx>;
+      }
      return &SpecializedUnrolledVImpl1<FieldType, data_offset, hasbit_idx>;
    } else if (sizeof(FieldType) == 1) {
      return &FastV8S1;
@ -625,6 +630,55 @@ class PROTOBUF_EXPORT TcParser final {
  static const char* MpMap(PROTOBUF_TC_PARAM_DECL);
 };

+// Notes:
+// 1) if data_offset is negative, it's read from data.offset()
+// 2) if hasbit_idx is negative, it's read from data.hasbit_idx()
+template <int data_offset, int hasbit_idx>
+const char* TcParser::SpecializedFastV8S1(PROTOBUF_TC_PARAM_DECL) {
+  using TagType = uint8_t;
+
+  // Special case for a varint bool field with a tag of 1 byte:
+  // The coded_tag() field will actually contain the value too and we can check
+  // both at the same time.
+  auto coded_tag = data.coded_tag<uint16_t>();
+  if (PROTOBUF_PREDICT_TRUE(coded_tag == 0x0000 || coded_tag == 0x0100)) {
+    auto& field =
+        RefAt<bool>(msg, data_offset >= 0 ? data_offset : data.offset());
+    // Note: we use `data.data` because Clang generates suboptimal code when
+    // using coded_tag.
+    // In x86_64 this uses the CH register to read the second byte out of
+    // `data`.
+    uint8_t value = data.data >> 8;
+    // The assume allows using a mov instead of test+setne.
+    PROTOBUF_ASSUME(value <= 1);
+    field = static_cast<bool>(value);
+
+    ptr += sizeof(TagType) + 1;  // Consume the tag and the value.
+    if (hasbit_idx < 0) {
+      hasbits |= (uint64_t{1} << data.hasbit_idx());
+    } else {
+      if (hasbit_idx < 32) {
+        hasbits |= (uint64_t{1} << hasbit_idx);
+      } else {
+        static_assert(hasbit_idx == 63 || (hasbit_idx < 32),
+                      "hard-coded hasbit_idx should be 0-31, or the special"
+                      "value 63, which indicates the field has no has-bit.");
+        // TODO(jorg): investigate whether higher hasbit indices are worth
+        // supporting. Something like:
+        // auto& hasblock = TcParser::RefAt<uint32_t>(msg, hasbit_idx / 32 * 4);
+        // hasblock |= uint32_t{1} << (hasbit_idx % 32);
+      }
+    }
+
+    PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS);
+  }
+
+  // If it didn't match above either the tag is wrong, or the value is encoded
+  // non-canonically.
+  // Jump to MiniParse as wrong tag is the most probable reason.
+  PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS);
+}
+
 template <typename FieldType, int data_offset, int hasbit_idx>
 const char* TcParser::SpecializedUnrolledVImpl1(PROTOBUF_TC_PARAM_DECL) {
  using TagType = uint8_t;
--- a/src/google/protobuf/generated_message_tctable_lite.cc
+++ b/src/google/protobuf/generated_message_tctable_lite.cc
@ -850,31 +850,7 @@ PROTOBUF_NOINLINE const char* TcParser::SingularVarBigint(
 }

 const char* TcParser::FastV8S1(PROTOBUF_TC_PARAM_DECL) {
-  // Special case for a varint bool field with a tag of 1 byte:
-  // The coded_tag() field will actually contain the value too and we can check
-  // both at the same time.
-  auto coded_tag = data.coded_tag<uint16_t>();
-  if (PROTOBUF_PREDICT_TRUE(coded_tag == 0x0000 || coded_tag == 0x0100)) {
-    auto& field = RefAt<bool>(msg, data.offset());
-    // Note: we use `data.data` because Clang generates suboptimal code when
-    // using coded_tag.
-    // In x86_64 this uses the CH register to read the second byte out of
-    // `data`.
-    uint8_t value = data.data >> 8;
-    // The assume allows using a mov instead of test+setne.
-    PROTOBUF_ASSUME(value <= 1);
-    field = static_cast<bool>(value);
-
-    ptr += 2;  // Consume the tag and the value.
-    hasbits |= (uint64_t{1} << data.hasbit_idx());
-
-    PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS);
-  }
-
-  // If it didn't match above either the tag is wrong, or the value is encoded
-  // non-canonically.
-  // Jump to MiniParse as wrong tag is the most probable reason.
-  PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS);
+  return SpecializedFastV8S1<-1, -1>(PROTOBUF_TC_PARAM_PASS);
 }
 const char* TcParser::FastV8S2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return SingularVarint<bool, uint16_t>(