Merge pull request #10388 from mkruskal-google/sync-stage

Integrate from Piper for C++, Java, and Python
pull/10389/head
Mike Kruskal 3 years ago committed by GitHub
commit 13b3647016
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 1
      CHANGES.txt
  2. 10
      java/core/src/main/java/com/google/protobuf/CheckReturnValue.java
  3. 9
      java/core/src/main/java/com/google/protobuf/ExtensionRegistryLite.java
  4. 3
      python/google/protobuf/internal/text_format_test.py
  5. 3
      python/google/protobuf/proto_api.h
  6. 17
      python/google/protobuf/pyext/message.cc
  7. 188
      python/google/protobuf/text_format.py
  8. 2
      src/Makefile.am
  9. 2
      src/file_lists.cmake
  10. 2
      src/google/protobuf/BUILD.bazel
  11. 2
      src/google/protobuf/arena.cc
  12. 27
      src/google/protobuf/arenaz_sampler_test.cc
  13. 8
      src/google/protobuf/compiler/cpp/enum_field.cc
  14. 2
      src/google/protobuf/compiler/cpp/field.cc
  15. 1
      src/google/protobuf/compiler/cpp/file.cc
  16. 35
      src/google/protobuf/compiler/cpp/helpers.cc
  17. 68
      src/google/protobuf/compiler/cpp/helpers.h
  18. 8
      src/google/protobuf/compiler/cpp/message.cc
  19. 1
      src/google/protobuf/compiler/cpp/message.h
  20. 6
      src/google/protobuf/compiler/cpp/message_field.cc
  21. 911
      src/google/protobuf/compiler/cpp/parse_function_generator.cc
  22. 49
      src/google/protobuf/compiler/cpp/parse_function_generator.h
  23. 6
      src/google/protobuf/compiler/cpp/string_field.cc
  24. 124
      src/google/protobuf/compiler/python/generator.cc
  25. 3
      src/google/protobuf/compiler/python/generator.h
  26. 66
      src/google/protobuf/compiler/python/pyi_generator.cc
  27. 4
      src/google/protobuf/compiler/python/pyi_generator.h
  28. 42
      src/google/protobuf/descriptor.cc
  29. 70
      src/google/protobuf/descriptor.h
  30. 18
      src/google/protobuf/dynamic_message.cc
  31. 4
      src/google/protobuf/extension_set.h
  32. 359
      src/google/protobuf/generated_message_reflection.cc
  33. 21
      src/google/protobuf/generated_message_tctable_decl.h
  34. 36
      src/google/protobuf/generated_message_tctable_full.cc
  35. 779
      src/google/protobuf/generated_message_tctable_gen.cc
  36. 162
      src/google/protobuf/generated_message_tctable_gen.h
  37. 2
      src/google/protobuf/generated_message_tctable_impl.h
  38. 1
      src/google/protobuf/inlined_string_field_unittest.cc
  39. 9
      src/google/protobuf/message.cc
  40. 28
      src/google/protobuf/message.h
  41. 4
      src/google/protobuf/port.h
  42. 9
      src/google/protobuf/port_def.inc
  43. 1
      src/google/protobuf/port_undef.inc
  44. 1
      src/google/protobuf/proto3_arena_lite_unittest.cc
  45. 1
      src/google/protobuf/proto3_arena_unittest.cc
  46. 1
      src/google/protobuf/proto3_lite_unittest.inc
  47. 1
      src/google/protobuf/reflection_ops_unittest.cc
  48. 11
      src/google/protobuf/repeated_field.h
  49. 1
      src/google/protobuf/repeated_field_unittest.cc
  50. 2
      src/google/protobuf/repeated_ptr_field.h
  51. 1
      src/google/protobuf/text_format.cc
  52. 1
      src/google/protobuf/wire_format.h

@ -10,6 +10,7 @@
* Added a default implementation of MessageDifferencer::Reporter methods.
* proto2::MapPair is now an alias to std::pair.
* Hide C++ RepeatedField::UnsafeArenaSwap
* Use table-driven parser for reflection based objects.
Kotlin
* Suppress deprecation warnings in Kotlin generated code.

@ -41,13 +41,13 @@ import java.lang.annotation.Retention;
import java.lang.annotation.Target;
/**
* Indicates that the return value of the annotated method must be checked. An error is triggered
* when one of these methods is called but the result is not used.
* Indicates that the return value of the annotated method must be used. An error is triggered when
* one of these methods is called but the result is not used.
*
* <p>{@code @CheckReturnValue} may be applied to a class or package to indicate that all methods in
* that class or package must have their return values checked. For convenience, we provide an
* annotation, {@link CanIgnoreReturnValue}, to exempt specific methods or classes from this
* behavior.
* that class (including indirectly; that is, methods of inner classes within the annotated class)
* or package must have their return values used. For convenience, we provide an annotation, {@link
* CanIgnoreReturnValue}, to exempt specific methods or classes from this behavior.
*/
@Documented
@Target({METHOD, CONSTRUCTOR, TYPE, PACKAGE})

@ -123,16 +123,15 @@ public class ExtensionRegistryLite {
* ExtensionRegistry} (if the full (non-Lite) proto libraries are available).
*/
public static ExtensionRegistryLite getEmptyRegistry() {
if (!doFullRuntimeInheritanceCheck) {
return EMPTY_REGISTRY_LITE;
}
ExtensionRegistryLite result = emptyRegistry;
if (result == null) {
synchronized (ExtensionRegistryLite.class) {
result = emptyRegistry;
if (result == null) {
result =
emptyRegistry =
doFullRuntimeInheritanceCheck
? ExtensionRegistryFactory.createEmpty()
: EMPTY_REGISTRY_LITE;
result = emptyRegistry = ExtensionRegistryFactory.createEmpty();
}
}
}

@ -38,6 +38,7 @@ import string
import textwrap
import unittest
import unittest.mock
from google.protobuf import any_pb2
from google.protobuf import struct_pb2
@ -2484,5 +2485,3 @@ class OptionalColonMessageToStringTest(unittest.TestCase):
self.assertEqual('repeated_int32: [1]\n', output)
if __name__ == '__main__':
unittest.main()

@ -133,8 +133,7 @@ struct PyProto_API {
};
inline const char* PyProtoAPICapsuleName() {
static const char kCapsuleName[] =
"google.protobuf.pyext._message.proto_API";
static const char kCapsuleName[] = "google.protobuf.pyext._message.proto_API";
return kCapsuleName;
}

@ -88,6 +88,9 @@
: 0) \
: PyBytes_AsStringAndSize(ob, (charpp), (sizep)))
#define PROTOBUF_PYTHON_PUBLIC "google.protobuf"
#define PROTOBUF_PYTHON_INTERNAL "google.protobuf.internal"
namespace google {
namespace protobuf {
namespace python {
@ -246,8 +249,8 @@ static PyObject* New(PyTypeObject* type, PyObject* args, PyObject* kwargs) {
ScopedPyObjectPtr new_args;
if (WKT_classes == nullptr) {
ScopedPyObjectPtr well_known_types(PyImport_ImportModule(
"google.protobuf.internal.well_known_types"));
ScopedPyObjectPtr well_known_types(
PyImport_ImportModule(PROTOBUF_PYTHON_INTERNAL ".well_known_types"));
GOOGLE_DCHECK(well_known_types != nullptr);
WKT_classes = PyObject_GetAttrString(well_known_types.get(), "WKTBASES");
@ -2372,7 +2375,7 @@ PyObject* DeepCopy(CMessage* self, PyObject* arg) {
PyObject* ToUnicode(CMessage* self) {
// Lazy import to prevent circular dependencies
ScopedPyObjectPtr text_format(
PyImport_ImportModule("google.protobuf.text_format"));
PyImport_ImportModule(PROTOBUF_PYTHON_PUBLIC ".text_format"));
if (text_format == nullptr) {
return nullptr;
}
@ -3034,8 +3037,8 @@ bool InitProto2MessageModule(PyObject *m) {
PyModule_AddObject(m, "MethodDescriptor",
reinterpret_cast<PyObject*>(&PyMethodDescriptor_Type));
PyObject* enum_type_wrapper = PyImport_ImportModule(
"google.protobuf.internal.enum_type_wrapper");
PyObject* enum_type_wrapper =
PyImport_ImportModule(PROTOBUF_PYTHON_INTERNAL ".enum_type_wrapper");
if (enum_type_wrapper == nullptr) {
return false;
}
@ -3043,8 +3046,8 @@ bool InitProto2MessageModule(PyObject *m) {
PyObject_GetAttrString(enum_type_wrapper, "EnumTypeWrapper");
Py_DECREF(enum_type_wrapper);
PyObject* message_module = PyImport_ImportModule(
"google.protobuf.message");
PyObject* message_module =
PyImport_ImportModule(PROTOBUF_PYTHON_PUBLIC ".message");
if (message_module == nullptr) {
return false;
}

@ -67,6 +67,7 @@ _FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?$', re.IGNORECASE)
_FLOAT_NAN = re.compile('nanf?$', re.IGNORECASE)
_QUOTES = frozenset(("'", '"'))
_ANY_FULL_TYPE_NAME = 'google.protobuf.Any'
_DEBUG_STRING_SILENT_MARKER = '\t '
class Error(Exception):
@ -880,6 +881,7 @@ class _Parser(object):
type_url_prefix, packed_type_name = self._ConsumeAnyTypeUrl(tokenizer)
tokenizer.Consume(']')
tokenizer.TryConsume(':')
self._DetectSilentMarker(tokenizer)
if tokenizer.TryConsume('<'):
expanded_any_end_token = '>'
else:
@ -979,9 +981,11 @@ class _Parser(object):
if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
tokenizer.TryConsume(':')
self._DetectSilentMarker(tokenizer)
merger = self._MergeMessageField
else:
tokenizer.Consume(':')
self._DetectSilentMarker(tokenizer)
merger = self._MergeScalarField
if (field.label == descriptor.FieldDescriptor.LABEL_REPEATED and
@ -999,7 +1003,7 @@ class _Parser(object):
else: # Proto field is unknown.
assert (self.allow_unknown_extension or self.allow_unknown_field)
_SkipFieldContents(tokenizer)
self._SkipFieldContents(tokenizer)
# For historical reasons, fields may optionally be separated by commas or
# semicolons.
@ -1007,6 +1011,13 @@ class _Parser(object):
tokenizer.TryConsume(';')
def _LogSilentMarker(self):
pass
def _DetectSilentMarker(self, tokenizer):
if tokenizer.contains_silent_marker_before_current_token:
self._LogSilentMarker()
def _ConsumeAnyTypeUrl(self, tokenizer):
"""Consumes a google.protobuf.Any type URL and returns the type name."""
# Consume "type.googleapis.com/".
@ -1161,112 +1172,108 @@ class _Parser(object):
else:
setattr(message, field.name, value)
def _SkipFieldContents(self, tokenizer):
"""Skips over contents (value or message) of a field.
def _SkipFieldContents(tokenizer):
"""Skips over contents (value or message) of a field.
Args:
tokenizer: A tokenizer to parse the field name and values.
"""
# Try to guess the type of this field.
# If this field is not a message, there should be a ":" between the
# field name and the field value and also the field value should not
# start with "{" or "<" which indicates the beginning of a message body.
# If there is no ":" or there is a "{" or "<" after ":", this field has
# to be a message or the input is ill-formed.
if tokenizer.TryConsume(
':') and not tokenizer.LookingAt('{') and not tokenizer.LookingAt('<'):
if tokenizer.LookingAt('['):
_SkipRepeatedFieldValue(tokenizer)
Args:
tokenizer: A tokenizer to parse the field name and values.
"""
# Try to guess the type of this field.
# If this field is not a message, there should be a ":" between the
# field name and the field value and also the field value should not
# start with "{" or "<" which indicates the beginning of a message body.
# If there is no ":" or there is a "{" or "<" after ":", this field has
# to be a message or the input is ill-formed.
if tokenizer.TryConsume(
':') and not tokenizer.LookingAt('{') and not tokenizer.LookingAt('<'):
self._DetectSilentMarker(tokenizer)
if tokenizer.LookingAt('['):
self._SkipRepeatedFieldValue(tokenizer)
else:
self._SkipFieldValue(tokenizer)
else:
_SkipFieldValue(tokenizer)
else:
_SkipFieldMessage(tokenizer)
self._DetectSilentMarker(tokenizer)
self._SkipFieldMessage(tokenizer)
def _SkipField(tokenizer):
"""Skips over a complete field (name and value/message).
def _SkipField(self, tokenizer):
"""Skips over a complete field (name and value/message).
Args:
tokenizer: A tokenizer to parse the field name and values.
"""
if tokenizer.TryConsume('['):
# Consume extension or google.protobuf.Any type URL
tokenizer.ConsumeIdentifier()
num_identifiers = 1
while tokenizer.TryConsume('.'):
tokenizer.ConsumeIdentifier()
num_identifiers += 1
# This is possibly a type URL for an Any message.
if num_identifiers == 3 and tokenizer.TryConsume('/'):
Args:
tokenizer: A tokenizer to parse the field name and values.
"""
if tokenizer.TryConsume('['):
# Consume extension or google.protobuf.Any type URL
tokenizer.ConsumeIdentifier()
num_identifiers = 1
while tokenizer.TryConsume('.'):
tokenizer.ConsumeIdentifier()
tokenizer.Consume(']')
else:
tokenizer.ConsumeIdentifierOrNumber()
_SkipFieldContents(tokenizer)
# For historical reasons, fields may optionally be separated by commas or
# semicolons.
if not tokenizer.TryConsume(','):
tokenizer.TryConsume(';')
def _SkipFieldMessage(tokenizer):
"""Skips over a field message.
num_identifiers += 1
# This is possibly a type URL for an Any message.
if num_identifiers == 3 and tokenizer.TryConsume('/'):
tokenizer.ConsumeIdentifier()
while tokenizer.TryConsume('.'):
tokenizer.ConsumeIdentifier()
tokenizer.Consume(']')
else:
tokenizer.ConsumeIdentifierOrNumber()
Args:
tokenizer: A tokenizer to parse the field name and values.
"""
self._SkipFieldContents(tokenizer)
if tokenizer.TryConsume('<'):
delimiter = '>'
else:
tokenizer.Consume('{')
delimiter = '}'
# For historical reasons, fields may optionally be separated by commas or
# semicolons.
if not tokenizer.TryConsume(','):
tokenizer.TryConsume(';')
while not tokenizer.LookingAt('>') and not tokenizer.LookingAt('}'):
_SkipField(tokenizer)
def _SkipFieldMessage(self, tokenizer):
"""Skips over a field message.
tokenizer.Consume(delimiter)
Args:
tokenizer: A tokenizer to parse the field name and values.
"""
if tokenizer.TryConsume('<'):
delimiter = '>'
else:
tokenizer.Consume('{')
delimiter = '}'
while not tokenizer.LookingAt('>') and not tokenizer.LookingAt('}'):
self._SkipField(tokenizer)
def _SkipFieldValue(tokenizer):
"""Skips over a field value.
tokenizer.Consume(delimiter)
Args:
tokenizer: A tokenizer to parse the field name and values.
def _SkipFieldValue(self, tokenizer):
"""Skips over a field value.
Raises:
ParseError: In case an invalid field value is found.
"""
# String/bytes tokens can come in multiple adjacent string literals.
# If we can consume one, consume as many as we can.
if tokenizer.TryConsumeByteString():
while tokenizer.TryConsumeByteString():
pass
return
Args:
tokenizer: A tokenizer to parse the field name and values.
if (not tokenizer.TryConsumeIdentifier() and
not _TryConsumeInt64(tokenizer) and not _TryConsumeUint64(tokenizer) and
not tokenizer.TryConsumeFloat()):
raise ParseError('Invalid field value: ' + tokenizer.token)
Raises:
ParseError: In case an invalid field value is found.
"""
# String/bytes tokens can come in multiple adjacent string literals.
# If we can consume one, consume as many as we can.
if tokenizer.TryConsumeByteString():
while tokenizer.TryConsumeByteString():
pass
return
if (not tokenizer.TryConsumeIdentifier() and
not _TryConsumeInt64(tokenizer) and not _TryConsumeUint64(tokenizer) and
not tokenizer.TryConsumeFloat()):
raise ParseError('Invalid field value: ' + tokenizer.token)
def _SkipRepeatedFieldValue(tokenizer):
"""Skips over a repeated field value.
def _SkipRepeatedFieldValue(self, tokenizer):
"""Skips over a repeated field value.
Args:
tokenizer: A tokenizer to parse the field value.
"""
tokenizer.Consume('[')
if not tokenizer.LookingAt(']'):
_SkipFieldValue(tokenizer)
while tokenizer.TryConsume(','):
_SkipFieldValue(tokenizer)
tokenizer.Consume(']')
Args:
tokenizer: A tokenizer to parse the field value.
"""
tokenizer.Consume('[')
if not tokenizer.LookingAt(']'):
self._SkipFieldValue(tokenizer)
while tokenizer.TryConsume(','):
self._SkipFieldValue(tokenizer)
tokenizer.Consume(']')
class Tokenizer(object):
@ -1307,6 +1314,8 @@ class Tokenizer(object):
self._skip_comments = skip_comments
self._whitespace_pattern = (skip_comments and self._WHITESPACE_OR_COMMENT
or self._WHITESPACE)
self.contains_silent_marker_before_current_token = False
self._SkipWhitespace()
self.NextToken()
@ -1339,6 +1348,8 @@ class Tokenizer(object):
match = self._whitespace_pattern.match(self._current_line, self._column)
if not match:
break
self.contains_silent_marker_before_current_token = match.group(0) == (
' ' + _DEBUG_STRING_SILENT_MARKER)
length = len(match.group(0))
self._column += length
@ -1591,6 +1602,7 @@ class Tokenizer(object):
"""Reads the next meaningful token."""
self._previous_line = self._line
self._previous_column = self._column
self.contains_silent_marker_before_current_token = False
self._column += len(self.token)
self._SkipWhitespace()

@ -115,6 +115,7 @@ nobase_include_HEADERS = \
google/protobuf/generated_message_bases.h \
google/protobuf/generated_message_reflection.h \
google/protobuf/generated_message_tctable_decl.h \
google/protobuf/generated_message_tctable_gen.h \
google/protobuf/generated_message_tctable_impl.h \
google/protobuf/generated_message_util.h \
google/protobuf/has_bits.h \
@ -258,6 +259,7 @@ libprotobuf_la_SOURCES = \
google/protobuf/field_mask.pb.cc \
google/protobuf/generated_message_bases.cc \
google/protobuf/generated_message_reflection.cc \
google/protobuf/generated_message_tctable_gen.cc \
google/protobuf/generated_message_tctable_full.cc \
google/protobuf/io/gzip_stream.cc \
google/protobuf/io/printer.cc \

@ -33,6 +33,7 @@ set(libprotobuf_srcs
${protobuf_SOURCE_DIR}/src/google/protobuf/generated_message_bases.cc
${protobuf_SOURCE_DIR}/src/google/protobuf/generated_message_reflection.cc
${protobuf_SOURCE_DIR}/src/google/protobuf/generated_message_tctable_full.cc
${protobuf_SOURCE_DIR}/src/google/protobuf/generated_message_tctable_gen.cc
${protobuf_SOURCE_DIR}/src/google/protobuf/generated_message_tctable_lite.cc
${protobuf_SOURCE_DIR}/src/google/protobuf/generated_message_util.cc
${protobuf_SOURCE_DIR}/src/google/protobuf/implicit_weak_message.cc
@ -126,6 +127,7 @@ set(libprotobuf_hdrs
${protobuf_SOURCE_DIR}/src/google/protobuf/generated_message_bases.h
${protobuf_SOURCE_DIR}/src/google/protobuf/generated_message_reflection.h
${protobuf_SOURCE_DIR}/src/google/protobuf/generated_message_tctable_decl.h
${protobuf_SOURCE_DIR}/src/google/protobuf/generated_message_tctable_gen.h
${protobuf_SOURCE_DIR}/src/google/protobuf/generated_message_tctable_impl.h
${protobuf_SOURCE_DIR}/src/google/protobuf/generated_message_util.h
${protobuf_SOURCE_DIR}/src/google/protobuf/has_bits.h

@ -216,6 +216,7 @@ cc_library(
"field_mask.pb.cc",
"generated_message_bases.cc",
"generated_message_reflection.cc",
"generated_message_tctable_gen.cc",
"generated_message_tctable_full.cc",
"map_field.cc",
"message.cc",
@ -244,6 +245,7 @@ cc_library(
"generated_enum_reflection.h",
"generated_message_bases.h",
"generated_message_reflection.h",
"generated_message_tctable_gen.h",
"map_entry.h",
"map_field.h",
"map_field_inl.h",

@ -117,7 +117,7 @@ SerialArena* SerialArena::New(Memory mem, void* owner,
ThreadSafeArenaStats* stats) {
GOOGLE_DCHECK_LE(kBlockHeaderSize + ThreadSafeArena::kSerialArenaSize, mem.size);
ThreadSafeArenaStats::RecordAllocateStats(
stats, /*requested=*/mem.size, /*allocated=*/mem.size, /*wasted=*/0);
stats, /*used=*/0, /*allocated=*/mem.size, /*wasted=*/0);
auto b = new (mem.ptr) Block{nullptr, mem.size};
return new (b->Pointer(kBlockHeaderSize)) SerialArena(b, owner, stats);
}

@ -411,6 +411,33 @@ TEST(ThreadSafeArenazSamplerTest, Callback) {
sampler.Unregister(info2);
}
// Builds arenas on top of a caller-supplied initial block of kSize bytes and
// checks the statistics the global sampler holds for them: the histogram bin
// that kSize maps to must show kSize bytes allocated with zero bytes used and
// zero bytes wasted.
TEST(ThreadSafeArenazSamplerTest, InitialBlockReportsZeroUsedAndWasted) {
// NOTE(review): sampling is switched on here and not switched back off in
// this test; only the sample parameter is restored at the end.
SetThreadSafeArenazEnabled(true);
// Setting 1 as the parameter value means one in every two arenas would be
// sampled, on average.
int32_t oldparam = ThreadSafeArenazSampleParameter();
SetThreadSafeArenazSampleParameter(1);
// NOTE(review): presumably resets the sampling countdown so sampling can
// start with the next arena -- confirm against the sampler implementation.
SetThreadSafeArenazGlobalNextSample(0);
// Size of the stack buffer handed to each arena; also the byte count looked
// for in the histogram below.
constexpr int kSize = 571;
// Number of histogram bins seen whose allocated byte count equals kSize.
int count_found_allocation = 0;
auto& sampler = GlobalThreadSafeArenazSampler();
// Create several arenas because, per the comment above, only about one in
// two is expected to be sampled.
for (int i = 0; i < 10; ++i) {
char block[kSize];
google::protobuf::Arena arena(/*initial_block=*/block, /*initial_block_size=*/kSize);
// Inspect every stats record the sampler passes to the callback while this
// arena is still alive.
sampler.Iterate([&](const ThreadSafeArenaStats& h) {
const auto& histbin =
h.block_histogram[ThreadSafeArenaStats::FindBin(kSize)];
// Only bins holding exactly kSize allocated bytes are checked; all other
// records are ignored.
if (histbin.bytes_allocated.load(std::memory_order_relaxed) == kSize) {
count_found_allocation++;
EXPECT_EQ(histbin.bytes_used, 0);
EXPECT_EQ(histbin.bytes_wasted, 0);
}
});
}
// Guards against a vacuous pass: if no bin ever matched, the expectations
// inside the callback never ran.
EXPECT_GT(count_found_allocation, 0);
// Put back the sample parameter that was saved at the top of the test.
SetThreadSafeArenazSampleParameter(oldparam);
}
class ThreadSafeArenazSamplerTestThread : public Thread {
protected:
void Run() override {

@ -104,7 +104,7 @@ void EnumFieldGenerator::GenerateInlineAccessorDefinitions(
" return _internal_$name$();\n"
"}\n"
"inline void $classname$::_internal_set_$name$($type$ value) {\n");
if (!HasPreservingUnknownEnumSemantics(descriptor_)) {
if (!internal::cpp::HasPreservingUnknownEnumSemantics(descriptor_)) {
format(" assert($type$_IsValid(value));\n");
}
format(
@ -204,7 +204,7 @@ void EnumOneofFieldGenerator::GenerateInlineAccessorDefinitions(
" return _internal_$name$();\n"
"}\n"
"inline void $classname$::_internal_set_$name$($type$ value) {\n");
if (!HasPreservingUnknownEnumSemantics(descriptor_)) {
if (!internal::cpp::HasPreservingUnknownEnumSemantics(descriptor_)) {
format(" assert($type$_IsValid(value));\n");
}
format(
@ -291,7 +291,7 @@ void RepeatedEnumFieldGenerator::GenerateInlineAccessorDefinitions(
" return _internal_$name$(index);\n"
"}\n"
"inline void $classname$::set_$name$(int index, $type$ value) {\n");
if (!HasPreservingUnknownEnumSemantics(descriptor_)) {
if (!internal::cpp::HasPreservingUnknownEnumSemantics(descriptor_)) {
format(" assert($type$_IsValid(value));\n");
}
format(
@ -300,7 +300,7 @@ void RepeatedEnumFieldGenerator::GenerateInlineAccessorDefinitions(
" // @@protoc_insertion_point(field_set:$full_name$)\n"
"}\n"
"inline void $classname$::_internal_add_$name$($type$ value) {\n");
if (!HasPreservingUnknownEnumSemantics(descriptor_)) {
if (!internal::cpp::HasPreservingUnknownEnumSemantics(descriptor_)) {
format(" assert($type$_IsValid(value));\n");
}
format(

@ -267,7 +267,7 @@ void SetCommonFieldVariables(const FieldDescriptor* descriptor,
}
void FieldGenerator::SetHasBitIndex(int32_t has_bit_index) {
if (!HasHasbit(descriptor_)) {
if (!internal::cpp::HasHasbit(descriptor_)) {
GOOGLE_CHECK_EQ(has_bit_index, -1);
return;
}

@ -51,6 +51,7 @@
#include <google/protobuf/compiler/cpp/helpers.h>
#include <google/protobuf/compiler/cpp/message.h>
#include <google/protobuf/compiler/cpp/service.h>
#include <google/protobuf/descriptor.h>
#include <google/protobuf/descriptor.pb.h>
// Must be last.

@ -1133,39 +1133,16 @@ bool IsWellKnownMessage(const FileDescriptor* file) {
return well_known_files.find(file->name()) != well_known_files.end();
}
static bool FieldEnforceUtf8(const FieldDescriptor* field,
const Options& options) {
return true;
}
static bool FileUtf8Verification(const FileDescriptor* file,
const Options& options) {
return true;
}
// Which level of UTF-8 enforcement is placed on this file.
Utf8CheckMode GetUtf8CheckMode(const FieldDescriptor* field,
const Options& options) {
if (field->file()->syntax() == FileDescriptor::SYNTAX_PROTO3 &&
FieldEnforceUtf8(field, options)) {
return Utf8CheckMode::kStrict;
} else if (GetOptimizeFor(field->file(), options) !=
FileOptions::LITE_RUNTIME &&
FileUtf8Verification(field->file(), options)) {
return Utf8CheckMode::kVerify;
} else {
return Utf8CheckMode::kNone;
}
}
static void GenerateUtf8CheckCode(const FieldDescriptor* field,
const Options& options, bool for_parse,
const char* parameters,
const char* strict_function,
const char* verify_function,
const Formatter& format) {
switch (GetUtf8CheckMode(field, options)) {
case Utf8CheckMode::kStrict: {
switch (internal::cpp::GetUtf8CheckMode(
field,
GetOptimizeFor(field->file(), options) == FileOptions::LITE_RUNTIME)) {
case internal::cpp::Utf8CheckMode::kStrict: {
if (for_parse) {
format("DO_(");
}
@ -1185,7 +1162,7 @@ static void GenerateUtf8CheckCode(const FieldDescriptor* field,
format.Outdent();
break;
}
case Utf8CheckMode::kVerify: {
case internal::cpp::Utf8CheckMode::kVerify: {
format("::$proto_ns$::internal::WireFormat::$1$(\n", verify_function);
format.Indent();
format(parameters);
@ -1198,7 +1175,7 @@ static void GenerateUtf8CheckCode(const FieldDescriptor* field,
format.Outdent();
break;
}
case Utf8CheckMode::kNone:
case internal::cpp::Utf8CheckMode::kNone:
break;
}
}

@ -462,28 +462,6 @@ inline bool IsProto3(const FileDescriptor* file) {
return file->syntax() == FileDescriptor::SYNTAX_PROTO3;
}
inline bool HasHasbit(const FieldDescriptor* field) {
// This predicate includes proto3 message fields only if they have "optional".
// Foo submsg1 = 1; // HasHasbit() == false
// optional Foo submsg2 = 2; // HasHasbit() == true
// This is slightly odd, as adding "optional" to a singular proto3 field does
// not change the semantics or API. However whenever any field in a message
// has a hasbit, it forces reflection to include hasbit offsets for *all*
// fields, even if almost all of them are set to -1 (no hasbit). So to avoid
// causing a sudden size regression for ~all proto3 messages, we give proto3
// message fields a hasbit only if "optional" is present. If the user is
// explicitly writing "optional", it is likely they are writing it on
// primitive fields also.
return (field->has_optional_keyword() || field->is_required()) &&
!field->options().weak();
}
// Returns true if 'enum' semantics are such that unknown values are preserved
// in the enum field itself, rather than going to the UnknownFieldSet.
inline bool HasPreservingUnknownEnumSemantics(const FieldDescriptor* field) {
return field->file()->syntax() == FileDescriptor::SYNTAX_PROTO3;
}
inline bool IsCrossFileMessage(const FieldDescriptor* field) {
return field->type() == FieldDescriptor::TYPE_MESSAGE &&
field->message_type()->file() != field->file();
@ -935,15 +913,6 @@ class PROTOC_EXPORT NamespaceOpener {
std::vector<std::string> name_stack_;
};
enum class Utf8CheckMode {
kStrict = 0, // Parsing will fail if non UTF-8 data is in string fields.
kVerify = 1, // Only log an error but parsing will succeed.
kNone = 2, // No UTF-8 check.
};
Utf8CheckMode GetUtf8CheckMode(const FieldDescriptor* field,
const Options& options);
void GenerateUtf8CheckCodeForString(const FieldDescriptor* field,
const Options& options, bool for_parse,
const char* parameters,
@ -954,43 +923,6 @@ void GenerateUtf8CheckCodeForCord(const FieldDescriptor* field,
const char* parameters,
const Formatter& format);
template <typename T>
struct FieldRangeImpl {
struct Iterator {
using iterator_category = std::forward_iterator_tag;
using value_type = const FieldDescriptor*;
using difference_type = int;
value_type operator*() { return descriptor->field(idx); }
friend bool operator==(const Iterator& a, const Iterator& b) {
GOOGLE_DCHECK(a.descriptor == b.descriptor);
return a.idx == b.idx;
}
friend bool operator!=(const Iterator& a, const Iterator& b) {
return !(a == b);
}
Iterator& operator++() {
idx++;
return *this;
}
int idx;
const T* descriptor;
};
Iterator begin() const { return {0, descriptor}; }
Iterator end() const { return {descriptor->field_count(), descriptor}; }
const T* descriptor;
};
template <typename T>
FieldRangeImpl<T> FieldRange(const T* desc) {
return {desc};
}
struct OneOfRangeImpl {
struct Iterator {
using iterator_category = std::forward_iterator_tag;

@ -37,8 +37,10 @@
#include <algorithm>
#include <cstdint>
#include <functional>
#include <limits>
#include <map>
#include <memory>
#include <type_traits>
#include <unordered_map>
#include <utility>
#include <vector>
@ -73,6 +75,8 @@ namespace cpp {
using internal::WireFormat;
using internal::WireFormatLite;
using internal::cpp::HasHasbit;
using internal::cpp::Utf8CheckMode;
namespace {
@ -1315,7 +1319,9 @@ void MessageGenerator::GenerateClassDefinition(io::Printer* printer) {
" static const $classname$* internal_default_instance() { return "
"reinterpret_cast<const "
"$classname$*>(&_$classname$_default_instance_); }\n");
auto utf8_check = GetUtf8CheckMode(descriptor_->field(0), options_);
auto utf8_check = internal::cpp::GetUtf8CheckMode(
descriptor_->field(0), GetOptimizeFor(descriptor_->file(), options_) ==
FileOptions::LITE_RUNTIME);
if (descriptor_->field(0)->type() == FieldDescriptor::TYPE_STRING &&
utf8_check != Utf8CheckMode::kNone) {
if (utf8_check == Utf8CheckMode::kStrict) {

@ -36,6 +36,7 @@
#define GOOGLE_PROTOBUF_COMPILER_CPP_MESSAGE_H__
#include <cstdint>
#include <limits>
#include <memory>
#include <set>
#include <string>

@ -343,7 +343,7 @@ void MessageFieldGenerator::GenerateInternalAccessorDefinitions(
format(
"::$proto_ns$::MessageLite*\n"
"$classname$::_Internal::mutable_$name$($classname$* msg) {\n");
if (HasHasbit(descriptor_)) {
if (internal::cpp::HasHasbit(descriptor_)) {
format(" msg->$set_hasbit$\n");
}
if (descriptor_->real_containing_oneof() == nullptr) {
@ -376,7 +376,7 @@ void MessageFieldGenerator::GenerateClearingCode(io::Printer* printer) const {
GOOGLE_CHECK(!IsFieldStripped(descriptor_, options_));
Formatter format(printer, variables_);
if (!HasHasbit(descriptor_)) {
if (!internal::cpp::HasHasbit(descriptor_)) {
// If we don't have has-bits, message presence is indicated only by ptr !=
// nullptr. Thus on clear, we need to delete the object.
format(
@ -394,7 +394,7 @@ void MessageFieldGenerator::GenerateMessageClearingCode(
GOOGLE_CHECK(!IsFieldStripped(descriptor_, options_));
Formatter format(printer, variables_);
if (!HasHasbit(descriptor_)) {
if (!internal::cpp::HasHasbit(descriptor_)) {
// If we don't have has-bits, message presence is indicated only by ptr !=
// nullptr. Thus on clear, we need to delete the object.
format(

@ -40,55 +40,13 @@
#include <google/protobuf/wire_format_lite.h>
#include <google/protobuf/compiler/cpp/helpers.h>
#include <google/protobuf/compiler/cpp/options.h>
#include <google/protobuf/generated_message_tctable_gen.h>
namespace google {
namespace protobuf {
namespace compiler {
namespace cpp {
// Helper class for generating tailcall parsing functions.
struct TailCallTableInfo {
TailCallTableInfo(const Descriptor* descriptor, const Options& options,
const std::vector<const FieldDescriptor*>& ordered_fields,
const std::vector<int>& has_bit_indices,
const std::vector<int>& inlined_string_indices,
MessageSCCAnalyzer* scc_analyzer);
// Fields parsed by the table fast-path.
struct FastFieldInfo {
std::string func_name;
const FieldDescriptor* field;
uint16_t coded_tag;
uint8_t hasbit_idx;
uint8_t aux_idx;
};
std::vector<FastFieldInfo> fast_path_fields;
// Fields parsed by mini parsing routines.
struct FieldEntryInfo {
const FieldDescriptor* field;
int hasbit_idx;
int inlined_string_idx;
uint16_t aux_idx;
// True for enums entirely covered by the start/length fields of FieldAux:
bool is_enum_range;
int32_t enum_range_min;
int32_t enum_range_max;
};
std::vector<FieldEntryInfo> field_entries;
std::vector<std::string> aux_entries;
// Fields parsed by generated fallback function.
std::vector<const FieldDescriptor*> fallback_fields;
// Table size.
int table_size_log2;
// Mask for has-bits of required fields.
uint32_t has_hasbits_required_mask;
// True if a generated fallback function is required instead of generic.
bool use_generated_fallback;
};
// ParseFunctionGenerator generates the _InternalParse function for a message
// (and any associated supporting members).
class ParseFunctionGenerator {
@ -113,6 +71,8 @@ class ParseFunctionGenerator {
void GenerateDataDefinitions(io::Printer* printer);
private:
class GeneratedOptionProvider;
// Returns true if tailcall table code should be generated.
bool should_generate_tctable() const;
@ -136,7 +96,6 @@ class ParseFunctionGenerator {
void GenerateTailCallTable(Formatter& format);
void GenerateFastFieldEntries(Formatter& format);
void GenerateFieldEntries(Formatter& format);
int CalculateFieldNamesSize() const;
void GenerateFieldNames(Formatter& format);
// Generates parsing code for an `ArenaString` field.
@ -168,7 +127,7 @@ class ParseFunctionGenerator {
MessageSCCAnalyzer* scc_analyzer_;
const Options& options_;
std::map<std::string, std::string> variables_;
std::unique_ptr<TailCallTableInfo> tc_table_info_;
std::unique_ptr<internal::TailCallTableInfo> tc_table_info_;
std::vector<int> inlined_string_indices_;
const std::vector<const FieldDescriptor*> ordered_fields_;
int num_hasbits_;

@ -286,7 +286,7 @@ void StringFieldGenerator::GenerateInlineAccessorDefinitions(
"$maybe_prepare_split_message$"
" // @@protoc_insertion_point(field_release:$full_name$)\n");
if (HasHasbit(descriptor_)) {
if (internal::cpp::HasHasbit(descriptor_)) {
format(
" if (!_internal_has_$name$()) {\n"
" return nullptr;\n"
@ -375,7 +375,7 @@ void StringFieldGenerator::GenerateMessageClearingCode(
// If we have a hasbit, then the Clear() method of the protocol buffer
// will have checked that this field is set. If so, we can avoid redundant
// checks against the default variable.
const bool must_be_present = HasHasbit(descriptor_);
const bool must_be_present = internal::cpp::HasHasbit(descriptor_);
if (inlined_ && must_be_present) {
// Calling mutable_$name$() gives us a string reference and sets the has bit
@ -451,7 +451,7 @@ void StringFieldGenerator::GenerateCopyConstructorCode(
format("new (&_this->$field$) ::_pbi::InlinedStringField();\n");
}
if (HasHasbit(descriptor_)) {
if (internal::cpp::HasHasbit(descriptor_)) {
format("if (from._internal_has_$name$()) {\n");
} else {
format("if (!from._internal_$name$().empty()) {\n");

@ -90,29 +90,7 @@ std::string ModuleAlias(const std::string& filename) {
// in proto2/public/reflection.py.
const char kDescriptorKey[] = "DESCRIPTOR";
// file output by this generator.
void PrintTopBoilerplate(io::Printer* printer, const FileDescriptor* file,
                         bool descriptor_proto) {
  // Writes the fixed preamble of a generated Python module to `printer`:
  // a header comment naming the source .proto, the protobuf runtime imports,
  // the imports insertion point, and the module-level `_sym_db` handle.
  // `descriptor_proto` is accepted but never read in this body.
  //
  // TODO(robinson): Allow parameterization of Python version?
  const char* const header_template =
      "# -*- coding: utf-8 -*-\n"
      "# Generated by the protocol buffer compiler. DO NOT EDIT!\n"
      "# source: $filename$\n"
      "\"\"\"Generated protocol buffer code.\"\"\"\n";
  const char* const runtime_imports =
      "from google.protobuf.internal import builder as _builder\n"
      "from google.protobuf import descriptor as _descriptor\n"
      "from google.protobuf import descriptor_pool as _descriptor_pool\n"
      "from google.protobuf import symbol_database as _symbol_database\n";

  printer->Print(header_template, "filename", file->name());
  printer->Print(runtime_imports);
  printer->Print("# @@protoc_insertion_point(imports)\n\n");
  printer->Print("_sym_db = _symbol_database.Default()\n");
  printer->Print("\n\n");
}
const char kThirdPartyPrefix[] = "google3.third_party.py.";
// Returns a Python literal giving the default value for a field.
// If the field specifies no explicit default value, we'll return
@ -217,12 +195,18 @@ bool Generator::Generate(const FileDescriptor* file,
GeneratorContext* context, std::string* error) const {
// -----------------------------------------------------------------
// parse generator options
bool bootstrap = false;
std::vector<std::pair<std::string, std::string> > options;
ParseGeneratorParameter(parameter, &options);
for (int i = 0; i < options.size(); i++) {
if (options[i].first == "pyi_out") {
if (!opensource_runtime_ &&
options[i].first == "no_enforce_api_compatibility") {
// TODO(b/241584880): remove this legacy option, it has no effect.
} else if (!opensource_runtime_ && options[i].first == "bootstrap") {
bootstrap = true;
} else if (options[i].first == "pyi_out") {
python::PyiGenerator pyi_generator;
if (!pyi_generator.Generate(file, "", context, error)) {
return false;
@ -249,13 +233,50 @@ bool Generator::Generate(const FileDescriptor* file,
file_->CopyTo(&fdp);
fdp.SerializeToString(&file_descriptor_serialized_);
if (!opensource_runtime_ && GeneratingDescriptorProto()) {
std::string bootstrap_filename =
"net/proto2/python/internal/descriptor_pb2.py";
if (bootstrap) {
filename = bootstrap_filename;
} else {
std::unique_ptr<io::ZeroCopyOutputStream> output(context->Open(filename));
io::Printer printer(output.get(), '$');
printer.Print(
"from $internal_package$ import descriptor_pb2\n"
"\n",
"internal_package", InternalPackage());
// For static checkers, we need to explicitly assign to the symbols we
// publicly export.
for (int i = 0; i < file_->message_type_count(); i++) {
const Descriptor* message = file_->message_type(i);
printer.Print("$name$ = descriptor_pb2.$name$\n", "name",
message->name());
}
// Sadly some clients access our internal variables (starting with "_").
// To support them, we iterate over *all* symbols to expose even the
// private ones. Statically type-checked code should (especially) never
// use these, so we don't worry about making them available to pytype
// checks.
printer.Print(
"\n"
"globals().update(descriptor_pb2.__dict__)\n"
"\n");
printer.Print(
"# @@protoc_insertion_point(module_scope)\n"
"\n");
return true;
}
}
std::unique_ptr<io::ZeroCopyOutputStream> output(context->Open(filename));
GOOGLE_CHECK(output.get());
io::Printer printer(output.get(), '$');
printer_ = &printer;
PrintTopBoilerplate(printer_, file_, GeneratingDescriptorProto());
PrintTopBoilerplate();
PrintImports();
PrintFileDescriptor();
if (GeneratingDescriptorProto()) {
@ -277,6 +298,9 @@ bool Generator::Generate(const FileDescriptor* file,
printer_->Outdent();
}
std::string module_name = ModuleName(file->name());
if (!opensource_runtime_) {
module_name = StripPrefixString(module_name, kThirdPartyPrefix);
}
printer_->Print(
"_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, '$module_name$', "
"globals())\n",
@ -310,6 +334,34 @@ bool Generator::Generate(const FileDescriptor* file,
return !printer.failed();
}
// file output by this generator.
void Generator::PrintTopBoilerplate() const {
// TODO(robinson): Allow parameterization of Python version?
printer_->Print(
"# -*- coding: utf-8 -*-\n"
"# Generated by the protocol buffer compiler. DO NOT EDIT!\n"
"# source: $filename$\n"
"\"\"\"Generated protocol buffer code.\"\"\"\n",
"filename", file_->name());
if (!opensource_runtime_) {
// This import is needed so that compatibility proto1 compiler output
// inserted at protoc_insertion_point can refer to other protos like
// google3.a.b.c. Code generated by proto2 compiler doesn't do it, and
// instead uses aliases assigned when importing modules.
printer_->Print("import google3\n");
}
printer_->Print(
"from $internal_package$ import builder as _builder\n"
"from $public_package$ import descriptor as _descriptor\n"
"from $public_package$ import descriptor_pool as _descriptor_pool\n"
"from $public_package$ import symbol_database as _symbol_database\n",
"internal_package", InternalPackage(), "public_package", PublicPackage());
printer_->Print("# @@protoc_insertion_point(imports)\n\n");
printer_->Print("_sym_db = _symbol_database.Default()\n");
printer_->Print("\n\n");
}
// Prints Python imports for all modules imported by |file|.
void Generator::PrintImports() const {
for (int i = 0; i < file_->dependency_count(); ++i) {
@ -317,6 +369,9 @@ void Generator::PrintImports() const {
std::string module_name = ModuleName(filename);
std::string module_alias = ModuleAlias(filename);
if (!opensource_runtime_) {
module_name = StripPrefixString(module_name, kThirdPartyPrefix);
}
if (ContainsPythonKeyword(module_name)) {
// If the module path contains a Python keyword, we have to quote the
// module name and import it using importlib. Otherwise the usual kind of
@ -347,6 +402,9 @@ void Generator::PrintImports() const {
// Print public imports.
for (int i = 0; i < file_->public_dependency_count(); ++i) {
std::string module_name = ModuleName(file_->public_dependency(i)->name());
if (!opensource_runtime_) {
module_name = StripPrefixString(module_name, kThirdPartyPrefix);
}
printer_->Print("from $module$ import *\n", "module", module_name);
}
printer_->Print("\n");
@ -517,6 +575,9 @@ void Generator::PrintDescriptorKeyAndModuleName(
printer_->Print("$descriptor_key$ = $descriptor_name$,\n", "descriptor_key",
kDescriptorKey, "descriptor_name", name);
std::string module_name = ModuleName(file_->name());
if (!opensource_runtime_) {
module_name = StripPrefixString(module_name, kThirdPartyPrefix);
}
printer_->Print("__module__ = '$module_name$'\n", "module_name", module_name);
}
@ -706,6 +767,9 @@ void Generator::PrintMessage(const Descriptor& message_descriptor,
m["descriptor_name"] = ModuleLevelDescriptorName(message_descriptor);
printer_->Print(m, "'$descriptor_key$' : $descriptor_name$,\n");
std::string module_name = ModuleName(file_->name());
if (!opensource_runtime_) {
module_name = StripPrefixString(module_name, kThirdPartyPrefix);
}
printer_->Print("'__module__' : '$module_name$'\n", "module_name",
module_name);
printer_->Print("# @@protoc_insertion_point(class_scope:$full_name$)\n",
@ -1114,6 +1178,16 @@ std::string Generator::ModuleLevelServiceDescriptorName(
return name;
}
// Returns the Python package name substituted for $public_package$ in
// generated imports; the value depends on opensource_runtime_.
std::string Generator::PublicPackage() const {
  if (opensource_runtime_) {
    return "google.protobuf";
  }
  return "google3.net.google.protobuf.python.public";
}
// Returns the Python package name substituted for $internal_package$ in
// generated imports; the value depends on opensource_runtime_.
std::string Generator::InternalPackage() const {
  if (opensource_runtime_) {
    return "google.protobuf.internal";
  }
  return "google3.net.google.protobuf.python.internal";
}
// Prints standard constructor arguments serialized_start and serialized_end.
// Args:
// descriptor: The cpp descriptor to have a serialized reference.

@ -132,6 +132,7 @@ class PROTOC_EXPORT Generator : public CodeGenerator {
const FieldDescriptor& extension_field) const;
void FixForeignFieldsInNestedExtensions(const Descriptor& descriptor) const;
void PrintTopBoilerplate() const;
void PrintServices() const;
void PrintServiceDescriptors() const;
void PrintServiceDescriptor(const ServiceDescriptor& descriptor) const;
@ -149,6 +150,8 @@ class PROTOC_EXPORT Generator : public CodeGenerator {
std::string ModuleLevelMessageName(const Descriptor& descriptor) const;
std::string ModuleLevelServiceDescriptorName(
const ServiceDescriptor& descriptor) const;
std::string PublicPackage() const;
std::string InternalPackage() const;
template <typename DescriptorT, typename DescriptorProtoT>
void PrintSerializedPbInterval(const DescriptorT& descriptor,

@ -66,6 +66,16 @@ std::string PyiGenerator::ModuleLevelName(const DescriptorT& descriptor) const {
return name;
}
// Returns the Python package name substituted for $public_package$ in the
// generated .pyi imports; the value depends on opensource_runtime_.
std::string PyiGenerator::PublicPackage() const {
  if (opensource_runtime_) {
    return "google.protobuf";
  }
  return "google3.net.google.protobuf.python.public";
}
// Returns the Python package name substituted for $internal_package$ in the
// generated .pyi imports; the value depends on opensource_runtime_.
std::string PyiGenerator::InternalPackage() const {
  if (opensource_runtime_) {
    return "google.protobuf.internal";
  }
  return "google3.net.google.protobuf.python.internal";
}
struct ImportModules {
bool has_repeated = false; // _containers
bool has_iterable = false; // typing.Iterable
@ -182,6 +192,10 @@ void PyiGenerator::PrintImports() const {
if (file_->enum_type_count() > 0) {
import_modules.has_enums = true;
}
if (!opensource_runtime_ && file_->service_count() > 0) {
import_modules.has_optional = true;
import_modules.has_union = true;
}
for (int i = 0; i < file_->message_type_count(); i++) {
CheckImportModules(file_->message_type(i), &import_modules);
}
@ -190,37 +204,50 @@ void PyiGenerator::PrintImports() const {
// required in the proto file.
if (import_modules.has_repeated) {
printer_->Print(
"from google.protobuf.internal import containers as "
"_containers\n");
"from $internal_package$ import containers as _containers\n",
"internal_package", InternalPackage());
}
if (import_modules.has_enums) {
printer_->Print(
"from google.protobuf.internal import enum_type_wrapper"
" as _enum_type_wrapper\n");
"from $internal_package$ import enum_type_wrapper as "
"_enum_type_wrapper\n",
"internal_package", InternalPackage());
}
if (import_modules.has_extendable) {
printer_->Print(
"from google.protobuf.internal import python_message"
" as _python_message\n");
"from $internal_package$ import python_message as _python_message\n",
"internal_package", InternalPackage());
}
if (import_modules.has_well_known_type) {
printer_->Print(
"from google.protobuf.internal import well_known_types"
" as _well_known_types\n");
"from $internal_package$ import well_known_types as "
"_well_known_types\n",
"internal_package", InternalPackage());
}
printer_->Print(
"from google.protobuf import"
" descriptor as _descriptor\n");
printer_->Print("from $public_package$ import descriptor as _descriptor\n",
"public_package", PublicPackage());
if (import_modules.has_messages) {
printer_->Print(
"from google.protobuf import message as _message\n");
printer_->Print("from $public_package$ import message as _message\n",
"public_package", PublicPackage());
}
if (HasGenericServices(file_)) {
printer_->Print(
"from google.protobuf import service as"
" _service\n");
if (opensource_runtime_) {
if (HasGenericServices(file_)) {
printer_->Print("from $public_package$ import service as _service\n",
"public_package", PublicPackage());
}
} else {
if (file_->service_count() > 0) {
printer_->Print(
"from google3.net.rpc.python import proto_python_api_2_stub as "
"_proto_python_api_2_stub\n"
"from google3.net.rpc.python import pywraprpc as _pywraprpc\n"
"from google3.net.rpc.python import rpcserver as _rpcserver\n");
}
}
printer_->Print("from typing import ");
if (!opensource_runtime_ && file_->service_count() > 0) {
printer_->Print("Any as _Any, ");
}
printer_->Print("ClassVar as _ClassVar");
if (import_modules.has_iterable) {
printer_->Print(", Iterable as _Iterable");
@ -514,6 +541,7 @@ void PyiGenerator::PrintServices() const {
}
}
bool PyiGenerator::Generate(const FileDescriptor* file,
const std::string& parameter,
GeneratorContext* context,
@ -522,6 +550,8 @@ bool PyiGenerator::Generate(const FileDescriptor* file,
import_map_.clear();
// Calculate file name.
file_ = file;
// In google3, devtools/python/blaze/pytype/pytype_impl.bzl uses --pyi_out to
// directly set the output file name.
std::string filename =
parameter.empty() ? GetFileName(file, ".pyi") : parameter;
@ -552,7 +582,7 @@ bool PyiGenerator::Generate(const FileDescriptor* file,
PrintExtensions(*file_);
PrintMessages();
if (HasGenericServices(file)) {
if (opensource_runtime_ && HasGenericServices(file)) {
PrintServices();
}
return true;

@ -90,6 +90,10 @@ class PROTOC_EXPORT PyiGenerator : public google::protobuf::compiler::CodeGenera
const FieldDescriptor& field_des, const Descriptor& containing_des) const;
template <typename DescriptorT>
std::string ModuleLevelName(const DescriptorT& descriptor) const;
std::string PublicPackage() const;
std::string InternalPackage() const;
bool opensource_runtime_ = true;
// Very coarse-grained lock to ensure that Generate() is reentrant.
// Guards file_, printer_, and import_map_.

@ -8335,6 +8335,48 @@ void LazyDescriptor::Once(const ServiceDescriptor* service) {
}
}
namespace cpp {
bool HasPreservingUnknownEnumSemantics(const FieldDescriptor* field) {
return field->file()->syntax() == FileDescriptor::SYNTAX_PROTO3;
}
// Returns true if `field` is given a hasbit: it must not be weak, and it must
// either be required or carry the "optional" keyword.
bool HasHasbit(const FieldDescriptor* field) {
  // Weak fields never get a hasbit.
  if (field->options().weak()) {
    return false;
  }
  // This predicate includes proto3 message fields only if they have "optional".
  //   Foo submsg1 = 1;           // HasHasbit() == false
  //   optional Foo submsg2 = 2;  // HasHasbit() == true
  // This is slightly odd, as adding "optional" to a singular proto3 field does
  // not change the semantics or API. However whenever any field in a message
  // has a hasbit, it forces reflection to include hasbit offsets for *all*
  // fields, even if almost all of them are set to -1 (no hasbit). So to avoid
  // causing a sudden size regression for ~all proto3 messages, we give proto3
  // message fields a hasbit only if "optional" is present. If the user is
  // explicitly writing "optional", it is likely they are writing it on
  // primitive fields also.
  return field->has_optional_keyword() || field->is_required();
}
// Always reports true; `field` is not consulted.
static bool FieldEnforceUtf8(const FieldDescriptor* field) {
  static_cast<void>(field);  // Intentionally unused.
  return true;
}
// Always reports true; `file` is not consulted.
static bool FileUtf8Verification(const FileDescriptor* file) {
  static_cast<void>(file);  // Intentionally unused.
  return true;
}
// Determines which level of UTF-8 enforcement is placed on `field`:
//   kStrict - the field's file is proto3 and FieldEnforceUtf8(field) holds;
//   kVerify - otherwise, if `is_lite` is false and FileUtf8Verification()
//             holds for the field's file;
//   kNone   - in every other case.
Utf8CheckMode GetUtf8CheckMode(const FieldDescriptor* field, bool is_lite) {
  const FileDescriptor* file = field->file();
  const bool is_proto3 = file->syntax() == FileDescriptor::SYNTAX_PROTO3;
  if (is_proto3 && FieldEnforceUtf8(field)) {
    return Utf8CheckMode::kStrict;
  }
  if (!is_lite && FileUtf8Verification(file)) {
    return Utf8CheckMode::kVerify;
  }
  return Utf8CheckMode::kNone;
}
} // namespace cpp
} // namespace internal
} // namespace protobuf

@ -59,6 +59,7 @@
#include <google/protobuf/stubs/strutil.h>
#include <atomic>
#include <iterator>
#include <map>
#include <memory>
#include <set>
@ -2438,6 +2439,75 @@ inline FileDescriptor::Syntax FileDescriptor::syntax() const {
return static_cast<Syntax>(syntax_);
}
namespace internal {
// FieldRange(desc) wraps a descriptor pointer in a FieldRangeImpl<T> so that
// its fields can be visited with a range-for loop:
//   for (const FieldDescriptor* f : FieldRange(desc)) { ... }
template <typename T>
struct FieldRangeImpl;

template <typename T>
FieldRangeImpl<T> FieldRange(const T* desc) {
  return FieldRangeImpl<T>{desc};
}
template <typename T>
struct FieldRangeImpl {
struct Iterator {
using iterator_category = std::forward_iterator_tag;
using value_type = const FieldDescriptor*;
using difference_type = int;
value_type operator*() { return descriptor->field(idx); }
friend bool operator==(const Iterator& a, const Iterator& b) {
GOOGLE_DCHECK(a.descriptor == b.descriptor);
return a.idx == b.idx;
}
friend bool operator!=(const Iterator& a, const Iterator& b) {
return !(a == b);
}
Iterator& operator++() {
idx++;
return *this;
}
int idx;
const T* descriptor;
};
Iterator begin() const { return {0, descriptor}; }
Iterator end() const { return {descriptor->field_count(), descriptor}; }
const T* descriptor;
};
// The context for these functions under `cpp` is "for the C++ implementation".
// In particular, questions like "does this field have a has bit?" have a
// different answer depending on the language.
namespace cpp {
// Returns true if 'enum' semantics are such that unknown values are preserved
// in the enum field itself, rather than going to the UnknownFieldSet.
PROTOBUF_EXPORT bool HasPreservingUnknownEnumSemantics(
const FieldDescriptor* field);
PROTOBUF_EXPORT bool HasHasbit(const FieldDescriptor* field);
#ifndef SWIG
// Level of UTF-8 checking applied to string fields.
enum class Utf8CheckMode {
  // Parsing will fail if non UTF-8 data is in string fields.
  kStrict = 0,
  // Only log an error but parsing will succeed.
  kVerify = 1,
  // No UTF-8 check.
  kNone = 2,
};
PROTOBUF_EXPORT Utf8CheckMode GetUtf8CheckMode(const FieldDescriptor* field,
bool is_lite);
#endif // !SWIG
} // namespace cpp
} // namespace internal
} // namespace protobuf
} // namespace google

@ -105,22 +105,6 @@ namespace {
// Returns whether `field` is a map field (forwards to FieldDescriptor::is_map).
bool IsMapFieldInApi(const FieldDescriptor* field) {
  const bool is_map_field = field->is_map();
  return is_map_field;
}
// Sync with helpers.h.
inline bool HasHasbit(const FieldDescriptor* field) {
  // Weak fields never get a hasbit.
  if (field->options().weak()) {
    return false;
  }
  // This predicate includes proto3 message fields only if they have "optional".
  //   Foo submsg1 = 1;           // HasHasbit() == false
  //   optional Foo submsg2 = 2;  // HasHasbit() == true
  // This is slightly odd, as adding "optional" to a singular proto3 field does
  // not change the semantics or API. However whenever any field in a message
  // has a hasbit, it forces reflection to include hasbit offsets for *all*
  // fields, even if almost all of them are set to -1 (no hasbit). So to avoid
  // causing a sudden size regression for ~all proto3 messages, we give proto3
  // message fields a hasbit only if "optional" is present. If the user is
  // explicitly writing "optional", it is likely they are writing it on
  // primitive fields also.
  return field->has_optional_keyword() || field->is_required();
}
inline bool InRealOneof(const FieldDescriptor* field) {
return field->containing_oneof() &&
@ -705,7 +689,7 @@ const Message* DynamicMessageFactory::GetPrototypeNoLock(
type_info->has_bits_offset = -1;
int max_hasbit = 0;
for (int i = 0; i < type->field_count(); i++) {
if (HasHasbit(type->field(i))) {
if (internal::cpp::HasHasbit(type->field(i))) {
if (type_info->has_bits_offset == -1) {
// At least one field in the message requires a hasbit, so allocate
// hasbits.

@ -1262,6 +1262,8 @@ class EnumTypeTraits {
// Registers a singular enum extension of ExtendeeT with the ExtensionSet,
// using IsValid as the enum validator. `fn` is not used here.
template <typename ExtendeeT>
static void Register(int number, FieldType type, bool is_packed,
                     LazyEagerVerifyFnType fn) {
  // Avoid -Wunused-parameter
  static_cast<void>(fn);
  constexpr bool kIsRepeated = false;
  ExtensionSet::RegisterEnumExtension(&ExtendeeT::default_instance(), number,
                                      type, kIsRepeated, is_packed, IsValid);
}
@ -1328,6 +1330,8 @@ class RepeatedEnumTypeTraits {
// Registers a repeated enum extension of ExtendeeT with the ExtensionSet,
// using IsValid as the enum validator. `fn` is not used here.
template <typename ExtendeeT>
static void Register(int number, FieldType type, bool is_packed,
                     LazyEagerVerifyFnType fn) {
  // Avoid -Wunused-parameter
  static_cast<void>(fn);
  constexpr bool kIsRepeated = true;
  ExtensionSet::RegisterEnumExtension(&ExtendeeT::default_instance(), number,
                                      type, kIsRepeated, is_packed, IsValid);
}

@ -39,6 +39,8 @@
#include <cstdint>
#include <cstring>
#include <set>
#include <string>
#include <unordered_map>
#include <google/protobuf/stubs/logging.h>
#include <google/protobuf/stubs/common.h>
@ -48,6 +50,8 @@
#include <google/protobuf/descriptor.h>
#include <google/protobuf/descriptor.pb.h>
#include <google/protobuf/extension_set.h>
#include <google/protobuf/generated_message_tctable_gen.h>
#include <google/protobuf/generated_message_tctable_impl.h>
#include <google/protobuf/generated_message_util.h>
#include <google/protobuf/inlined_string_field.h>
#include <google/protobuf/map_field.h>
@ -289,6 +293,12 @@ Reflection::Reflection(const Descriptor* descriptor,
last_non_weak_field_index_ = descriptor_->field_count() - 1;
}
// Releases the storage behind tcparse_table_ with a raw (unsized)
// `operator delete`; no destructor is invoked on the table.
Reflection::~Reflection() {
  // No need to use sized delete. This code path is uncommon and it would not be
  // worth saving or recalculating the size.
  auto* owned_table = const_cast<internal::TcParseTableBase*>(tcparse_table_);
  ::operator delete(owned_table);
}
const UnknownFieldSet& Reflection::GetUnknownFields(
const Message& message) const {
return GetInternalMetadata(message).unknown_fields<UnknownFieldSet>(
@ -2962,6 +2972,355 @@ const MapFieldBase* Reflection::GetMapData(const Message& message,
return &(GetRaw<MapFieldBase>(message, field));
}
// Rounds `v` up to the next multiple of alignof(T); values that are already
// aligned are returned unchanged.
template <typename T>
static uint32_t AlignTo(uint32_t v) {
  // alignof(T) is a power of two, so (alignof(T) - 1) is a mask of the low
  // bits that must be cleared after bumping `v` past the boundary.
  constexpr auto kLowBits = alignof(T) - 1;
  return (v + kLowBits) & ~kLowBits;
}
// Maps the textual name of a TcParser fast-path routine (spelled
// "::_pbi::TcParser::FastXXX") to the address of the corresponding
// internal::TcParser function.
//
// Returns the matching function when `name` is present in the table below.
// Otherwise logs at DFATAL severity and returns
// &internal::TcParser::MiniParse.
static internal::TailCallParseFunc GetFastParseFunction(
const std::string& name) {
// This list must be synchronized with TcParser.
// Missing entries are replaced with MiniParse in opt mode to avoid runtime
// failures. It check-fails in debug mode.
// The map is a function-local static allocated with `new` and never deleted.
static const auto* const map =
new std::unordered_map<std::string, internal::TailCallParseFunc>{
{"::_pbi::TcParser::FastF32S1", internal::TcParser::FastF32S1},
{"::_pbi::TcParser::FastF32S2", internal::TcParser::FastF32S2},
{"::_pbi::TcParser::FastF32R1", internal::TcParser::FastF32R1},
{"::_pbi::TcParser::FastF32R2", internal::TcParser::FastF32R2},
{"::_pbi::TcParser::FastF32P1", internal::TcParser::FastF32P1},
{"::_pbi::TcParser::FastF32P2", internal::TcParser::FastF32P2},
{"::_pbi::TcParser::FastF64S1", internal::TcParser::FastF64S1},
{"::_pbi::TcParser::FastF64S2", internal::TcParser::FastF64S2},
{"::_pbi::TcParser::FastF64R1", internal::TcParser::FastF64R1},
{"::_pbi::TcParser::FastF64R2", internal::TcParser::FastF64R2},
{"::_pbi::TcParser::FastF64P1", internal::TcParser::FastF64P1},
{"::_pbi::TcParser::FastF64P2", internal::TcParser::FastF64P2},
{"::_pbi::TcParser::FastV8S1", internal::TcParser::FastV8S1},
{"::_pbi::TcParser::FastV8S2", internal::TcParser::FastV8S2},
{"::_pbi::TcParser::FastV8R1", internal::TcParser::FastV8R1},
{"::_pbi::TcParser::FastV8R2", internal::TcParser::FastV8R2},
{"::_pbi::TcParser::FastV8P1", internal::TcParser::FastV8P1},
{"::_pbi::TcParser::FastV8P2", internal::TcParser::FastV8P2},
{"::_pbi::TcParser::FastV32S1", internal::TcParser::FastV32S1},
{"::_pbi::TcParser::FastV32S2", internal::TcParser::FastV32S2},
{"::_pbi::TcParser::FastV32R1", internal::TcParser::FastV32R1},
{"::_pbi::TcParser::FastV32R2", internal::TcParser::FastV32R2},
{"::_pbi::TcParser::FastV32P1", internal::TcParser::FastV32P1},
{"::_pbi::TcParser::FastV32P2", internal::TcParser::FastV32P2},
{"::_pbi::TcParser::FastV64S1", internal::TcParser::FastV64S1},
{"::_pbi::TcParser::FastV64S2", internal::TcParser::FastV64S2},
{"::_pbi::TcParser::FastV64R1", internal::TcParser::FastV64R1},
{"::_pbi::TcParser::FastV64R2", internal::TcParser::FastV64R2},
{"::_pbi::TcParser::FastV64P1", internal::TcParser::FastV64P1},
{"::_pbi::TcParser::FastV64P2", internal::TcParser::FastV64P2},
{"::_pbi::TcParser::FastZ32S1", internal::TcParser::FastZ32S1},
{"::_pbi::TcParser::FastZ32S2", internal::TcParser::FastZ32S2},
{"::_pbi::TcParser::FastZ32R1", internal::TcParser::FastZ32R1},
{"::_pbi::TcParser::FastZ32R2", internal::TcParser::FastZ32R2},
{"::_pbi::TcParser::FastZ32P1", internal::TcParser::FastZ32P1},
{"::_pbi::TcParser::FastZ32P2", internal::TcParser::FastZ32P2},
{"::_pbi::TcParser::FastZ64S1", internal::TcParser::FastZ64S1},
{"::_pbi::TcParser::FastZ64S2", internal::TcParser::FastZ64S2},
{"::_pbi::TcParser::FastZ64R1", internal::TcParser::FastZ64R1},
{"::_pbi::TcParser::FastZ64R2", internal::TcParser::FastZ64R2},
{"::_pbi::TcParser::FastZ64P1", internal::TcParser::FastZ64P1},
{"::_pbi::TcParser::FastZ64P2", internal::TcParser::FastZ64P2},
{"::_pbi::TcParser::FastErS1", internal::TcParser::FastErS1},
{"::_pbi::TcParser::FastErS2", internal::TcParser::FastErS2},
{"::_pbi::TcParser::FastErR1", internal::TcParser::FastErR1},
{"::_pbi::TcParser::FastErR2", internal::TcParser::FastErR2},
{"::_pbi::TcParser::FastEr0S1", internal::TcParser::FastEr0S1},
{"::_pbi::TcParser::FastEr0S2", internal::TcParser::FastEr0S2},
{"::_pbi::TcParser::FastEr0R1", internal::TcParser::FastEr0R1},
{"::_pbi::TcParser::FastEr0R2", internal::TcParser::FastEr0R2},
{"::_pbi::TcParser::FastEr1S1", internal::TcParser::FastEr1S1},
{"::_pbi::TcParser::FastEr1S2", internal::TcParser::FastEr1S2},
{"::_pbi::TcParser::FastEr1R1", internal::TcParser::FastEr1R1},
{"::_pbi::TcParser::FastEr1R2", internal::TcParser::FastEr1R2},
{"::_pbi::TcParser::FastEvS1", internal::TcParser::FastEvS1},
{"::_pbi::TcParser::FastEvS2", internal::TcParser::FastEvS2},
{"::_pbi::TcParser::FastEvR1", internal::TcParser::FastEvR1},
{"::_pbi::TcParser::FastEvR2", internal::TcParser::FastEvR2},
{"::_pbi::TcParser::FastBS1", internal::TcParser::FastBS1},
{"::_pbi::TcParser::FastBS2", internal::TcParser::FastBS2},
{"::_pbi::TcParser::FastBR1", internal::TcParser::FastBR1},
{"::_pbi::TcParser::FastBR2", internal::TcParser::FastBR2},
{"::_pbi::TcParser::FastSS1", internal::TcParser::FastSS1},
{"::_pbi::TcParser::FastSS2", internal::TcParser::FastSS2},
{"::_pbi::TcParser::FastSR1", internal::TcParser::FastSR1},
{"::_pbi::TcParser::FastSR2", internal::TcParser::FastSR2},
{"::_pbi::TcParser::FastUS1", internal::TcParser::FastUS1},
{"::_pbi::TcParser::FastUS2", internal::TcParser::FastUS2},
{"::_pbi::TcParser::FastUR1", internal::TcParser::FastUR1},
{"::_pbi::TcParser::FastUR2", internal::TcParser::FastUR2},
{"::_pbi::TcParser::FastBiS1", internal::TcParser::FastBiS1},
{"::_pbi::TcParser::FastBiS2", internal::TcParser::FastBiS2},
{"::_pbi::TcParser::FastSiS1", internal::TcParser::FastSiS1},
{"::_pbi::TcParser::FastSiS2", internal::TcParser::FastSiS2},
{"::_pbi::TcParser::FastUiS1", internal::TcParser::FastUiS1},
{"::_pbi::TcParser::FastUiS2", internal::TcParser::FastUiS2},
{"::_pbi::TcParser::FastMdS1", internal::TcParser::FastMdS1},
{"::_pbi::TcParser::FastMdS2", internal::TcParser::FastMdS2},
{"::_pbi::TcParser::FastGdS1", internal::TcParser::FastGdS1},
{"::_pbi::TcParser::FastGdS2", internal::TcParser::FastGdS2},
{"::_pbi::TcParser::FastMtS1", internal::TcParser::FastMtS1},
{"::_pbi::TcParser::FastMtS2", internal::TcParser::FastMtS2},
{"::_pbi::TcParser::FastGtS1", internal::TcParser::FastGtS1},
{"::_pbi::TcParser::FastGtS2", internal::TcParser::FastGtS2},
{"::_pbi::TcParser::FastMdR1", internal::TcParser::FastMdR1},
{"::_pbi::TcParser::FastMdR2", internal::TcParser::FastMdR2},
{"::_pbi::TcParser::FastGdR1", internal::TcParser::FastGdR1},
{"::_pbi::TcParser::FastGdR2", internal::TcParser::FastGdR2},
{"::_pbi::TcParser::FastMtR1", internal::TcParser::FastMtR1},
{"::_pbi::TcParser::FastMtR2", internal::TcParser::FastMtR2},
{"::_pbi::TcParser::FastGtR1", internal::TcParser::FastGtR1},
{"::_pbi::TcParser::FastGtR2", internal::TcParser::FastGtR2},
};
auto it = map->find(name);
if (it == map->end()) {
GOOGLE_LOG(DFATAL) << "Failed to find function: " << name;
// Let's not crash in opt, just in case.
// MiniParse is always a valid parser.
return &internal::TcParser::MiniParse;
}
return it->second;
}
// Builds a minimal parse table whose single fast entry is
// TcParser::ReflectionParseLoop, and returns a pointer to its header.
// The table lives in storage obtained from `::operator new`.
const internal::TcParseTableBase* Reflection::CreateTcParseTableForMessageSet()
const {
// ParseLoop can't parse message set wire format.
// Create a dummy table that only exists to make TcParser::ParseLoop jump
// into the reflective parse loop.
using Table = internal::TcParseTable<0, 0, 0, 1, 1>;
// We use `operator new` here because the destruction will be done with
// `operator delete` unconditionally.
void* p = ::operator new(sizeof(Table));
// Header fields are given positionally: all numeric fields are zero, followed
// by the schema's default instance and a null pointer.
// NOTE(review): the meaning of each positional field is defined by
// TcParseTableBase, which is not visible here - confirm when editing.
auto* full_table = ::new (p) Table{
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, schema_.default_instance_, nullptr},
{{{&internal::TcParser::ReflectionParseLoop, {}}}}};
// The header must sit at the very start of the table so that the returned
// header pointer and the allocation address are the same.
GOOGLE_DCHECK_EQ(static_cast<void*>(&full_table->header),
static_cast<void*>(full_table));
return &full_table->header;
}
// Writes one fast-table entry into `fast_entries` for every element of
// table_info.fast_path_fields, in order. `fast_entries` must have room for
// that many entries.
void Reflection::PopulateTcParseFastEntries(
    const internal::TailCallTableInfo& table_info,
    TcParseTableBase::FastFieldEntry* fast_entries) const {
  for (const auto& fast_field : table_info.fast_path_fields) {
    auto& slot = *fast_entries++;

    // Two kinds of slots are routed to the mini parser:
    //  * slots with no field assigned (no fast entry here);
    //  * "FastEv" routines: we can't use fast parsing for these entries
    //    because we can't specify the validator, so the reflection based
    //    parser called from MiniParse is used instead.
    //    TODO(b/239592582): Implement a fast parser for these enums.
    const bool use_mini_parse =
        fast_field.field == nullptr ||
        fast_field.func_name.find("TcParser::FastEv") !=
            fast_field.func_name.npos;
    if (use_mini_parse) {
      slot = {internal::TcParser::MiniParse, {}};
      continue;
    }

    slot = {GetFastParseFunction(fast_field.func_name),
            {fast_field.coded_tag, fast_field.hasbit_idx, fast_field.aux_idx,
             static_cast<uint16_t>(schema_.GetFieldOffset(fast_field.field))}};
  }
}
// Serializes table_info.num_to_entry_table into `lookup_table` as 16-bit
// words. For each block: the low and high halves of first_fnum, the number of
// entries, then a (skipmap, field_entry_offset) pair per entry. The sequence
// is terminated by two 0xFFFF words.
static void PopulateTcParseLookupTable(
    const internal::TailCallTableInfo& table_info, uint16_t* lookup_table) {
  uint16_t* cursor = lookup_table;
  for (const auto& block : table_info.num_to_entry_table.blocks) {
    // Block header: first field number split into two words, then the count.
    cursor[0] = block.first_fnum & 0xFFFF;
    cursor[1] = block.first_fnum >> 16;
    cursor[2] = block.entries.size();
    cursor += 3;
    for (const auto& skip_entry : block.entries) {
      cursor[0] = skip_entry.skipmap;
      cursor[1] = skip_entry.field_entry_offset;
      cursor += 2;
    }
  }
  // End-of-table marker.
  cursor[0] = 0xFFFF;
  cursor[1] = 0xFFFF;
}
// Writes one FieldEntry into `entries` for every element of
// table_info.field_entries, in order. Weak fields and enum fields whose aux
// entry is an enum validator get a value-initialized entry; for the latter
// the aux entry in `table_info` is reset as well.
void Reflection::PopulateTcParseEntries(
    internal::TailCallTableInfo& table_info,
    TcParseTableBase::FieldEntry* entries) const {
  for (const auto& info : table_info.field_entries) {
    auto& out = *entries++;
    const FieldDescriptor* field = info.field;

    if (field->options().weak()) {
      // Weak fields are handled by the generated fallback function.
      // (These are handled by legacy Google-internal logic.)
      out = {};
      continue;
    }

    if (field->type() == field->TYPE_ENUM &&
        table_info.aux_entries[info.aux_idx].type ==
            internal::TailCallTableInfo::kEnumValidator) {
      // Mini parse can't handle it. Fallback to reflection.
      out = {};
      table_info.aux_entries[info.aux_idx] = {};
      continue;
    }

    out.offset = schema_.GetFieldOffset(field);

    // has_idx refers to the oneof case slot for real oneof members, to the
    // field's bit within the hasbits otherwise, or is 0 when the message has
    // no hasbits.
    const OneofDescriptor* oneof = field->real_containing_oneof();
    if (oneof != nullptr) {
      out.has_idx = schema_.oneof_case_offset_ + 4 * oneof->index();
    } else if (schema_.HasHasbits()) {
      out.has_idx =
          static_cast<int>(8 * schema_.HasBitsOffset() + info.hasbit_idx);
    } else {
      out.has_idx = 0;
    }

    out.aux_idx = info.aux_idx;
    out.type_card = info.type_card;
  }
}
// Fills `field_aux` from table_info.aux_entries, in order. Each supported aux
// entry writes exactly one FieldAux slot and advances the output pointer.
// kSubTable and kEnumValidator entries are not supported and are reported
// with GOOGLE_LOG(FATAL).
void Reflection::PopulateTcParseFieldAux(
const internal::TailCallTableInfo& table_info,
TcParseTableBase::FieldAux* field_aux) const {
for (const auto& aux_entry : table_info.aux_entries) {
switch (aux_entry.type) {
case internal::TailCallTableInfo::kNothing:
// Placeholder entry: emit a value-initialized slot.
*field_aux++ = {};
break;
case internal::TailCallTableInfo::kInlinedStringDonatedOffset:
field_aux++->offset =
static_cast<uint32_t>(schema_.inlined_string_donated_offset_);
break;
case internal::TailCallTableInfo::kSplitOffset:
field_aux++->offset = schema_.SplitOffset();
break;
case internal::TailCallTableInfo::kSplitSizeof:
field_aux++->offset = schema_.SizeofSplit();
break;
case internal::TailCallTableInfo::kSubTable:
GOOGLE_LOG(FATAL) << "Not supported";
break;
case internal::TailCallTableInfo::kSubMessage:
// Store the default instance of the sub-message for this field.
field_aux++->message_default_p =
GetDefaultMessageInstance(aux_entry.field);
break;
case internal::TailCallTableInfo::kEnumRange:
field_aux++->enum_range = {aux_entry.enum_range.start,
aux_entry.enum_range.size};
break;
case internal::TailCallTableInfo::kEnumValidator:
GOOGLE_LOG(FATAL) << "Not supported.";
break;
case internal::TailCallTableInfo::kNumericOffset:
field_aux++->offset = aux_entry.offset;
break;
}
}
}
// Builds a TcParseTableBase for this message type at runtime, using
// descriptor_ and schema_ instead of generated code.
//
// Messages using message_set_wire_format are delegated to
// CreateTcParseTableForMessageSet(). For everything else the table is a single
// ::operator new allocation laid out as:
//   [TcParseTableBase header][fast entries][field-number lookup table]
//   [field entries][aux entries][name data]
// The returned pointer refers to that allocation; nothing in this function
// frees it.
const internal::TcParseTableBase* Reflection::CreateTcParseTable() const {
using TcParseTableBase = internal::TcParseTableBase;
if (descriptor_->options().message_set_wire_format()) {
return CreateTcParseTableForMessageSet();
}
// Collect the non-stripped fields together with their has-bit and
// inlined-string indices. Both index vectors are addressed by field->index()
// and default to kNoHasbit (-1).
std::vector<const FieldDescriptor*> fields;
constexpr int kNoHasbit = -1;
std::vector<int> has_bit_indices(
static_cast<size_t>(descriptor_->field_count()), kNoHasbit);
std::vector<int> inlined_string_indices = has_bit_indices;
for (int i = 0; i < descriptor_->field_count(); ++i) {
auto* field = descriptor_->field(i);
if (schema_.IsFieldStripped(field)) continue;
fields.push_back(field);
has_bit_indices[static_cast<size_t>(field->index())] =
static_cast<int>(schema_.HasBitIndex(field));
if (IsInlined(field)) {
inlined_string_indices[static_cast<size_t>(field->index())] =
schema_.InlinedStringIndex(field);
}
}
// TailCallTableInfo is given the fields ordered by ascending field number.
std::sort(fields.begin(), fields.end(),
[](const FieldDescriptor* a, const FieldDescriptor* b) {
return a->number() < b->number();
});
// Supplies per-field options to TailCallTableInfo by querying this Reflection
// object. Only used for the duration of the table_info construction below.
class ReflectionOptionProvider final
: public internal::TailCallTableInfo::OptionProvider {
public:
explicit ReflectionOptionProvider(const Reflection& ref) : ref_(ref) {}
internal::TailCallTableInfo::PerFieldOptions GetForField(
const FieldDescriptor* field) const final {
return {ref_.IsLazyField(field), //
ref_.IsInlined(field), //
// Only LITE can be implicitly weak.
/* is_implicitly_weak */ false,
// We could change this to use direct table.
// Might be easier to do when all messages support TDP.
/* use_direct_tcparser_table */ false,
/* is_lite */ false, //
ref_.schema_.IsSplit(field)};
}
private:
const Reflection& ref_;
};
internal::TailCallTableInfo table_info(
descriptor_, fields, ReflectionOptionProvider(*this), has_bit_indices,
inlined_string_indices);
// The fast table must hold exactly 2^table_size_log2 entries.
const size_t fast_entries_count = table_info.fast_path_fields.size();
GOOGLE_CHECK_EQ(fast_entries_count, 1 << table_info.table_size_log2);
// Compute the byte offset (from the start of the table) of each trailing
// section; every section start is passed through AlignTo for its element type.
const uint16_t lookup_table_offset = AlignTo<uint16_t>(
sizeof(TcParseTableBase) +
fast_entries_count * sizeof(TcParseTableBase::FastFieldEntry));
const uint32_t field_entry_offset = AlignTo<TcParseTableBase::FieldEntry>(
lookup_table_offset +
sizeof(uint16_t) * table_info.num_to_entry_table.size16());
const uint32_t aux_offset = AlignTo<TcParseTableBase::FieldAux>(
field_entry_offset +
sizeof(TcParseTableBase::FieldEntry) * fields.size());
// Total allocation size: everything up to the aux section, the aux entries,
// and finally the name data.
// NOTE(review): the size_t expression is narrowed to int here.
int byte_size =
aux_offset +
sizeof(TcParseTableBase::FieldAux) * table_info.aux_entries.size() +
sizeof(char) * table_info.field_name_data.size();
void* p = ::operator new(byte_size);
// Construct the header in place at the start of the allocation. The
// initializers are positional, so their order has to follow the member order
// of TcParseTableBase.
auto* res = ::new (p) TcParseTableBase{
static_cast<uint16_t>(schema_.HasHasbits() ? schema_.HasBitsOffset() : 0),
// extensions handled through reflection.
0, 0, 0,
static_cast<uint32_t>(fields.empty() ? 0 : fields.back()->number()),
static_cast<uint8_t>((fast_entries_count - 1) << 3), lookup_table_offset,
table_info.num_to_entry_table.skipmap32, field_entry_offset,
static_cast<uint16_t>(fields.size()),
static_cast<uint16_t>(table_info.aux_entries.size()), aux_offset,
schema_.default_instance_, &internal::TcParser::ReflectionFallback};
// Now copy the rest of the payloads
PopulateTcParseFastEntries(table_info, res->fast_entry(0));
PopulateTcParseLookupTable(table_info, res->field_lookup_begin());
PopulateTcParseEntries(table_info, res->field_entries_begin());
PopulateTcParseFieldAux(table_info, res->field_aux(0u));
// Copy the name data.
memcpy(res->name_data(), table_info.field_name_data.data(),
table_info.field_name_data.size());
// Validation to make sure we used all the bytes correctly.
// The end of the name data must coincide with the end of the allocation.
GOOGLE_CHECK_EQ(res->name_data() + table_info.field_name_data.size() -
reinterpret_cast<char*>(res),
byte_size);
return res;
}
namespace {
// Helper function to transform migration schema into reflection schema.

@ -183,12 +183,19 @@ struct alignas(uint64_t) TcParseTableBase {
// Returns a read-only pointer to fast-path entry `idx`. The fast entries start
// directly after this table object (`this + 1`).
const FastFieldEntry* fast_entry(size_t idx) const {
  const auto* first_entry = reinterpret_cast<const FastFieldEntry*>(this + 1);
  return first_entry + idx;
}
// Mutable counterpart of the const fast_entry() accessor: returns a writable
// pointer to fast-path entry `idx`, which lives directly after this object.
FastFieldEntry* fast_entry(size_t idx) {
  auto* first_entry = reinterpret_cast<FastFieldEntry*>(this + 1);
  return first_entry + idx;
}
// Returns a read-only pointer to the first element of the field lookup table,
// which is located `lookup_table_offset` bytes from the start of this table.
const uint16_t* field_lookup_begin() const {
  const uintptr_t table_start = reinterpret_cast<uintptr_t>(this);
  return reinterpret_cast<const uint16_t*>(table_start + lookup_table_offset);
}
// Returns a writable pointer to the first element of the field lookup table,
// which is located `lookup_table_offset` bytes from the start of this table.
uint16_t* field_lookup_begin() {
  const uintptr_t table_start = reinterpret_cast<uintptr_t>(this);
  return reinterpret_cast<uint16_t*>(table_start + lookup_table_offset);
}
// Field entry for all fields.
struct FieldEntry {
@ -203,6 +210,10 @@ struct alignas(uint64_t) TcParseTableBase {
return reinterpret_cast<const FieldEntry*>(
reinterpret_cast<uintptr_t>(this) + field_entries_offset);
}
// Returns a writable pointer to the first FieldEntry, which is located
// `field_entries_offset` bytes from the start of this table.
FieldEntry* field_entries_begin() {
  const uintptr_t table_start = reinterpret_cast<uintptr_t>(this);
  return reinterpret_cast<FieldEntry*>(table_start + field_entries_offset);
}
// Auxiliary entries for field types that need extra information.
union FieldAux {
@ -234,6 +245,11 @@ struct alignas(uint64_t) TcParseTableBase {
aux_offset) +
idx;
}
// Returns a writable pointer to auxiliary entry `idx`. The auxiliary entries
// start `aux_offset` bytes from the start of this table.
FieldAux* field_aux(uint32_t idx) {
  const uintptr_t table_start = reinterpret_cast<uintptr_t>(this);
  auto* first_aux = reinterpret_cast<FieldAux*>(table_start + aux_offset);
  return first_aux + idx;
}
// Returns the auxiliary entry referenced by `entry`, i.e. the one at index
// `entry->aux_idx`.
const FieldAux* field_aux(const FieldEntry* entry) const {
  const auto aux_index = entry->aux_idx;
  return field_aux(aux_index);
}
@ -244,6 +260,11 @@ struct alignas(uint64_t) TcParseTableBase {
aux_offset +
num_aux_entries * sizeof(FieldAux));
}
// Returns a writable pointer to the name data, which begins immediately after
// the `num_aux_entries` FieldAux entries stored at `aux_offset`.
char* name_data() {
  const uintptr_t table_start = reinterpret_cast<uintptr_t>(this);
  return reinterpret_cast<char*>(table_start + aux_offset +
                                 num_aux_entries * sizeof(FieldAux));
}
};
#if defined(_MSC_VER) && !defined(_WIN64)

@ -35,6 +35,7 @@
#include <google/protobuf/message.h>
#include <google/protobuf/parse_context.h>
#include <google/protobuf/unknown_field_set.h>
#include <google/protobuf/wire_format.h>
// clang-format off
#include <google/protobuf/port_def.inc>
@ -48,6 +49,41 @@ const char* TcParser::GenericFallback(PROTOBUF_TC_PARAM_DECL) {
return GenericFallbackImpl<Message, UnknownFieldSet>(PROTOBUF_TC_PARAM_PASS);
}
// Fallback used by reflection-built parse tables: parses the single field
// identified by `data.tag()` through WireFormat's reflective field parser.
//
// A zero tag or an END_GROUP tag is not parsed here; it is recorded on the
// parse context via SetLastTag() and `ptr` is returned unchanged.
const char* TcParser::ReflectionFallback(PROTOBUF_TC_PARAM_DECL) {
  // Flush the has-bits accumulated so far back into the message before
  // handing control to reflection.
  SyncHasbits(msg, hasbits, table);

  const uint32_t tag = data.tag();
  const bool is_terminator =
      tag == 0 || (tag & 7) == WireFormatLite::WIRETYPE_END_GROUP;
  if (is_terminator) {
    ctx->SetLastTag(tag);
    return ptr;
  }

  auto* full_msg = down_cast<Message*>(msg);
  auto* descriptor = full_msg->GetDescriptor();
  auto* reflection = full_msg->GetReflection();

  const int field_number = WireFormatLite::GetTagFieldNumber(tag);
  const FieldDescriptor* field = descriptor->FindFieldByNumber(field_number);

  // Not a regular field: it may still be a known extension. Prefer the pool
  // attached to the parse context, when there is one.
  if (field == nullptr && descriptor->IsExtensionNumber(field_number)) {
    auto* pool = ctx->data().pool;
    field = (pool == nullptr)
                ? reflection->FindKnownExtensionByNumber(field_number)
                : pool->FindExtensionByNumber(descriptor, field_number);
  }

  return WireFormat::_InternalParseAndMergeField(full_msg, ptr, ctx, tag,
                                                 reflection, field);
}
// Parse-loop entry point that ignores the table-driven state entirely and
// forwards to WireFormat's reflective parse loop.
const char* TcParser::ReflectionParseLoop(PROTOBUF_TC_PARAM_DECL) {
  // These tail-call parameters are intentionally unused here.
  static_cast<void>(data);
  static_cast<void>(table);
  static_cast<void>(hasbits);

  auto* full_msg = down_cast<Message*>(msg);
  return WireFormat::_InternalParse(full_msg, ptr, ctx);
}
} // namespace internal
} // namespace protobuf
} // namespace google

@ -0,0 +1,779 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <google/protobuf/generated_message_tctable_gen.h>
#include <algorithm>
#include <limits>
#include <string>
#include <utility>
#include <vector>
#include <google/protobuf/descriptor.h>
#include <google/protobuf/descriptor.pb.h>
#include <google/protobuf/generated_message_tctable_decl.h>
#include <google/protobuf/generated_message_tctable_impl.h>
#include <google/protobuf/wire_format.h>
// Must come last:
#include <google/protobuf/port_def.inc>
namespace google {
namespace protobuf {
namespace internal {
namespace {
// Determines whether the distinct values of `enum_type` form one contiguous
// run that can be described by an int16_t start and a uint16_t length.
//
// On success, stores the smallest value in `start` and the number of distinct
// values in `size`, and returns true. Otherwise returns false and leaves both
// outputs untouched. The enum must declare at least one value.
bool GetEnumValidationRange(const EnumDescriptor* enum_type, int16_t& start,
                            uint16_t& size) {
  GOOGLE_CHECK_GT(enum_type->value_count(), 0) << enum_type->DebugString();

  // Gather every declared number, then sort and drop duplicates (aliases).
  const int value_count = static_cast<int>(enum_type->value_count());
  std::vector<int> numbers;
  for (int idx = 0; idx < value_count; ++idx) {
    numbers.push_back(enum_type->value(idx)->number());
  }
  std::sort(numbers.begin(), numbers.end());
  numbers.erase(std::unique(numbers.begin(), numbers.end()), numbers.end());

  const int lowest = numbers.front();
  const bool start_fits = std::numeric_limits<int16_t>::min() <= lowest &&
                          lowest <= std::numeric_limits<int16_t>::max();
  const bool size_fits =
      numbers.size() <= std::numeric_limits<uint16_t>::max();
  // With duplicates removed, the values are contiguous exactly when the last
  // one equals first + count - 1.
  const bool contiguous =
      static_cast<int>(lowest + numbers.size() - 1) == numbers.back();

  if (!(start_fits && size_fits && contiguous)) {
    return false;
  }
  start = static_cast<int16_t>(lowest);
  size = static_cast<uint16_t>(numbers.size());
  return true;
}
// Computes the name of the TcParser fast-path function for `entry` and stores
// it, together with the auxiliary index that function expects, in `info`.
//
// The name is assembled as:
//   "::_pbi::TcParser::Fast" + <type code> + <cardinality> + <tag length>
//
// For a packed repeated enum that requires validation, a DFATAL message is
// logged and `info` is left unmodified.
void PopulateFastFieldEntry(const TailCallTableInfo::FieldEntryInfo& entry,
                            const TailCallTableInfo::PerFieldOptions& options,
                            TailCallTableInfo::FastFieldInfo& info) {
  // Base type code, indexed by FieldDescriptor::Type. Strings and enums start
  // out empty and get their code appended further down.
  static const char* kPrefix[] = {
      nullptr,  // 0
      "F64",    // TYPE_DOUBLE = 1,
      "F32",    // TYPE_FLOAT = 2,
      "V64",    // TYPE_INT64 = 3,
      "V64",    // TYPE_UINT64 = 4,
      "V32",    // TYPE_INT32 = 5,
      "F64",    // TYPE_FIXED64 = 6,
      "F32",    // TYPE_FIXED32 = 7,
      "V8",     // TYPE_BOOL = 8,
      "",       // TYPE_STRING = 9,
      "G",      // TYPE_GROUP = 10,
      "M",      // TYPE_MESSAGE = 11,
      "B",      // TYPE_BYTES = 12,
      "V32",    // TYPE_UINT32 = 13,
      "",       // TYPE_ENUM = 14,
      "F32",    // TYPE_SFIXED32 = 15,
      "F64",    // TYPE_SFIXED64 = 16,
      "Z32",    // TYPE_SINT32 = 17,
      "Z64",    // TYPE_SINT64 = 18,
  };

  const FieldDescriptor* field = entry.field;
  const auto type = field->type();
  uint8_t aux_idx = static_cast<uint8_t>(entry.aux_idx);

  std::string name = "::_pbi::TcParser::Fast";
  name.append(kPrefix[type]);

  if (type == FieldDescriptor::TYPE_ENUM) {
    // Enum type codes:
    //   V32          open enums (no validation)
    //   Er, Er0, Er1 enums whose values form a sequential range
    //   Ev           every other enum
    if (cpp::HasPreservingUnknownEnumSemantics(field)) {
      name.append("V32");
    } else if (field->is_repeated() && field->is_packed()) {
      GOOGLE_LOG(DFATAL) << "Enum validation not handled: " << field->DebugString();
      return;
    } else {
      int16_t start;
      uint16_t size;
      if (!GetEnumValidationRange(field->enum_type(), start, size)) {
        name.append("Ev");
      } else {
        name.append("Er");
        const int max_value = start + size - 1;
        // Small ranges that begin at 0 or 1 use a dedicated variant; for
        // those the largest valid value is stored in place of the aux index.
        if (max_value <= 127 && (start == 0 || start == 1)) {
          name.append(1, '0' + start);
          aux_idx = max_value;
        }
      }
    }
  } else if (type == FieldDescriptor::TYPE_STRING ||
             type == FieldDescriptor::TYPE_BYTES) {
    if (type == FieldDescriptor::TYPE_STRING) {
      // The string type code depends on the UTF-8 checking mode.
      switch (internal::cpp::GetUtf8CheckMode(field, options.is_lite)) {
        case internal::cpp::Utf8CheckMode::kStrict:
          name.append("U");
          break;
        case internal::cpp::Utf8CheckMode::kVerify:
          name.append("S");
          break;
        case internal::cpp::Utf8CheckMode::kNone:
          name.append("B");
          break;
      }
    }
    if (options.is_string_inlined) {
      // Inlined strings carry their inlined-string index instead of the
      // auxiliary index.
      name.append("i");
      GOOGLE_CHECK(!field->is_repeated());
      aux_idx = static_cast<uint8_t>(entry.inlined_string_idx);
    }
  } else if (type == FieldDescriptor::TYPE_MESSAGE ||
             type == FieldDescriptor::TYPE_GROUP) {
    // "t": sub-message parsed via its parse table; "d": via default instance.
    name.append(options.use_direct_tcparser_table ? "t" : "d");
  }

  // Cardinality code:
  //   `P` packed repeated
  //   `R` non-packed repeated
  //   `O` member of a real oneof
  //   `S` everything else (optional or implicit-presence singular fields)
  const char* cardinality = "S";
  if (field->is_packed()) {
    cardinality = "P";
  } else if (field->is_repeated()) {
    cardinality = "R";
  } else if (field->real_containing_oneof()) {
    cardinality = "O";
  }
  name.append(cardinality);

  // Tag length: fast parsing only handles 1- or 2-byte tags, and field numbers
  // below 16 fit in a single byte.
  name.append(field->number() < 16 ? "1" : "2");

  info.func_name = std::move(name);
  info.aux_idx = aux_idx;
}
// Returns true when the field described by `entry` can be dispatched through
// the fast (tail-call) table, and false when it must be left to mini parsing
// or a fallback.
bool IsFieldEligibleForFastParsing(
    const TailCallTableInfo::FieldEntryInfo& entry,
    const TailCallTableInfo::OptionProvider& option_provider) {
  const auto* field = entry.field;
  const auto options = option_provider.GetForField(field);

  // The fast path has no handling for map, oneof, weak, lazy or split fields.
  const bool unsupported_kind =
      field->is_map() || field->real_containing_oneof() ||
      field->options().weak() || options.is_implicitly_weak ||
      options.is_lazy || options.should_split;
  if (unsupported_kind) {
    return false;
  }

  // Value that will end up in the fast entry's 8-bit aux slot. It is normally
  // the auxiliary index, but inlined strings store something else there.
  int fast_aux_value = entry.aux_idx;

  const auto type = field->type();
  if (type == FieldDescriptor::TYPE_ENUM) {
    // Open enums need no validation at parse time and are handled like an
    // int32. Closed enums are only supported when they are not packed.
    if (!cpp::HasPreservingUnknownEnumSemantics(field) &&
        field->is_repeated() && field->is_packed()) {
      return false;
    }
  } else if (type == FieldDescriptor::TYPE_STRING ||
             type == FieldDescriptor::TYPE_BYTES) {
    // Only the default string representation is handled on the fast path.
    if (field->options().ctype() != FieldOptions::STRING) {
      return false;
    }
    if (options.is_string_inlined) {
      GOOGLE_CHECK(!field->is_repeated());
      // For inlined strings the fast entry's aux slot carries the donation
      // state index, so that is the value whose range must be checked.
      fast_aux_value = entry.inlined_string_idx;
    }
  }

  if (cpp::HasHasbit(field)) {
    // The tail-call parser can only update the first 32 has-bits; fields
    // beyond that are left to mini parsing / fallback.
    GOOGLE_CHECK_GE(entry.hasbit_idx, 0) << field->DebugString();
    if (entry.hasbit_idx >= 32) return false;
  }

  // The aux slot of a fast entry is a uint8_t.
  if (fast_aux_value > std::numeric_limits<uint8_t>::max()) {
    return false;
  }

  // The tail-call parser reads tags of at most two varint bytes:
  //   byte 0     byte 1
  //   1nnnnttt   0nnnnnnn
  //    ^^^^       ^^^^^^^
  // That is 14 payload bits, three of which hold the wire type, which leaves
  // 11 bits for the field number.
  const bool number_fits = field->number() < (1 << 11);
  return number_fits;
}
// Builds a fast-path table with 2^table_size_log2 slots from `field_entries`.
//
// Each eligible field is assigned to the slot selected by the low bits of its
// coded tag. When two fields map to the same slot, the one that appears first
// in `field_entries` keeps it. Unused slots stay default-constructed.
std::vector<TailCallTableInfo::FastFieldInfo> SplitFastFieldsForSize(
    const std::vector<TailCallTableInfo::FieldEntryInfo>& field_entries,
    int table_size_log2,
    const TailCallTableInfo::OptionProvider& option_provider) {
  std::vector<TailCallTableInfo::FastFieldInfo> table(1 << table_size_log2);
  const uint32_t slot_mask = static_cast<uint32_t>(table.size() - 1);

  for (const auto& entry : field_entries) {
    if (!IsFieldEligibleForFastParsing(entry, option_provider)) {
      continue;
    }
    const auto* field = entry.field;
    const auto options = option_provider.GetForField(field);

    // Build the tag as it appears on the wire (varint-coded). A tag wider than
    // 7 bits has its high bits shifted up by one position and the continuation
    // bit set: coded = lobits + 2 * hibits + 128.
    uint32_t coded_tag = WireFormat::MakeTag(field);
    const uint32_t high_bits = coded_tag & 0xFFFFFF80;
    if (high_bits != 0) {
      coded_tag += high_bits + 128;
    }

    // The slot is taken straight from the low bits of the field number as they
    // sit in the first tag byte, so the parse loop needs no arithmetic to
    // dispatch. The largest supported table has 32 entries:
    //        byte 0   byte 1
    //   tag: 1nnnnttt 0nnnnnnn
    //        ^^^^^
    //         idx (table_size_log2=5)
    // Consequently, a field number that does not fit in the lower 4 bits
    // always has the top bit of its table index set.
    const uint32_t slot_index = (coded_tag >> 3) & slot_mask;
    TailCallTableInfo::FastFieldInfo& slot = table[slot_index];
    if (slot.field != nullptr) {
      // Already claimed by an earlier field.
      continue;
    }

    GOOGLE_CHECK(slot.func_name.empty()) << slot.func_name;
    PopulateFastFieldEntry(entry, options, slot);
    slot.field = field;
    slot.coded_tag = coded_tag;
    // Fields without presence set an out-of-range has-bit instead (tail-call
    // parsing keeps has-bits in a uint64_t but only stores 32 of them).
    slot.hasbit_idx = cpp::HasHasbit(field) ? entry.hasbit_idx : 63;
  }
  return table;
}
// Returns the subset of `fields` that mini parsing does not handle, i.e. the
// fields for which generated fallback code is still required. The relative
// order of `fields` is preserved.
std::vector<const FieldDescriptor*> FilterMiniParsedFields(
    const std::vector<const FieldDescriptor*>& fields,
    const TailCallTableInfo::OptionProvider& option_provider
) {
  std::vector<const FieldDescriptor*> needs_fallback;
  for (const auto* field : fields) {
    const auto options = option_provider.GetForField(field);

    bool mini_parsed;
    switch (field->type()) {
      // Plain numeric fields are always covered by MiniParse.
      case FieldDescriptor::TYPE_DOUBLE:
      case FieldDescriptor::TYPE_FLOAT:
      case FieldDescriptor::TYPE_FIXED32:
      case FieldDescriptor::TYPE_SFIXED32:
      case FieldDescriptor::TYPE_FIXED64:
      case FieldDescriptor::TYPE_SFIXED64:
      case FieldDescriptor::TYPE_BOOL:
      case FieldDescriptor::TYPE_UINT32:
      case FieldDescriptor::TYPE_SINT32:
      case FieldDescriptor::TYPE_INT32:
      case FieldDescriptor::TYPE_UINT64:
      case FieldDescriptor::TYPE_SINT64:
      case FieldDescriptor::TYPE_INT64:
        mini_parsed = true;
        break;

      case FieldDescriptor::TYPE_ENUM:
        // TODO(b/206890171): handle packed repeated closed enums
        // Non-packed repeated closed enums could be handled using tables, but
        // fallback code is still generated for every repeated closed enum so
        // that packed encoding is handled. This is because of the lite/full
        // split when handling invalid enum values in a packed field.
        mini_parsed = !(field->is_repeated() &&
                        !cpp::HasPreservingUnknownEnumSemantics(field));
        break;

      case FieldDescriptor::TYPE_BYTES:
      case FieldDescriptor::TYPE_STRING:
        // TODO(b/198211897): support InlinedStringField.
        mini_parsed = !options.is_string_inlined;
        break;

      case FieldDescriptor::TYPE_MESSAGE:
      case FieldDescriptor::TYPE_GROUP:
        // TODO(b/210762816): support remaining field types.
        mini_parsed = !(field->is_map() || field->options().weak() ||
                        options.is_implicitly_weak || options.is_lazy);
        break;

      default:
        mini_parsed = false;
        break;
    }

    if (!mini_parsed) {
      needs_fallback.push_back(field);
    }
  }
  return needs_fallback;
}
std::vector<uint8_t> GenerateFieldNames(
const Descriptor* descriptor,
const std::vector<const FieldDescriptor*>& fields) {
static constexpr int kMaxNameLength = 255;
std::vector<uint8_t> out;
// First, we output the size of each string, as an unsigned byte. The first
// string is the message name.
int count = 1;
out.push_back(std::min(static_cast<int>(descriptor->full_name().size()),
kMaxNameLength));
for (const auto* field : fields) {
out.push_back(field->name().size());
++count;
}
while (count & 7) { // align to an 8-byte boundary
out.push_back(0);
++count;
}
// The message name is stored at the beginning of the string
std::string message_name = descriptor->full_name();
if (message_name.size() > kMaxNameLength) {
static constexpr int kNameHalfLength = (kMaxNameLength - 3) / 2;
message_name = StrCat(
message_name.substr(0, kNameHalfLength), "...",
message_name.substr(message_name.size() - kNameHalfLength));
}
out.insert(out.end(), message_name.begin(), message_name.end());
// Then we output the actual field names
for (const auto* field : fields) {
out.insert(out.end(), field->name().begin(), field->name().end());
}
return out;
}
// Builds the field-number -> field-entry lookup structure for
// `field_descriptors`. The caller is expected to pass the fields in strictly
// increasing field-number order (the second loop CHECKs this for numbers
// above 32).
//
// Field numbers 1..32 are represented by `skipmap32`: bit (number - 1) is
// cleared when a field with that number exists and left set otherwise.
// Larger field numbers are represented by blocks of 16-bit skip entries; each
// skip entry covers 16 consecutive field numbers and uses the same
// "bit cleared == field present" convention.
TailCallTableInfo::NumToEntryTable MakeNumToEntryTable(
    const std::vector<const FieldDescriptor*>& field_descriptors) {
  TailCallTableInfo::NumToEntryTable num_to_entry_table;
  num_to_entry_table.skipmap32 = static_cast<uint32_t>(-1);

  // field_entry_index walks the fields in order; it is also the value stored
  // in each newly created skip entry.
  uint16_t field_entry_index = 0;
  const uint16_t N = static_cast<uint16_t>(field_descriptors.size());

  // First, handle field numbers 1-32, which affect only the initial
  // skipmap32 and don't generate additional skip-entry blocks.
  for (; field_entry_index != N; ++field_entry_index) {
    const auto* field_descriptor = field_descriptors[field_entry_index];
    if (field_descriptor->number() > 32) break;
    const auto skipmap32_index = field_descriptor->number() - 1;
    // Shift an unsigned one: for field number 32 the index is 31, and
    // shifting a signed int into its sign bit must be avoided.
    num_to_entry_table.skipmap32 -= uint32_t{1} << skipmap32_index;
  }
  // If all the field numbers were less than or equal to 32, we will have
  // no further entries to process, and we are already done.
  if (field_entry_index == N) return num_to_entry_table;

  // `block` points at the SkipEntryBlock currently being appended to; it is
  // refreshed every time a new block is pushed.
  TailCallTableInfo::SkipEntryBlock* block = nullptr;
  bool start_new_block = true;
  // To determine sparseness, track the field number corresponding to
  // the start of the most recent skip entry.
  uint32_t last_skip_entry_start = 0;
  for (; field_entry_index != N; ++field_entry_index) {
    const auto* field_descriptor = field_descriptors[field_entry_index];
    const uint32_t fnum = static_cast<uint32_t>(field_descriptor->number());
    GOOGLE_CHECK_GT(fnum, last_skip_entry_start);
    if (!start_new_block) {
      // If the next field number is within 15 of the last_skip_entry_start, we
      // continue writing just to that entry. If it's between 16 and 31 more,
      // then we just extend the current block by one. If it's more than 31
      // more, we have to add empty skip entries in order to continue using the
      // existing block. Obviously, if it's just 32 more, it doesn't make sense
      // to start a whole new block, since new blocks mean having to write out
      // their starting field number, which is 32 bits, as well as the size of
      // the additional block, which is 16... while an empty SkipEntry16 only
      // costs 32 bits. So if it was 48 more, it's a slight space win; we save
      // 16 bits, but probably at the cost of slower run time. We're choosing
      // 96 for now.
      if (fnum - last_skip_entry_start > 96) start_new_block = true;
    }
    if (start_new_block) {
      num_to_entry_table.blocks.push_back({fnum});
      block = &num_to_entry_table.blocks.back();
      start_new_block = false;
    }

    // Locate the 16-wide skip entry (and the bit inside it) for this field,
    // adding empty (all-ones) entries to the block as needed to reach it.
    const auto skip_entry_num = (fnum - block->first_fnum) / 16;
    const auto skip_entry_index = (fnum - block->first_fnum) % 16;
    while (skip_entry_num >= block->entries.size()) {
      block->entries.push_back({0xFFFF, field_entry_index});
    }
    block->entries[skip_entry_num].skipmap -= 1 << skip_entry_index;

    last_skip_entry_start = fnum - skip_entry_index;
  }
  return num_to_entry_table;
}
// Computes the `type_card` bit set for `field`: its cardinality, its wire and
// storage type (with packed variants for packed repeated fields), plus the
// string-representation and split markers where they apply.
uint16_t MakeTypeCardForField(
    const FieldDescriptor* field,
    const TailCallTableInfo::PerFieldOptions& options) {
  namespace fl = internal::field_layout;

  // Chooses between the packed and the plain flavour of a type. The packed
  // check is only evaluated for the types that actually ask for it.
  const auto pick = [field](uint16_t packed_bits,
                            uint16_t plain_bits) -> uint16_t {
    return field->is_repeated() && field->is_packed() ? packed_bits
                                                      : plain_bits;
  };

  // Cardinality bits come first.
  uint16_t type_card;
  if (internal::cpp::HasHasbit(field)) {
    type_card = fl::kFcOptional;
  } else if (field->is_repeated()) {
    type_card = fl::kFcRepeated;
  } else if (field->real_containing_oneof()) {
    type_card = fl::kFcOneof;
  } else {
    type_card = fl::kFcSingular;
  }

  // Type bits, using the convenience aliases from field_layout.
  const auto type = field->type();
  switch (type) {
    // Fixed-width types.
    case FieldDescriptor::TYPE_DOUBLE:
      type_card |= pick(fl::kPackedDouble, fl::kDouble);
      break;
    case FieldDescriptor::TYPE_FLOAT:
      type_card |= pick(fl::kPackedFloat, fl::kFloat);
      break;
    case FieldDescriptor::TYPE_FIXED32:
      type_card |= pick(fl::kPackedFixed32, fl::kFixed32);
      break;
    case FieldDescriptor::TYPE_SFIXED32:
      type_card |= pick(fl::kPackedSFixed32, fl::kSFixed32);
      break;
    case FieldDescriptor::TYPE_FIXED64:
      type_card |= pick(fl::kPackedFixed64, fl::kFixed64);
      break;
    case FieldDescriptor::TYPE_SFIXED64:
      type_card |= pick(fl::kPackedSFixed64, fl::kSFixed64);
      break;

    // Varint types.
    case FieldDescriptor::TYPE_BOOL:
      type_card |= pick(fl::kPackedBool, fl::kBool);
      break;
    case FieldDescriptor::TYPE_UINT32:
      type_card |= pick(fl::kPackedUInt32, fl::kUInt32);
      break;
    case FieldDescriptor::TYPE_SINT32:
      type_card |= pick(fl::kPackedSInt32, fl::kSInt32);
      break;
    case FieldDescriptor::TYPE_INT32:
      type_card |= pick(fl::kPackedInt32, fl::kInt32);
      break;
    case FieldDescriptor::TYPE_UINT64:
      type_card |= pick(fl::kPackedUInt64, fl::kUInt64);
      break;
    case FieldDescriptor::TYPE_SINT64:
      type_card |= pick(fl::kPackedSInt64, fl::kSInt64);
      break;
    case FieldDescriptor::TYPE_INT64:
      type_card |= pick(fl::kPackedInt64, fl::kInt64);
      break;

    case FieldDescriptor::TYPE_ENUM: {
      int16_t start;
      uint16_t size;
      if (internal::cpp::HasPreservingUnknownEnumSemantics(field)) {
        // Open enum: no validation is required.
        type_card |= pick(fl::kPackedOpenEnum, fl::kOpenEnum);
      } else if (GetEnumValidationRange(field->enum_type(), start, size)) {
        // Closed enum validated by a range check (start/length in FieldAux).
        type_card |= pick(fl::kPackedEnumRange, fl::kEnumRange);
      } else {
        // Closed enum validated by the generated _IsValid function.
        type_card |= pick(fl::kPackedEnum, fl::kEnum);
      }
      break;
    }

    case FieldDescriptor::TYPE_BYTES:
      type_card |= fl::kBytes;
      break;

    case FieldDescriptor::TYPE_STRING:
      // The string flavour follows the UTF-8 checking mode.
      switch (internal::cpp::GetUtf8CheckMode(field, options.is_lite)) {
        case internal::cpp::Utf8CheckMode::kStrict:
          type_card |= fl::kUtf8String;
          break;
        case internal::cpp::Utf8CheckMode::kVerify:
          type_card |= fl::kRawString;
          break;
        case internal::cpp::Utf8CheckMode::kNone:
          type_card |= fl::kBytes;
          break;
      }
      break;

    case FieldDescriptor::TYPE_GROUP:
      type_card |= 0 | fl::kMessage | fl::kRepGroup;
      if (options.use_direct_tcparser_table) {
        type_card |= fl::kTvTable;
      } else {
        type_card |= fl::kTvDefault;
      }
      break;

    case FieldDescriptor::TYPE_MESSAGE:
      if (field->is_map()) {
        type_card |= fl::kMap;
        break;
      }
      type_card |= fl::kMessage;
      if (options.is_lazy) {
        type_card |= fl::kRepLazy;
      } else if (options.is_implicitly_weak) {
        type_card |= fl::kRepIWeak;
      }
      if (options.use_direct_tcparser_table) {
        type_card |= fl::kTvTable;
      } else {
        type_card |= fl::kTvDefault;
      }
      break;
  }

  // String and bytes fields additionally record their in-memory
  // representation, which differs between repeated and singular fields.
  const bool is_string_like = type == FieldDescriptor::TYPE_BYTES ||
                              type == FieldDescriptor::TYPE_STRING;
  if (is_string_like) {
    if (field->is_repeated()) {
      type_card |= fl::kRepSString;
    } else {
      type_card |= fl::kRepAString;
    }
  }

  if (options.should_split) {
    type_card |= fl::kSplitTrue;
  }
  return type_card;
}
} // namespace
// Computes everything needed to emit a table-driven parse table for
// `descriptor`.
//
// Parameters:
//   descriptor             - the message type being described.
//   ordered_fields         - the message's fields; they are processed in the
//                            given order (NOTE(review): callers are presumably
//                            expected to sort by field number - confirm).
//   option_provider        - yields the per-field options (lazy, inlined,
//                            split, ...).
//   has_bit_indices        - has-bit index per field, addressed by
//                            field->index().
//   inlined_string_indices - inlined-string index per field, addressed by
//                            field->index().
//
// The constructor fills, in this order: the reserved leading aux entries, one
// field entry (plus aux entry where required) per field, the fast-path table
// and its size, the fallback field list, the field-number lookup table, the
// field name data, and the use_generated_fallback flag.
TailCallTableInfo::TailCallTableInfo(
const Descriptor* descriptor,
const std::vector<const FieldDescriptor*>& ordered_fields,
const OptionProvider& option_provider,
const std::vector<int>& has_bit_indices,
const std::vector<int>& inlined_string_indices) {
// If `inlined_string_indices` is non-empty, store the donation state offset
// in the first auxiliary entry (index 0).
if (!inlined_string_indices.empty()) {
aux_entries.resize(1); // pad if necessary
aux_entries[0] = {kInlinedStringDonatedOffset};
}
// If this message is split, store the split pointer offset and the split
// size in the second and third auxiliary entries (indices 1 and 2).
for (auto* field : ordered_fields) {
if (option_provider.GetForField(field).should_split) {
aux_entries.resize(3); // pad if necessary
aux_entries[1] = {kSplitOffset};
aux_entries[2] = {kSplitSizeof};
break;
}
}
// Fill in mini table entries.
for (const FieldDescriptor* field : ordered_fields) {
auto options = option_provider.GetForField(field);
// Fields without a has-bit get a has-bit index of -1.
field_entries.push_back(
{field, internal::cpp ::HasHasbit(field)
? has_bit_indices[static_cast<size_t>(field->index())]
: -1});
auto& entry = field_entries.back();
entry.type_card = MakeTypeCardForField(field, options);
if (field->type() == FieldDescriptor::TYPE_MESSAGE ||
field->type() == FieldDescriptor::TYPE_GROUP) {
// Message-typed fields have a FieldAux with the default instance pointer.
if (field->is_map()) {
// TODO(b/205904770): generate aux entries for maps
} else if (field->options().weak()) {
// Don't generate anything for weak fields. They are handled by the
// generated fallback.
} else if (options.is_implicitly_weak) {
// Implicit weak fields don't need to store a default instance pointer.
} else if (options.is_lazy) {
// Lazy fields are handled by the generated fallback function.
} else {
field_entries.back().aux_idx = aux_entries.size();
aux_entries.push_back(
{options.use_direct_tcparser_table ? kSubTable : kSubMessage,
{field}});
}
} else if (field->type() == FieldDescriptor::TYPE_ENUM &&
!cpp::HasPreservingUnknownEnumSemantics(field)) {
// Enum fields which preserve unknown values (proto3 behavior) are
// effectively int32 fields with respect to parsing -- i.e., the value
// does not need to be validated at parse time.
//
// Enum fields which do not preserve unknown values (proto2 behavior) use
// a FieldAux to store validation information. If the enum values are
// sequential (and within a range we can represent), then the FieldAux
// entry represents the range using the minimum value (which must fit in
// an int16_t) and count (a uint16_t). Otherwise, the entry holds a
// pointer to the generated Name_IsValid function.
entry.aux_idx = aux_entries.size();
aux_entries.push_back({});
auto& aux_entry = aux_entries.back();
if (GetEnumValidationRange(field->enum_type(), aux_entry.enum_range.start,
aux_entry.enum_range.size)) {
aux_entry.type = kEnumRange;
} else {
aux_entry.type = kEnumValidator;
aux_entry.field = field;
}
} else if ((field->type() == FieldDescriptor::TYPE_STRING ||
field->type() == FieldDescriptor::TYPE_BYTES) &&
options.is_string_inlined) {
GOOGLE_CHECK(!field->is_repeated());
// Inlined strings have an extra marker to represent their donation state.
int idx = inlined_string_indices[static_cast<size_t>(field->index())];
// For mini parsing, the donation state index is stored as an `offset`
// auxiliary entry.
entry.aux_idx = aux_entries.size();
aux_entries.push_back({kNumericOffset});
aux_entries.back().offset = idx;
// For fast table parsing, the donation state index is stored instead of
// the aux_idx (this will limit the range to 8 bits).
entry.inlined_string_idx = idx;
}
}
// Choose the fast table size: try sizes 1, 2, 4, ... 32 and keep the smallest
// one that covers the largest number of fields.
table_size_log2 = 0; // fallback value
int num_fast_fields = -1;
for (int try_size_log2 : {0, 1, 2, 3, 4, 5}) {
size_t try_size = 1 << try_size_log2;
auto split_fields =
SplitFastFieldsForSize(field_entries, try_size_log2, option_provider);
GOOGLE_CHECK_EQ(split_fields.size(), try_size);
int try_num_fast_fields = 0;
for (const auto& info : split_fields) {
if (info.field != nullptr) ++try_num_fast_fields;
}
// Use this size if (and only if) it covers more fields.
if (try_num_fast_fields > num_fast_fields) {
fast_path_fields = std::move(split_fields);
table_size_log2 = try_size_log2;
num_fast_fields = try_num_fast_fields;
}
// The largest table we allow has the same number of entries as the
// message has fields, rounded up to the next power of 2 (e.g., a message
// with 5 fields can have a fast table of size 8). A larger table *might*
// cover more fields in certain cases, but a larger table in that case
// would have mostly empty entries; so, we cap the size to avoid
// pathologically sparse tables.
if (try_size > ordered_fields.size()) {
break;
}
}
// Filter out fields that are handled by MiniParse. We don't need to generate
// a fallback for these, which saves code size.
fallback_fields = FilterMiniParsedFields(ordered_fields, option_provider
);
num_to_entry_table = MakeNumToEntryTable(ordered_fields);
field_name_data = GenerateFieldNames(descriptor, ordered_fields);
// If there are no fallback fields, and at most one extension range, the
// parser can use a generic fallback function. Otherwise, a message-specific
// fallback routine is needed.
use_generated_fallback =
!fallback_fields.empty() || descriptor->extension_range_count() > 1;
}
} // namespace internal
} // namespace protobuf
} // namespace google
#include <google/protobuf/port_undef.inc>

@ -0,0 +1,162 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// This file contains routines to generate tail-call table parsing tables.
// Everything in this file is for internal use only.
#ifndef GOOGLE_PROTOBUF_GENERATED_MESSAGE_TCTABLE_GEN_H__
#define GOOGLE_PROTOBUF_GENERATED_MESSAGE_TCTABLE_GEN_H__
#include <cstdint>
#include <functional>
#include <string>
#include <vector>
#include <google/protobuf/descriptor.h>
#include <google/protobuf/descriptor.pb.h>
#include <google/protobuf/generated_message_tctable_decl.h>
// Must come last:
#include <google/protobuf/port_def.inc>
namespace google {
namespace protobuf {
namespace internal {
// Helper class for generating tailcall parsing functions.
//
// An instance describes the parse tables for one message type. The
// constructor computes the data members declared below (fast-path entries,
// auxiliary entries, fallback fields, the field-number lookup table, field
// name data, the table size and the fallback choice).
struct PROTOBUF_EXPORT TailCallTableInfo {
// Per-field settings that the constructor obtains from an OptionProvider.
struct PerFieldOptions {
bool is_lazy;
// When set on a (non-repeated) string or bytes field, the constructor records
// the field's inlined-string donation index in an auxiliary entry.
bool is_string_inlined;
bool is_implicitly_weak;
bool use_direct_tcparser_table;
bool is_lite;
bool should_split;
};
// Interface through which the caller supplies the PerFieldOptions of a field.
class OptionProvider {
public:
// Returns the options to use for the given field.
virtual PerFieldOptions GetForField(const FieldDescriptor*) const = 0;
protected:
// Protected and non-virtual: outside code cannot destroy an implementation
// through an OptionProvider pointer or reference.
~OptionProvider() = default;
};
// Builds the table description for `descriptor`.
//
// `ordered_fields` is the list of fields to place in the tables; its size
// also caps how large a fast table the constructor will try.
// `option_provider` is queried for per-field options.
// `inlined_string_indices` is indexed by FieldDescriptor::index().
// NOTE(review): `has_bit_indices` is presumably indexed the same way --
// confirm against the constructor.
TailCallTableInfo(const Descriptor* descriptor,
const std::vector<const FieldDescriptor*>& ordered_fields,
const OptionProvider& option_provider,
const std::vector<int>& has_bit_indices,
const std::vector<int>& inlined_string_indices);
// Fields parsed by the table fast-path.
// The constructor counts an entry as covering a field only when `field` is
// non-null.
struct FastFieldInfo {
std::string func_name;
const FieldDescriptor* field;
uint16_t coded_tag;
uint8_t hasbit_idx;
uint8_t aux_idx;
};
// One entry per fast-table slot; the vector holds 1 << table_size_log2
// entries.
std::vector<FastFieldInfo> fast_path_fields;
// Fields parsed by mini parsing routines.
struct FieldEntryInfo {
const FieldDescriptor* field;
int hasbit_idx;
// For inlined string fields, the donation state index.
int inlined_string_idx;
// Position of this field's first entry in `aux_entries`, when it has one.
uint16_t aux_idx;
uint16_t type_card;
};
std::vector<FieldEntryInfo> field_entries;
// Discriminator stored in AuxEntry::type.
enum AuxType {
kNothing = 0,
kInlinedStringDonatedOffset,
kSplitOffset,
kSplitSizeof,
kSubMessage,
kSubTable,
kEnumRange,
kEnumValidator,
kNumericOffset,
};
// Auxiliary data attached to a field entry. `type` tells which member of the
// anonymous union is meaningful; the constructor fills `enum_range` for
// kEnumRange, `field` for kEnumValidator and `offset` for kNumericOffset.
struct AuxEntry {
AuxType type;
// Range of enum values described by its first value and a size.
struct EnumRange {
int16_t start;
uint16_t size;
};
union {
const FieldDescriptor* field;
uint32_t offset;
EnumRange enum_range;
};
};
std::vector<AuxEntry> aux_entries;
// Fields parsed by generated fallback function.
std::vector<const FieldDescriptor*> fallback_fields;
// A 16-bit skipmap paired with an offset into the field entries.
struct SkipEntry16 {
uint16_t skipmap;
uint16_t field_entry_offset;
};
// A run of skip entries that starts at field number `first_fnum`.
struct SkipEntryBlock {
uint32_t first_fnum;
std::vector<SkipEntry16> entries;
};
// Lookup structure from field number to field entry.
struct NumToEntryTable {
uint32_t skipmap32; // for fields #1 - #32
std::vector<SkipEntryBlock> blocks;
// Compute the number of uint16_t required to represent this table.
// `skipmap32` is not included in the count.
int size16() const {
int size = 2; // for the termination field#
for (const auto& block : blocks) {
// 2 for the field#, 1 for a count of skip entries, 2 for each entry.
size += static_cast<int>(3 + block.entries.size() * 2);
}
return size;
}
};
NumToEntryTable num_to_entry_table;
// Field name data produced by the constructor from the descriptor and the
// ordered fields.
std::vector<uint8_t> field_name_data;
// Table size.
// Stored as log2 of the number of fast-path entries; the constructor tries
// values 0 through 5 and keeps the smallest one that covers the most fields.
int table_size_log2;
// True if a generated fallback function is required instead of generic.
// The constructor sets this when there is at least one fallback field or the
// message has more than one extension range.
bool use_generated_fallback;
};
} // namespace internal
} // namespace protobuf
} // namespace google
#include <google/protobuf/port_undef.inc>
#endif // GOOGLE_PROTOBUF_GENERATED_MESSAGE_TCTABLE_GEN_H__

@ -279,6 +279,8 @@ class PROTOBUF_EXPORT TcParser final {
static const char* GenericFallback(PROTOBUF_TC_PARAM_DECL);
static const char* GenericFallbackLite(PROTOBUF_TC_PARAM_DECL);
static const char* ReflectionFallback(PROTOBUF_TC_PARAM_DECL);
static const char* ReflectionParseLoop(PROTOBUF_TC_PARAM_DECL);
static const char* ParseLoop(MessageLite* msg, const char* ptr,
ParseContext* ctx,

@ -45,6 +45,7 @@
#include <google/protobuf/arenastring.h>
#include <gtest/gtest.h>
#include <google/protobuf/stubs/strutil.h>
#include <google/protobuf/message.h>
namespace google {

@ -47,6 +47,7 @@
#include <google/protobuf/descriptor.h>
#include <google/protobuf/descriptor.pb.h>
#include <google/protobuf/generated_message_reflection.h>
#include <google/protobuf/generated_message_tctable_impl.h>
#include <google/protobuf/generated_message_util.h>
#include <google/protobuf/map_field.h>
#include <google/protobuf/map_field_inl.h>
@ -167,7 +168,15 @@ void Message::DiscardUnknownFields() {
// Parses wire data starting at `ptr` into this message and returns the
// position reported by the underlying parser.
//
// Without PROTOBUF_USE_TABLE_PARSER_ON_REFLECTION the work is delegated to
// WireFormat::_InternalParse. With it, the table-driven parser loop runs using
// the parse table obtained from this message's Reflection object.
const char* Message::_InternalParse(const char* ptr,
                                    internal::ParseContext* ctx) {
#if !defined(PROTOBUF_USE_TABLE_PARSER_ON_REFLECTION)
  return WireFormat::_InternalParse(this, ptr, ctx);
#else
  const auto* parse_table = GetMetadata().reflection->GetTcParseTable();
  return internal::TcParser::ParseLoop(this, ptr, ctx, parse_table);
#endif
}
uint8_t* Message::_InternalSerialize(uint8_t* target,

@ -122,6 +122,7 @@
#include <google/protobuf/port.h>
#include <google/protobuf/descriptor.h>
#include <google/protobuf/generated_message_reflection.h>
#include <google/protobuf/generated_message_tctable_decl.h>
#include <google/protobuf/generated_message_util.h>
#include <google/protobuf/map.h> // TODO(b/211442718): cleanup
#include <google/protobuf/message_lite.h>
@ -157,6 +158,7 @@ struct DescriptorTable;
class MapFieldBase;
class SwapFieldHelper;
class CachedSize;
struct TailCallTableInfo;
} // namespace internal
class UnknownFieldSet; // unknown_field_set.h
namespace io {
@ -468,6 +470,8 @@ class MutableRepeatedFieldRef;
// memory leaks. So, instead we ended up with this flat interface.
class PROTOBUF_EXPORT Reflection final {
public:
~Reflection();
// Get the UnknownFieldSet for the message. This contains fields which
// were seen when the Message was parsed but were not recognized according
// to the Message's definition.
@ -1061,10 +1065,34 @@ class PROTOBUF_EXPORT Reflection final {
// contain weak fields, then this field equals descriptor_->field_count().
int last_non_weak_field_index_;
// The table-driven parser table.
// This table is generated on demand for Message types that did not override
// _InternalParse. It uses the reflection information to do so.
mutable internal::once_flag tcparse_table_once_;
using TcParseTableBase = internal::TcParseTableBase;
mutable const TcParseTableBase* tcparse_table_ = nullptr;
// Returns the parse table for this Reflection object. The table is created
// by CreateTcParseTable() under `tcparse_table_once_` and cached in
// `tcparse_table_`.
const TcParseTableBase* GetTcParseTable() const {
  auto build_table = [this] { tcparse_table_ = CreateTcParseTable(); };
  internal::call_once(tcparse_table_once_, build_table);
  return tcparse_table_;
}
const TcParseTableBase* CreateTcParseTable() const;
const TcParseTableBase* CreateTcParseTableForMessageSet() const;
void PopulateTcParseFastEntries(
const internal::TailCallTableInfo& table_info,
TcParseTableBase::FastFieldEntry* fast_entries) const;
void PopulateTcParseEntries(internal::TailCallTableInfo& table_info,
TcParseTableBase::FieldEntry* entries) const;
void PopulateTcParseFieldAux(const internal::TailCallTableInfo& table_info,
TcParseTableBase::FieldAux* field_aux) const;
template <typename T, typename Enable>
friend class RepeatedFieldRef;
template <typename T, typename Enable>
friend class MutableRepeatedFieldRef;
friend class Message;
friend class ::PROTOBUF_NAMESPACE_ID::MessageLayoutInspector;
friend class ::PROTOBUF_NAMESPACE_ID::AssignDescriptorsHelper;
friend class DynamicMessageFactory;

@ -47,6 +47,8 @@ inline void SizedDelete(void* p, size_t size) {
#if defined(__cpp_sized_deallocation)
::operator delete(p, size);
#else
// Avoid -Wunused-parameter
(void)size;
::operator delete(p);
#endif
}
@ -54,6 +56,8 @@ inline void SizedArrayDelete(void* p, size_t size) {
#if defined(__cpp_sized_deallocation)
::operator delete[](p, size);
#else
// Avoid -Wunused-parameter
(void)size;
::operator delete[](p);
#endif
}

@ -832,6 +832,15 @@
#define PROTOBUF_TAIL_CALL_TABLE_PARSER_ENABLED 1
#endif
#ifdef PROTOBUF_USE_TABLE_PARSER_ON_REFLECTION
#error PROTOBUF_USE_TABLE_PARSER_ON_REFLECTION was previously defined
#endif
#if !defined(PROTOBUF_TEMPORARY_DISABLE_TABLE_PARSER_ON_REFLECTION) && \
(defined(PROTOBUF_EXPERIMENTAL_USE_TABLE_PARSER_ON_REFLECTION) || \
defined(PROTOBUF_TAIL_CALL_TABLE_PARSER_ENABLED))
#define PROTOBUF_USE_TABLE_PARSER_ON_REFLECTION 1
#endif // PROTOBUF_USE_TABLE_PARSER_ON_REFLECTION
// Note that this is performance sensitive: changing the parameters will change
// the registers used by the ABI calling convention, which subsequently affects
// register selection logic inside the function.

@ -103,6 +103,7 @@
#undef PROTOBUF_MSAN
#undef PROTOBUF_TSAN
#undef PROTOBUF_TAIL_CALL_TABLE_PARSER_ENABLED
#undef PROTOBUF_USE_TABLE_PARSER_ON_REFLECTION
#undef PROTOBUF_TC_PARAM_DECL
#undef PROTOBUF_EXCLUSIVE_LOCKS_REQUIRED
#undef PROTOBUF_LOCKS_EXCLUDED

@ -34,7 +34,6 @@
#include <google/protobuf/unittest_proto3_arena.pb.h>
#include <google/protobuf/arena.h>
#include <google/protobuf/testing/googletest.h>
#include <gtest/gtest.h>
using proto3_arena_unittest::TestAllTypes;

@ -37,7 +37,6 @@
#include <google/protobuf/unittest_proto3_optional.pb.h>
#include <google/protobuf/arena.h>
#include <google/protobuf/text_format.h>
#include <google/protobuf/testing/googletest.h>
#include <gtest/gtest.h>
#include <google/protobuf/stubs/strutil.h>
#include <google/protobuf/test_util.h>

@ -33,7 +33,6 @@
#include <vector>
#include <google/protobuf/arena.h>
#include <google/protobuf/testing/googletest.h>
#include <gtest/gtest.h>
using UNITTEST::TestAllTypes;

@ -38,7 +38,6 @@
#include <google/protobuf/stubs/common.h>
#include <google/protobuf/unittest.pb.h>
#include <google/protobuf/descriptor.h>
#include <google/protobuf/testing/googletest.h>
#include <gtest/gtest.h>
#include <google/protobuf/test_util.h>

@ -113,10 +113,21 @@ void memswap(char* a, char* b) {
b += kBlockSize;
}
#if defined(__GNUC__) && !defined(__clang__)
// Workaround GCC bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99578
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpragmas"
#pragma GCC diagnostic ignored "-Wstringop-overflow"
#endif // __GNUC__
// Swap the leftover bytes, could be zero.
memcpy(&buf, a, kSize % kBlockSize);
memcpy(a, b, kSize % kBlockSize);
memcpy(b, &buf, kSize % kBlockSize);
#if defined(__GNUC__) && !defined(__clang__)
#pragma GCC diagnostic pop
#endif // GCC
}
template <typename Element>

@ -52,7 +52,6 @@
#include <google/protobuf/unittest.pb.h>
#include <google/protobuf/stubs/strutil.h>
#include <gmock/gmock.h>
#include <google/protobuf/testing/googletest.h>
#include <gtest/gtest.h>
#include <google/protobuf/stubs/stl_util.h>

@ -41,6 +41,8 @@
//
// This header covers RepeatedPtrField.
// IWYU pragma: private, include "net/proto2/public/repeated_field.h"
#ifndef GOOGLE_PROTOBUF_REPEATED_PTR_FIELD_H__
#define GOOGLE_PROTOBUF_REPEATED_PTR_FIELD_H__

@ -42,6 +42,7 @@
#include <climits>
#include <cmath>
#include <limits>
#include <string>
#include <utility>
#include <vector>

@ -285,6 +285,7 @@ class PROTOBUF_EXPORT WireFormat {
private:
struct MessageSetParser;
friend class TcParser;
// Skip a MessageSet field.
static bool SkipMessageSetField(io::CodedInputStream* input,
uint32_t field_number,

Loading…
Cancel
Save