Merge tag 'refs/tags/sync-piper' into sync-stage

pull/10325/head
Sandy Zhang 3 years ago
commit c49b2245ae
  1. 5
      java/kotlin/src/test/kotlin/com/google/protobuf/Proto3Test.kt
  2. 1
      src/Makefile.am
  3. 1
      src/file_lists.cmake
  4. 2
      src/google/protobuf/arena_unittest.cc
  5. 5
      src/google/protobuf/arenaz_sampler.cc
  6. 4
      src/google/protobuf/arenaz_sampler.h
  7. 25
      src/google/protobuf/arenaz_sampler_test.cc
  8. 11
      src/google/protobuf/compiler/command_line_interface.cc
  9. 1
      src/google/protobuf/compiler/cpp/field.cc
  10. 1
      src/google/protobuf/compiler/cpp/field.h
  11. 8
      src/google/protobuf/compiler/cpp/file.cc
  12. 4
      src/google/protobuf/compiler/cpp/helpers.cc
  13. 82
      src/google/protobuf/compiler/cpp/message.cc
  14. 46
      src/google/protobuf/compiler/cpp/parse_function_generator.cc
  15. 12
      src/google/protobuf/compiler/java/enum_field.cc
  16. 12
      src/google/protobuf/compiler/java/enum_field_lite.cc
  17. 5
      src/google/protobuf/compiler/java/generator.h
  18. 36
      src/google/protobuf/compiler/java/message_serialization.h
  19. 124
      src/google/protobuf/compiler/java/message_serialization_unittest.cc
  20. 56
      src/google/protobuf/compiler/java/message_serialization_unittest.proto
  21. 9
      src/google/protobuf/compiler/main.cc
  22. 6
      src/google/protobuf/compiler/python/generator.h
  23. 214
      src/google/protobuf/compiler/python/pyi_generator.cc
  24. 32
      src/google/protobuf/compiler/python/pyi_generator.h
  25. 162
      src/google/protobuf/generated_message_tctable_impl.h
  26. 73
      src/google/protobuf/generated_message_tctable_lite.cc
  27. 16
      src/google/protobuf/io/printer.h
  28. 6
      src/google/protobuf/map.h
  29. 2
      src/google/protobuf/map_test.cc
  30. 39
      src/google/protobuf/map_test.inc
  31. 2
      src/google/protobuf/message.cc
  32. 3
      src/google/protobuf/message.h
  33. 90
      src/google/protobuf/message_unittest.inc
  34. 1
      src/google/protobuf/parse_context.h
  35. 49
      src/google/protobuf/port_def.inc
  36. 3
      src/google/protobuf/port_undef.inc
  37. 12
      src/google/protobuf/repeated_field.h
  38. 17
      src/google/protobuf/unittest.proto
  39. 1
      src/google/protobuf/util/json_util.h
  40. 106
      src/google/protobuf/util/json_util_test.cc
  41. 6
      src/google/protobuf/util/message_differencer.h
  42. 1
      src/google/protobuf/wire_format_lite.h
  43. 2
      third_party/benchmark

@ -66,6 +66,11 @@ class Proto3Test {
assertThat(optionalNestedMessage).isEqualTo(TestAllTypesKt.nestedMessage { bb = 118 })
optionalNestedEnum = NestedEnum.BAZ
assertThat(optionalNestedEnum).isEqualTo(NestedEnum.BAZ)
assertThat(optionalNestedEnumValue).isEqualTo(3)
optionalNestedEnumValue = 1
assertThat(optionalNestedEnumValue).isEqualTo(1)
assertThat(optionalNestedEnum).isEqualTo(NestedEnum.FOO)
oneofUint32 = 601
assertThat(oneofUint32).isEqualTo(601)
}

@ -774,6 +774,7 @@ protobuf_test_SOURCES = \
google/protobuf/compiler/csharp/csharp_generator_unittest.cc \
google/protobuf/compiler/importer_unittest.cc \
google/protobuf/compiler/java/doc_comment_unittest.cc \
google/protobuf/compiler/java/message_serialization_unittest.cc \
google/protobuf/compiler/java/plugin_unittest.cc \
google/protobuf/compiler/mock_code_generator.cc \
google/protobuf/compiler/mock_code_generator.h \

@ -771,6 +771,7 @@ set(compiler_test_files
${protobuf_SOURCE_DIR}/src/google/protobuf/compiler/csharp/csharp_generator_unittest.cc
${protobuf_SOURCE_DIR}/src/google/protobuf/compiler/importer_unittest.cc
${protobuf_SOURCE_DIR}/src/google/protobuf/compiler/java/doc_comment_unittest.cc
${protobuf_SOURCE_DIR}/src/google/protobuf/compiler/java/message_serialization_unittest.cc
${protobuf_SOURCE_DIR}/src/google/protobuf/compiler/java/plugin_unittest.cc
${protobuf_SOURCE_DIR}/src/google/protobuf/compiler/objectivec/objectivec_helpers_unittest.cc
${protobuf_SOURCE_DIR}/src/google/protobuf/compiler/parser_unittest.cc

@ -1419,7 +1419,7 @@ TEST(ArenaTest, BlockSizeDoubling) {
ASSERT_GT(arena.SpaceAllocated(), first_block_size);
auto second_block_size = (arena.SpaceAllocated() - first_block_size);
EXPECT_EQ(second_block_size, 2*first_block_size);
EXPECT_GE(second_block_size, 2*first_block_size);
}
TEST(ArenaTest, Alignment) {

@ -72,7 +72,7 @@ void ThreadSafeArenaStats::PrepareForSampling(int64_t stride) {
bytes_used.store(0, std::memory_order_relaxed);
bytes_allocated.store(0, std::memory_order_relaxed);
bytes_wasted.store(0, std::memory_order_relaxed);
max_bytes_allocated.store(0, std::memory_order_relaxed);
max_block_size.store(0, std::memory_order_relaxed);
thread_ids.store(0, std::memory_order_relaxed);
weight = stride;
// The inliner makes hardcoded skip_count difficult (especially when combined
@ -87,6 +87,9 @@ void RecordAllocateSlow(ThreadSafeArenaStats* info, size_t used,
info->bytes_used.fetch_add(used, std::memory_order_relaxed);
info->bytes_allocated.fetch_add(allocated, std::memory_order_relaxed);
info->bytes_wasted.fetch_add(wasted, std::memory_order_relaxed);
if (info->max_block_size.load(std::memory_order_relaxed) < allocated) {
info->max_block_size.store(allocated, std::memory_order_relaxed);
}
const uint64_t tid = 1ULL << (GetCachedTID() % 63);
info->thread_ids.fetch_or(tid, std::memory_order_relaxed);
}

@ -69,8 +69,8 @@ struct ThreadSafeArenaStats
std::atomic<size_t> bytes_used;
std::atomic<size_t> bytes_allocated;
std::atomic<size_t> bytes_wasted;
// Records the largest size an arena ever had.
std::atomic<size_t> max_bytes_allocated;
// Records the largest block allocated for the arena.
std::atomic<size_t> max_block_size;
// Bit `i` is set to 1 indicates that a thread with `tid % 63 = i` accessed
// the underlying arena. We use `% 63` as a rudimentary hash to ensure some
// bit mixing for thread-ids; `% 64` would only grab the low bits and might

@ -89,21 +89,21 @@ TEST(ThreadSafeArenaStatsTest, PrepareForSampling) {
EXPECT_EQ(info.bytes_used.load(), 0);
EXPECT_EQ(info.bytes_allocated.load(), 0);
EXPECT_EQ(info.bytes_wasted.load(), 0);
EXPECT_EQ(info.max_bytes_allocated.load(), 0);
EXPECT_EQ(info.max_block_size.load(), 0);
EXPECT_EQ(info.weight, kTestStride);
info.num_allocations.store(1, std::memory_order_relaxed);
info.bytes_used.store(1, std::memory_order_relaxed);
info.bytes_allocated.store(1, std::memory_order_relaxed);
info.bytes_wasted.store(1, std::memory_order_relaxed);
info.max_bytes_allocated.store(1, std::memory_order_relaxed);
info.max_block_size.store(1, std::memory_order_relaxed);
info.PrepareForSampling(2 * kTestStride);
EXPECT_EQ(info.num_allocations.load(), 0);
EXPECT_EQ(info.bytes_used.load(), 0);
EXPECT_EQ(info.bytes_allocated.load(), 0);
EXPECT_EQ(info.bytes_wasted.load(), 0);
EXPECT_EQ(info.max_bytes_allocated.load(), 0);
EXPECT_EQ(info.max_block_size.load(), 0);
EXPECT_EQ(info.weight, 2 * kTestStride);
}
@ -117,14 +117,29 @@ TEST(ThreadSafeArenaStatsTest, RecordAllocateSlow) {
EXPECT_EQ(info.bytes_used.load(), 100);
EXPECT_EQ(info.bytes_allocated.load(), 128);
EXPECT_EQ(info.bytes_wasted.load(), 0);
EXPECT_EQ(info.max_bytes_allocated.load(), 0);
EXPECT_EQ(info.max_block_size.load(), 128);
RecordAllocateSlow(&info, /*requested=*/100, /*allocated=*/256,
/*wasted=*/28);
EXPECT_EQ(info.num_allocations.load(), 2);
EXPECT_EQ(info.bytes_used.load(), 200);
EXPECT_EQ(info.bytes_allocated.load(), 384);
EXPECT_EQ(info.bytes_wasted.load(), 28);
EXPECT_EQ(info.max_bytes_allocated.load(), 0);
EXPECT_EQ(info.max_block_size.load(), 256);
}
// Checks that max_block_size follows the largest `allocated` value reported
// to RecordAllocateSlow and is not lowered by a later, smaller block.
TEST(ThreadSafeArenaStatsTest, RecordAllocateSlowMaxBlockSizeTest) {
  ThreadSafeArenaStats info;
  constexpr int64_t kTestStride = 458;
  MutexLock l(&info.init_mu);
  info.PrepareForSampling(kTestStride);

  // One reported block per step, with the maximum expected right after it.
  struct Step {
    size_t allocated;
    size_t wasted;
    size_t expected_max_block_size;
  };
  const Step kSteps[] = {
      {128, 0, 128},   // The first block sets the maximum.
      {256, 28, 256},  // A larger block raises it.
      {128, 28, 256},  // A smaller block leaves it untouched.
  };
  for (const Step& step : kSteps) {
    RecordAllocateSlow(&info, /*requested=*/100, step.allocated, step.wasted);
    EXPECT_EQ(info.max_block_size.load(), step.expected_max_block_size);
  }
}
TEST(ThreadSafeArenazSamplerTest, SamplingCorrectness) {

@ -339,9 +339,12 @@ class CommandLineInterface::ErrorPrinter
void AddErrorOrWarning(const std::string& filename, int line, int column,
const std::string& message, const std::string& type,
std::ostream& out) {
// Print full path when running under MSVS
std::string dfile;
if (format_ == CommandLineInterface::ERROR_FORMAT_MSVS &&
if (
#ifndef PROTOBUF_OPENSOURCE
// Print full path when running under MSVS
format_ == CommandLineInterface::ERROR_FORMAT_MSVS &&
#endif // !PROTOBUF_OPENSOURCE
tree_ != nullptr && tree_->VirtualFileToDiskFile(filename, &dfile)) {
out << dfile;
} else {
@ -398,7 +401,6 @@ class CommandLineInterface::GeneratorContextImpl : public GeneratorContext {
// Get name of all output files.
void GetOutputFilenames(std::vector<std::string>* output_filenames);
// implements GeneratorContext --------------------------------------
io::ZeroCopyOutputStream* Open(const std::string& filename) override;
io::ZeroCopyOutputStream* OpenForAppend(const std::string& filename) override;
@ -963,6 +965,7 @@ PopulateSingleSimpleDescriptorDatabase(const std::string& descriptor_set_name);
int CommandLineInterface::Run(int argc, const char* const argv[]) {
Clear();
switch (ParseArguments(argc, argv)) {
case PARSE_ARGUMENT_DONE_AND_EXIT:
return 0;
@ -1076,7 +1079,6 @@ int CommandLineInterface::Run(int argc, const char* const argv[]) {
}
}
// Write all output to disk.
for (const auto& pair : output_directories) {
const std::string& location = pair.first;
GeneratorContextImpl* directory = pair.second.get();
@ -1151,7 +1153,6 @@ int CommandLineInterface::Run(int argc, const char* const argv[]) {
// Do not add a default case.
}
}
return 0;
}

@ -330,7 +330,6 @@ void FieldGenerator::GenerateCopyConstructorCode(io::Printer* printer) const {
}
}
void SetCommonOneofFieldVariables(
const FieldDescriptor* descriptor,
std::map<std::string, std::string>* variables) {

@ -208,7 +208,6 @@ class FieldGenerator {
virtual bool IsInlined() const { return false; }
virtual ArenaDtorNeeds NeedsArenaDestructor() const {
return ArenaDtorNeeds::kNone;
}

@ -495,12 +495,10 @@ void FileGenerator::GenerateSourceDefaultInstance(int idx,
generator->GenerateInitDefaultSplitInstance(printer);
format(
"} {}\n"
" ~$1$() {}\n"
" union {\n"
" $2$ _instance;\n"
" $1$ _instance;\n"
" };\n"
"};\n",
DefaultInstanceType(generator->descriptor_, options_, /*split=*/true),
StrCat(generator->classname_, "::Impl_::Split"));
// NO_DESTROY is not necessary for correctness. The empty destructor is
// enough. However, the empty destructor fails to be elided in some
@ -508,7 +506,7 @@ void FileGenerator::GenerateSourceDefaultInstance(int idx,
// there just to improve performance and binary size in these builds.
format(
"PROTOBUF_ATTRIBUTE_NO_DESTROY PROTOBUF_CONSTINIT "
"PROTOBUF_ATTRIBUTE_INIT_PRIORITY1 $1$ $2$;\n",
"PROTOBUF_ATTRIBUTE_INIT_PRIORITY1 const $1$ $2$;\n",
DefaultInstanceType(generator->descriptor_, options_, /*split=*/true),
DefaultInstanceName(generator->descriptor_, options_, /*split=*/true));
}
@ -999,7 +997,7 @@ class FileGenerator::ForwardDeclarations {
const Descriptor* class_desc = p.second;
format(
"struct $1$;\n"
"$dllexport_decl $extern $1$ $2$;\n",
"$dllexport_decl $extern const $1$ $2$;\n",
DefaultInstanceType(class_desc, options, /*split=*/true),
DefaultInstanceName(class_desc, options, /*split=*/true));
}

@ -176,7 +176,6 @@ static const char* const kKeywordList[] = {
#endif // !PROTOBUF_FUTURE_BREAKING_CHANGES
};
static std::unordered_set<std::string>* MakeKeywordsMap() {
auto* result = new std::unordered_set<std::string>();
for (const auto keyword : kKeywordList) {
@ -525,7 +524,6 @@ std::string FieldName(const FieldDescriptor* field) {
return result;
}
std::string FieldMemberName(const FieldDescriptor* field, bool split) {
StringPiece prefix =
IsMapEntryMessage(field->containing_type()) ? "" : "_impl_.";
@ -876,8 +874,6 @@ std::string SafeFunctionName(const Descriptor* descriptor,
// Reports whether `options` carries an access-info map, i.e. whether
// options.access_info_map is non-null.
bool IsProfileDriven(const Options& options) {
  const bool has_access_info = (options.access_info_map != nullptr);
  return has_access_info;
}
bool IsStringInlined(const FieldDescriptor* descriptor,
const Options& options) {
(void)descriptor;

@ -828,7 +828,6 @@ void MessageGenerator::GenerateFieldAccessorDeclarations(io::Printer* printer) {
// Generate type-specific accessor declarations.
field_generators_.get(field).GenerateAccessorDeclarations(printer);
format("\n");
}
@ -1238,41 +1237,41 @@ void MessageGenerator::GenerateFieldAccessorDefinitions(io::Printer* printer) {
Formatter::SaveState saver(&format);
format.AddMap(vars);
// Generate has_$name$() or $name$_size().
if (field->is_repeated()) {
if (IsFieldStripped(field, options_)) {
format(
"inline int $classname$::$name$_size() const { "
"__builtin_trap(); }\n");
} else {
format(
"inline int $classname$::_internal_$name$_size() const {\n"
" return $field$$1$.size();\n"
"}\n"
"inline int $classname$::$name$_size() const {\n"
"$annotate_size$"
" return _internal_$name$_size();\n"
"}\n",
IsImplicitWeakField(field, options_, scc_analyzer_) &&
field->message_type()
? ".weak"
: "");
}
} else if (field->real_containing_oneof()) {
format.Set("field_name", UnderscoresToCamelCase(field->name(), true));
format.Set("oneof_name", field->containing_oneof()->name());
format.Set("oneof_index",
StrCat(field->containing_oneof()->index()));
GenerateOneofMemberHasBits(field, format);
// Generate has_$name$() or $name$_size().
if (field->is_repeated()) {
if (IsFieldStripped(field, options_)) {
format(
"inline int $classname$::$name$_size() const { "
"__builtin_trap(); }\n");
} else {
// Singular field.
GenerateSingularFieldHasBits(field, format);
format(
"inline int $classname$::_internal_$name$_size() const {\n"
" return $field$$1$.size();\n"
"}\n"
"inline int $classname$::$name$_size() const {\n"
"$annotate_size$"
" return _internal_$name$_size();\n"
"}\n",
IsImplicitWeakField(field, options_, scc_analyzer_) &&
field->message_type()
? ".weak"
: "");
}
} else if (field->real_containing_oneof()) {
format.Set("field_name", UnderscoresToCamelCase(field->name(), true));
format.Set("oneof_name", field->containing_oneof()->name());
format.Set("oneof_index",
StrCat(field->containing_oneof()->index()));
GenerateOneofMemberHasBits(field, format);
} else {
// Singular field.
GenerateSingularFieldHasBits(field, format);
}
if (!IsCrossFileMaybeMap(field)) {
GenerateFieldClear(field, true, format);
}
// Generate type-specific accessors.
if (!IsFieldStripped(field, options_)) {
field_generators_.get(field).GenerateInlineAccessorDefinitions(printer);
@ -1760,7 +1759,7 @@ void MessageGenerator::GenerateClassDefinition(io::Printer* printer) {
format(
"private:\n"
"inline bool IsSplitMessageDefault() const {\n"
" return $split$ == reinterpret_cast<Impl_::Split*>(&$1$);\n"
" return $split$ == reinterpret_cast<const Impl_::Split*>(&$1$);\n"
"}\n"
"PROTOBUF_NOINLINE void PrepareSplitMessageForWrite();\n"
"public:\n",
@ -1928,6 +1927,8 @@ void MessageGenerator::GenerateClassDefinition(io::Printer* printer) {
" typedef void InternalArenaConstructable_;\n"
" typedef void DestructorSkippable_;\n"
"};\n"
"static_assert(std::is_trivially_copy_constructible<Split>::value);\n"
"static_assert(std::is_trivially_destructible<Split>::value);\n"
"Split* _split_;\n");
}
@ -2421,8 +2422,15 @@ void MessageGenerator::GenerateSharedConstructorCode(io::Printer* printer) {
}
if (ShouldSplit(descriptor_, options_)) {
put_sep();
format("decltype($split$){reinterpret_cast<Impl_::Split*>(&$1$)}",
DefaultInstanceName(descriptor_, options_, /*split=*/true));
// We can't assign the default split to this->split without the const_cast
// because the former is a const. The const_cast is safe because we don't
// intend to modify the default split through this pointer, and we also
// expect the default split to be in the rodata section which is protected
// from mutation.
format(
"decltype($split$){const_cast<Impl_::Split*>"
"(reinterpret_cast<const Impl_::Split*>(&$1$))}",
DefaultInstanceName(descriptor_, options_, /*split=*/true));
}
for (auto oneof : OneOfRange(descriptor_)) {
put_sep();
@ -2681,7 +2689,7 @@ void MessageGenerator::GenerateConstexprConstructor(io::Printer* printer) {
}
if (ShouldSplit(descriptor_, options_)) {
put_sep();
format("/*decltype($split$)*/&$1$._instance",
format("/*decltype($split$)*/const_cast<Impl_::Split*>(&$1$._instance)",
DefaultInstanceName(descriptor_, options_, /*split=*/true));
}
@ -2866,8 +2874,10 @@ void MessageGenerator::GenerateStructors(io::Printer* printer) {
}
if (ShouldSplit(descriptor_, options_)) {
put_sep();
format("decltype($split$){reinterpret_cast<Impl_::Split*>(&$1$)}",
DefaultInstanceName(descriptor_, options_, /*split=*/true));
format(
"decltype($split$){const_cast<Impl_::Split*>"
"(reinterpret_cast<const Impl_::Split*>(&$1$))}",
DefaultInstanceName(descriptor_, options_, /*split=*/true));
}
for (auto oneof : OneOfRange(descriptor_)) {
put_sep();

@ -83,7 +83,8 @@ int TagSize(uint32_t field_number) {
return 2;
}
void PopulateFastFieldEntry(const TailCallTableInfo::FieldEntryInfo& entry,
void PopulateFastFieldEntry(const Descriptor* descriptor,
const TailCallTableInfo::FieldEntryInfo& entry,
const Options& options,
TailCallTableInfo::FastFieldInfo& info);
@ -158,6 +159,7 @@ bool IsFieldEligibleForFastParsing(
}
std::vector<TailCallTableInfo::FastFieldInfo> SplitFastFieldsForSize(
const Descriptor* descriptor,
const std::vector<TailCallTableInfo::FieldEntryInfo>& field_entries,
int table_size_log2, const Options& options,
MessageSCCAnalyzer* scc_analyzer) {
@ -200,7 +202,7 @@ std::vector<TailCallTableInfo::FastFieldInfo> SplitFastFieldsForSize(
GOOGLE_CHECK(info.func_name.empty()) << info.func_name;
info.field = field;
info.coded_tag = tag;
PopulateFastFieldEntry(entry, options, info);
PopulateFastFieldEntry(descriptor, entry, options, info);
// If this field does not have presence, then it can set an out-of-bounds
// bit (tailcall parsing uses a uint64_t for hasbits, but only stores 32).
info.hasbit_idx = HasHasbit(field) ? entry.hasbit_idx : 63;
@ -412,8 +414,8 @@ TailCallTableInfo::TailCallTableInfo(
int num_fast_fields = -1;
for (int try_size_log2 : {0, 1, 2, 3, 4, 5}) {
size_t try_size = 1 << try_size_log2;
auto split_fields = SplitFastFieldsForSize(field_entries, try_size_log2,
options, scc_analyzer);
auto split_fields = SplitFastFieldsForSize(
descriptor, field_entries, try_size_log2, options, scc_analyzer);
GOOGLE_CHECK_EQ(split_fields.size(), try_size);
int try_num_fast_fields = 0;
for (const auto& info : split_fields) {
@ -1667,11 +1669,12 @@ void ParseFunctionGenerator::GenerateFieldSwitch(
namespace {
void PopulateFastFieldEntry(const TailCallTableInfo::FieldEntryInfo& entry,
void PopulateFastFieldEntry(const Descriptor* descriptor,
const TailCallTableInfo::FieldEntryInfo& entry,
const Options& options,
TailCallTableInfo::FastFieldInfo& info) {
const FieldDescriptor* field = entry.field;
std::string name = "::_pbi::TcParser::Fast";
std::string name;
uint8_t aux_idx = static_cast<uint8_t>(entry.aux_idx);
switch (field->type()) {
@ -1784,7 +1787,36 @@ void PopulateFastFieldEntry(const TailCallTableInfo::FieldEntryInfo& entry,
// Append the tag length. Fast parsing only handles 1- or 2-byte tags.
name.append(TagSize(field->number()) == 1 ? "1" : "2");
info.func_name = std::move(name);
if (name == "V8S1") {
info.func_name = StrCat(
"::_pbi::TcParser::SingularVarintNoZag1<bool, offsetof(", //
ClassName(descriptor), //
", ", //
FieldMemberName(field, /*split=*/false), //
"), ", //
HasHasbit(field) ? entry.hasbit_idx : 63, //
">()");
} else if (name == "V32S1") {
info.func_name = StrCat(
"::_pbi::TcParser::SingularVarintNoZag1<uint32_t, offsetof(", //
ClassName(descriptor), //
", ", //
FieldMemberName(field, /*split=*/false), //
"), ", //
HasHasbit(field) ? entry.hasbit_idx : 63, //
">()");
} else if (name == "V64S1") {
info.func_name = StrCat(
"::_pbi::TcParser::SingularVarintNoZag1<uint64_t, offsetof(", //
ClassName(descriptor), //
", ", //
FieldMemberName(field, /*split=*/false), //
"), ", //
HasHasbit(field) ? entry.hasbit_idx : 63, //
">()");
} else {
info.func_name = StrCat("::_pbi::TcParser::Fast", name);
}
info.aux_idx = aux_idx;
}

@ -281,6 +281,18 @@ void ImmutableEnumFieldGenerator::GenerateKotlinDslMembers(
" $kt_dsl_builder$.${$set$capitalized_name$$}$(value)\n"
" }\n");
if (SupportUnknownEnumValue(descriptor_->file())) {
printer->Print(
variables_,
"$kt_deprecation$ var $kt_name$Value: kotlin.Int\n"
" @JvmName(\"${$get$kt_capitalized_name$Value$}$\")\n"
" get() = $kt_dsl_builder$.${$get$capitalized_name$Value$}$()\n"
" @JvmName(\"${$set$kt_capitalized_name$Value$}$\")\n"
" set(value) {\n"
" $kt_dsl_builder$.${$set$capitalized_name$Value$}$(value)\n"
" }\n");
}
WriteFieldAccessorDocComment(printer, descriptor_, CLEARER,
/* builder */ false, /* kdoc */ true);
printer->Print(variables_,

@ -296,6 +296,18 @@ void ImmutableEnumFieldLiteGenerator::GenerateKotlinDslMembers(
" $kt_dsl_builder$.${$set$capitalized_name$$}$(value)\n"
" }\n");
if (SupportUnknownEnumValue(descriptor_->file())) {
printer->Print(
variables_,
"$kt_deprecation$ var $kt_name$Value: kotlin.Int\n"
" @JvmName(\"${$get$kt_capitalized_name$Value$}$\")\n"
" get() = $kt_dsl_builder$.${$get$capitalized_name$Value$}$()\n"
" @JvmName(\"${$set$kt_capitalized_name$Value$}$\")\n"
" set(value) {\n"
" $kt_dsl_builder$.${$set$capitalized_name$Value$}$(value)\n"
" }\n");
}
WriteFieldAccessorDocComment(printer, descriptor_, CLEARER,
/* builder */ false, /* kdoc */ true);
printer->Print(variables_,

@ -63,7 +63,12 @@ class PROTOC_EXPORT JavaGenerator : public CodeGenerator {
uint64_t GetSupportedFeatures() const override;
void set_opensource_runtime(bool opensource) {
opensource_runtime_ = opensource;
}
private:
bool opensource_runtime_ = true;
GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(JavaGenerator);
};

@ -32,6 +32,7 @@
#define GOOGLE_PROTOBUF_COMPILER_JAVA_MESSAGE_SERIALIZATION_H__
#include <algorithm>
#include <cstddef>
#include <vector>
#include <google/protobuf/io/printer.h>
@ -66,20 +67,31 @@ void GenerateSerializeFieldsAndExtensions(
std::sort(sorted_extensions.begin(), sorted_extensions.end(),
ExtensionRangeOrdering());
std::size_t range_idx = 0;
// Merge the fields and the extension ranges, both sorted by field number.
for (int i = 0, j = 0;
i < descriptor->field_count() || j < sorted_extensions.size();) {
if (i == descriptor->field_count()) {
GenerateSerializeExtensionRange(printer, sorted_extensions[j++]);
} else if (j == sorted_extensions.size()) {
field_generators.get(sorted_fields[i++])
.GenerateSerializationCode(printer);
} else if (sorted_fields[i]->number() < sorted_extensions[j]->start) {
field_generators.get(sorted_fields[i++])
.GenerateSerializationCode(printer);
} else {
GenerateSerializeExtensionRange(printer, sorted_extensions[j++]);
for (int i = 0; i < descriptor->field_count(); ++i) {
const FieldDescriptor* field = sorted_fields[i];
// Collapse all extension ranges up until the next field. This leads to
// shorter and more efficient codegen for messages containing a large
// number of extension ranges without fields in between them.
const Descriptor::ExtensionRange* range = nullptr;
while (range_idx < sorted_extensions.size() &&
sorted_extensions[range_idx]->end <= field->number()) {
range = sorted_extensions[range_idx++];
}
if (range != nullptr) {
GenerateSerializeExtensionRange(printer, range);
}
field_generators.get(field).GenerateSerializationCode(printer);
}
// After serializing all fields, serialize any remaining extensions via a
// single writeUntil call.
if (range_idx < sorted_extensions.size()) {
GenerateSerializeExtensionRange(printer, sorted_extensions.back());
}
}

@ -0,0 +1,124 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <cstddef>
#include <string>
#include <utility>
#include <vector>
#include <google/protobuf/stubs/logging.h>
#include <google/protobuf/stubs/common.h>
#include <google/protobuf/testing/file.h>
#include <google/protobuf/testing/file.h>
#include <gmock/gmock.h>
#include <google/protobuf/testing/googletest.h>
#include <gtest/gtest.h>
#include <google/protobuf/compiler/command_line_interface.h>
#include <google/protobuf/compiler/java/generator.h>
#include <google/protobuf/test_util2.h>
namespace google {
namespace protobuf {
namespace compiler {
namespace java {
namespace {
using ::testing::ElementsAre;
// Generates Java code for the specified Java proto, returning the compiler's
// exit status.
int CompileJavaProto(std::string proto_file_name) {
JavaGenerator java_generator;
CommandLineInterface cli;
cli.RegisterGenerator("--java_out", &java_generator, /*help_text=*/"");
std::string proto_path = StrCat(
"--proto_path=",
TestUtil::GetTestDataPath("third_party/protobuf/compiler/java"));
std::string java_out = StrCat("--java_out=", TestTempDir());
const char* argv[] = {
"protoc",
proto_path.c_str(),
java_out.c_str(),
proto_file_name.c_str(),
};
// Open-source codebase does not support ABSL_ARRAYSIZE.
return cli.Run(sizeof(argv) / sizeof(*argv), argv);
}
// Compiles message_serialization_unittest.proto to Java, then collects the
// first argument of every generated extensionWriter.writeUntil( call and
// checks the resulting sequence of range ends.
TEST(MessageSerializationTest, CollapseAdjacentExtensionRanges) {
  GOOGLE_CHECK_EQ(CompileJavaProto("message_serialization_unittest.proto"), 0);

  // Open-source codebase does not support file::JoinPath, so we manually
  // concatenate instead.
  const std::string generated_path =
      StrCat(TestTempDir(), "/TestMessageWithManyExtensionRanges.java");
  std::string java_source;
  GOOGLE_CHECK_OK(File::GetContents(generated_path, &java_source, true));

  // Open-source codebase does not support constexpr StringPiece.
  static constexpr const char kWriteUntilCall[] = "extensionWriter.writeUntil(";
  // Length of the marker without its terminating NUL.
  const std::size_t marker_length = sizeof(kWriteUntilCall) - 1;

  std::vector<std::string> range_ends;
  // Open-source codebase does not have Split overload taking a single
  // char delimiter. (It does not support RE2 either, hence the manual
  // search below.)
  //
  // NOLINTNEXTLINE(abseil-faster-strsplit-delimiter)
  for (const auto& line : Split(java_source, "\n")) {
    const std::size_t call_pos = line.find(kWriteUntilCall);
    if (call_pos != std::string::npos) {
      // The end position is the text between the opening parenthesis and the
      // first comma that follows it.
      const std::size_t arg_begin = call_pos + marker_length;
      const std::size_t arg_end = line.find(',', arg_begin);
      if (arg_end != std::string::npos) {
        range_ends.push_back(
            std::string(line.substr(arg_begin, arg_end - arg_begin)));
      }
    }
  }

  EXPECT_THAT(range_ends, ElementsAre("3", "13", "43"));
}
} // namespace
} // namespace java
} // namespace compiler
} // namespace protobuf
} // namespace google

@ -0,0 +1,56 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
syntax = "proto2";
package protobuf_unittest;
option java_multiple_files = true;
option java_package = "";
// Each batch of extension ranges not separated by a non-extension field should
// be serialized using a single ExtensionWriter#writeUntil call.
// Test fixture for the Java serialization codegen. The accompanying C++ test
// expects exactly three writeUntil calls, with end positions 3, 13 and 43.
message TestMessageWithManyExtensionRanges {
// First extension range: ends at field number 3 (exclusive)
extensions 1 to 2;
// Two regular fields with no extension range between them.
optional int32 foo = 3;
optional int32 bar = 5;
// Second extension range: ends at field number 13 (exclusive)
// The three declarations below have no regular field between them, so they
// form one batch whose end is that of the last range (10 to 12).
extensions 6;
extensions 8;
extensions 10 to 12;
optional int32 baz = 23;
// Third extension range: ends at field number 43 (exclusive)
// It follows the last regular field, so it is the trailing batch.
extensions 42;
}

@ -66,6 +66,10 @@ int ProtobufMain(int argc, char* argv[]) {
cli.RegisterGenerator("--java_out", "--java_opt", &java_generator,
"Generate Java source file.");
#ifdef GOOGLE_PROTOBUF_RUNTIME_INCLUDE_BASE
java_generator.set_opensource_runtime(true);
#endif
// Proto2 Kotlin
java::KotlinGenerator kt_generator;
cli.RegisterGenerator("--kotlin_out", "--kotlin_opt", &kt_generator,
@ -76,6 +80,11 @@ int ProtobufMain(int argc, char* argv[]) {
python::Generator py_generator;
cli.RegisterGenerator("--python_out", "--python_opt", &py_generator,
"Generate Python source file.");
#ifdef GOOGLE_PROTOBUF_RUNTIME_INCLUDE_BASE
py_generator.set_opensource_runtime(true);
#endif
// Python pyi
python::PyiGenerator pyi_generator;
cli.RegisterGenerator("--pyi_out", &pyi_generator,

@ -76,6 +76,10 @@ class PROTOC_EXPORT Generator : public CodeGenerator {
uint64_t GetSupportedFeatures() const override;
void set_opensource_runtime(bool opensource) {
opensource_runtime_ = opensource;
}
private:
void PrintImports() const;
void PrintFileDescriptor() const;
@ -172,6 +176,8 @@ class PROTOC_EXPORT Generator : public CodeGenerator {
mutable io::Printer* printer_; // Set in Generate(). Under mutex_.
mutable bool pure_python_workable_;
bool opensource_runtime_ = true;
GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(Generator);
};

@ -44,39 +44,22 @@ namespace protobuf {
namespace compiler {
namespace python {
// Comparison functor over descriptor pointers: yields true when the name()
// of the first pointee compares less than the name() of the second.
template <typename DescriptorT>
struct SortByName {
  bool operator()(const DescriptorT* lhs, const DescriptorT* rhs) const {
    const auto& left_name = lhs->name();
    const auto& right_name = rhs->name();
    return left_name < right_name;
  }
};
// Constructs a generator with no file attached: file_ starts out as nullptr.
PyiGenerator::PyiGenerator() : file_(nullptr) {}
// Destructor with an empty body; no explicit cleanup is performed here.
PyiGenerator::~PyiGenerator() {}
void PyiGenerator::PrintItemMap(
const std::map<std::string, std::string>& item_map) const {
for (const auto& entry : item_map) {
printer_->Print("$key$: $value$\n", "key", entry.first, "value",
entry.second);
}
}
template <typename DescriptorT>
std::string PyiGenerator::ModuleLevelName(
const DescriptorT& descriptor,
const std::map<std::string, std::string>& import_map) const {
std::string PyiGenerator::ModuleLevelName(const DescriptorT& descriptor) const {
std::string name = NamePrefixedWithNestedTypes(descriptor, ".");
if (descriptor.file() != file_) {
std::string module_alias;
std::string filename = descriptor.file()->name();
if (import_map.find(filename) == import_map.end()) {
if (import_map_.find(filename) == import_map_.end()) {
std::string module_name = ModuleName(descriptor.file()->name());
std::vector<std::string> tokens = Split(module_name, ".");
module_alias = "_" + tokens.back();
} else {
module_alias = import_map.at(filename);
module_alias = import_map_.at(filename);
}
name = module_alias + "." + name;
}
@ -156,7 +139,6 @@ void CheckImportModules(const Descriptor* descriptor,
void PyiGenerator::PrintImportForDescriptor(
const FileDescriptor& desc,
std::map<std::string, std::string>* import_map,
std::set<std::string>* seen_aliases) const {
const std::string& filename = desc.name();
std::string module_name = StrippedModuleName(filename);
@ -176,21 +158,19 @@ void PyiGenerator::PrintImportForDescriptor(
}
printer_->Print("$statement$ as $alias$\n", "statement",
import_statement, "alias", alias);
(*import_map)[filename] = alias;
import_map_[filename] = alias;
seen_aliases->insert(alias);
}
void PyiGenerator::PrintImports(
std::map<std::string, std::string>* item_map,
std::map<std::string, std::string>* import_map) const {
void PyiGenerator::PrintImports() const {
// Prints imported dependent _pb2 files.
std::set<std::string> seen_aliases;
for (int i = 0; i < file_->dependency_count(); ++i) {
const FileDescriptor* dep = file_->dependency(i);
PrintImportForDescriptor(*dep, import_map, &seen_aliases);
PrintImportForDescriptor(*dep, &seen_aliases);
for (int j = 0; j < dep->public_dependency_count(); ++j) {
PrintImportForDescriptor(
*dep->public_dependency(j), import_map, &seen_aliases);
*dep->public_dependency(j), &seen_aliases);
}
}
@ -254,7 +234,7 @@ void PyiGenerator::PrintImports(
if (import_modules.has_union) {
printer_->Print(", Union as _Union");
}
printer_->Print("\n\n");
printer_->Print("\n");
// Public imports
for (int i = 0; i < file_->public_dependency_count(); ++i) {
@ -272,17 +252,8 @@ void PyiGenerator::PrintImports(
module_name, "enum_class",
public_dep->enum_type(i)->name());
}
// Enum values for public imports
for (int i = 0; i < public_dep->enum_type_count(); ++i) {
const EnumDescriptor* enum_descriptor = public_dep->enum_type(i);
for (int j = 0; j < enum_descriptor->value_count(); ++j) {
(*item_map)[enum_descriptor->value(j)->name()] =
ModuleLevelName(*enum_descriptor, *import_map);
}
}
// Top level extensions for public imports
AddExtensions(*public_dep, item_map);
}
printer_->Print("\n");
}
void PyiGenerator::PrintEnum(const EnumDescriptor& enum_descriptor) const {
@ -293,20 +264,18 @@ void PyiGenerator::PrintEnum(const EnumDescriptor& enum_descriptor) const {
"enum_name", enum_name);
}
// Adds enum value to item map which will be ordered and printed later.
void PyiGenerator::AddEnumValue(
const EnumDescriptor& enum_descriptor,
std::map<std::string, std::string>* item_map,
const std::map<std::string, std::string>& import_map) const {
void PyiGenerator::PrintEnumValues(
const EnumDescriptor& enum_descriptor) const {
// enum values
std::string module_enum_name = ModuleLevelName(enum_descriptor, import_map);
std::string module_enum_name = ModuleLevelName(enum_descriptor);
for (int j = 0; j < enum_descriptor.value_count(); ++j) {
const EnumValueDescriptor* value_descriptor = enum_descriptor.value(j);
(*item_map)[value_descriptor->name()] = module_enum_name;
printer_->Print("$name$: $module_enum_name$\n",
"name", value_descriptor->name(),
"module_enum_name", module_enum_name);
}
}
// Prints top level enums
void PyiGenerator::PrintTopLevelEnums() const {
for (int i = 0; i < file_->enum_type_count(); ++i) {
printer_->Print("\n");
@ -314,25 +283,22 @@ void PyiGenerator::PrintTopLevelEnums() const {
}
}
// Add top level extensions to item_map which will be ordered and
// printed later.
template <typename DescriptorT>
void PyiGenerator::AddExtensions(
const DescriptorT& descriptor,
std::map<std::string, std::string>* item_map) const {
void PyiGenerator::PrintExtensions(const DescriptorT& descriptor) const {
for (int i = 0; i < descriptor.extension_count(); ++i) {
const FieldDescriptor* extension_field = descriptor.extension(i);
std::string constant_name = extension_field->name() + "_FIELD_NUMBER";
ToUpper(&constant_name);
(*item_map)[constant_name] = "_ClassVar[int]";
(*item_map)[extension_field->name()] = "_descriptor.FieldDescriptor";
printer_->Print("$constant_name$: _ClassVar[int]\n",
"constant_name", constant_name);
printer_->Print("$name$: _descriptor.FieldDescriptor\n",
"name", extension_field->name());
}
}
// Returns the string format of a field's cpp_type
std::string PyiGenerator::GetFieldType(
const FieldDescriptor& field_des, const Descriptor& containing_des,
const std::map<std::string, std::string>& import_map) const {
const FieldDescriptor& field_des, const Descriptor& containing_des) const {
switch (field_des.cpp_type()) {
case FieldDescriptor::CPPTYPE_INT32:
case FieldDescriptor::CPPTYPE_UINT32:
@ -345,7 +311,7 @@ std::string PyiGenerator::GetFieldType(
case FieldDescriptor::CPPTYPE_BOOL:
return "bool";
case FieldDescriptor::CPPTYPE_ENUM:
return ModuleLevelName(*field_des.enum_type(), import_map);
return ModuleLevelName(*field_des.enum_type());
case FieldDescriptor::CPPTYPE_STRING:
if (field_des.type() == FieldDescriptor::TYPE_STRING) {
return "str";
@ -356,7 +322,7 @@ std::string PyiGenerator::GetFieldType(
// If the field is inside a nested message and the nested message has the
// same name as a top-level message, then we need to prefix the field type
// with the module name for disambiguation.
std::string name = ModuleLevelName(*field_des.message_type(), import_map);
std::string name = ModuleLevelName(*field_des.message_type());
if ((containing_des.containing_type() != nullptr &&
name == containing_des.name())) {
std::string module = ModuleName(field_des.file()->name());
@ -371,8 +337,7 @@ std::string PyiGenerator::GetFieldType(
}
void PyiGenerator::PrintMessage(
const Descriptor& message_descriptor, bool is_nested,
const std::map<std::string, std::string>& import_map) const {
const Descriptor& message_descriptor, bool is_nested) const {
if (!is_nested) {
printer_->Print("\n");
}
@ -390,17 +355,11 @@ void PyiGenerator::PrintMessage(
printer_->Indent();
printer_->Indent();
std::vector<const FieldDescriptor*> fields;
fields.reserve(message_descriptor.field_count());
for (int i = 0; i < message_descriptor.field_count(); ++i) {
fields.push_back(message_descriptor.field(i));
}
std::sort(fields.begin(), fields.end(), SortByName<FieldDescriptor>());
// Prints slots
printer_->Print("__slots__ = [", "class_name", class_name);
bool first_item = true;
for (const auto& field_des : fields) {
for (int i = 0; i < message_descriptor.field_count(); ++i) {
const FieldDescriptor* field_des = message_descriptor.field(i);
if (IsPythonKeyword(field_des->name())) {
continue;
}
@ -413,48 +372,34 @@ void PyiGenerator::PrintMessage(
}
printer_->Print("]\n");
std::map<std::string, std::string> item_map;
// Prints Extensions for extendable messages
if (message_descriptor.extension_range_count() > 0) {
item_map["Extensions"] = "_python_message._ExtensionDict";
printer_->Print("Extensions: _python_message._ExtensionDict\n");
}
// Prints nested enums
std::vector<const EnumDescriptor*> nested_enums;
nested_enums.reserve(message_descriptor.enum_type_count());
for (int i = 0; i < message_descriptor.enum_type_count(); ++i) {
nested_enums.push_back(message_descriptor.enum_type(i));
}
std::sort(nested_enums.begin(), nested_enums.end(),
SortByName<EnumDescriptor>());
for (const auto& entry : nested_enums) {
PrintEnum(*entry);
// Adds enum value to item_map which will be ordered and printed later
AddEnumValue(*entry, &item_map, import_map);
PrintEnum(*message_descriptor.enum_type(i));
PrintEnumValues(*message_descriptor.enum_type(i));
}
// Prints nested messages
std::vector<const Descriptor*> nested_messages;
nested_messages.reserve(message_descriptor.nested_type_count());
for (int i = 0; i < message_descriptor.nested_type_count(); ++i) {
nested_messages.push_back(message_descriptor.nested_type(i));
}
std::sort(nested_messages.begin(), nested_messages.end(),
SortByName<Descriptor>());
for (const auto& entry : nested_messages) {
PrintMessage(*entry, true, import_map);
PrintMessage(*message_descriptor.nested_type(i), true);
}
// Adds extensions to item_map which will be ordered and printed later
AddExtensions(message_descriptor, &item_map);
PrintExtensions(message_descriptor);
// Adds field number and field descriptor to item_map
// Prints field number
for (int i = 0; i < message_descriptor.field_count(); ++i) {
const FieldDescriptor& field_des = *message_descriptor.field(i);
printer_->Print(
"$field_number_name$: _ClassVar[int]\n", "field_number_name",
ToUpper(field_des.name()) + "_FIELD_NUMBER");
}
// Prints field name and type
for (int i = 0; i < message_descriptor.field_count(); ++i) {
const FieldDescriptor& field_des = *message_descriptor.field(i);
item_map[ToUpper(field_des.name()) + "_FIELD_NUMBER"] =
"_ClassVar[int]";
if (IsPythonKeyword(field_des.name())) {
continue;
}
@ -465,27 +410,25 @@ void PyiGenerator::PrintMessage(
field_type = (value_des->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE
? "_containers.MessageMap["
: "_containers.ScalarMap[");
field_type += GetFieldType(*key_des, message_descriptor, import_map);
field_type += GetFieldType(*key_des, message_descriptor);
field_type += ", ";
field_type += GetFieldType(*value_des, message_descriptor, import_map);
field_type += GetFieldType(*value_des, message_descriptor);
} else {
if (field_des.is_repeated()) {
field_type = (field_des.cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE
? "_containers.RepeatedCompositeFieldContainer["
: "_containers.RepeatedScalarFieldContainer[");
}
field_type += GetFieldType(field_des, message_descriptor, import_map);
field_type += GetFieldType(field_des, message_descriptor);
}
if (field_des.is_repeated()) {
field_type += "]";
}
item_map[field_des.name()] = field_type;
printer_->Print("$name$: $type$\n",
"name", field_des.name(), "type", field_type);
}
// Prints all items in item_map
PrintItemMap(item_map);
// Prints __init__
printer_->Print("def __init__(self");
bool has_key_words = false;
@ -513,9 +456,9 @@ void PyiGenerator::PrintMessage(
const Descriptor* map_entry = field_des->message_type();
printer_->Print(
"_Mapping[$key_type$, $value_type$]", "key_type",
GetFieldType(*map_entry->field(0), message_descriptor, import_map),
GetFieldType(*map_entry->field(0), message_descriptor),
"value_type",
GetFieldType(*map_entry->field(1), message_descriptor, import_map));
GetFieldType(*map_entry->field(1), message_descriptor));
} else {
if (field_des->is_repeated()) {
printer_->Print("_Iterable[");
@ -523,15 +466,15 @@ void PyiGenerator::PrintMessage(
if (field_des->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
printer_->Print(
"_Union[$type_name$, _Mapping]", "type_name",
GetFieldType(*field_des, message_descriptor, import_map));
GetFieldType(*field_des, message_descriptor));
} else {
if (field_des->cpp_type() == FieldDescriptor::CPPTYPE_ENUM) {
printer_->Print("_Union[$type_name$, str]", "type_name",
ModuleLevelName(*field_des->enum_type(), import_map));
ModuleLevelName(*field_des->enum_type()));
} else {
printer_->Print(
"$type_name$", "type_name",
GetFieldType(*field_des, message_descriptor, import_map));
GetFieldType(*field_des, message_descriptor));
}
}
if (field_des->is_repeated()) {
@ -553,36 +496,21 @@ void PyiGenerator::PrintMessage(
printer_->Outdent();
}
void PyiGenerator::PrintMessages(
const std::map<std::string, std::string>& import_map) const {
void PyiGenerator::PrintMessages() const {
// Deterministically order the descriptors.
std::vector<const Descriptor*> messages;
messages.reserve(file_->message_type_count());
for (int i = 0; i < file_->message_type_count(); ++i) {
messages.push_back(file_->message_type(i));
}
std::sort(messages.begin(), messages.end(), SortByName<Descriptor>());
for (const auto& entry : messages) {
PrintMessage(*entry, false, import_map);
PrintMessage(*file_->message_type(i), false);
}
}
void PyiGenerator::PrintServices() const {
std::vector<const ServiceDescriptor*> services;
services.reserve(file_->service_count());
for (int i = 0; i < file_->service_count(); ++i) {
services.push_back(file_->service(i));
}
std::sort(services.begin(), services.end(), SortByName<ServiceDescriptor>());
// Prints $Service$ and $Service$_Stub classes
for (const auto& entry : services) {
for (int i = 0; i < file_->service_count(); ++i) {
printer_->Print("\n");
printer_->Print(
"class $service_name$(_service.service): ...\n\n"
"class $service_name$_Stub($service_name$): ...\n",
"service_name", entry->name());
"service_name", file_->service(i)->name());
}
}
@ -591,6 +519,7 @@ bool PyiGenerator::Generate(const FileDescriptor* file,
GeneratorContext* context,
std::string* error) const {
MutexLock lock(&mutex_);
import_map_.clear();
// Calculate file name.
file_ = file;
std::string filename =
@ -601,29 +530,28 @@ bool PyiGenerator::Generate(const FileDescriptor* file,
io::Printer printer(output.get(), '$');
printer_ = &printer;
// item map will store "DESCRIPTOR", top level extensions, top level enum
// values. The items will be sorted and printed later.
std::map<std::string, std::string> item_map;
// Adds "DESCRIPTOR" into item_map.
item_map["DESCRIPTOR"] = "_descriptor.FileDescriptor";
PrintImports();
printer_->Print("DESCRIPTOR: _descriptor.FileDescriptor\n");
// import_map will be a mapping from filename to module alias, e.g.
// "google3/foo/bar.py" -> "_bar"
std::map<std::string, std::string> import_map;
// Prints extensions and enums from imports.
for (int i = 0; i < file_->public_dependency_count(); ++i) {
const FileDescriptor* public_dep = file_->public_dependency(i);
PrintExtensions(*public_dep);
for (int i = 0; i < public_dep->enum_type_count(); ++i) {
const EnumDescriptor* enum_descriptor = public_dep->enum_type(i);
PrintEnumValues(*enum_descriptor);
}
}
PrintImports(&item_map, &import_map);
// Adds top level enum values to item_map.
PrintTopLevelEnums();
// Prints top level enum values
for (int i = 0; i < file_->enum_type_count(); ++i) {
AddEnumValue(*file_->enum_type(i), &item_map, import_map);
PrintEnumValues(*file_->enum_type(i));
}
// Adds top level extensions to item_map.
AddExtensions(*file_, &item_map);
// Prints item map
PrintItemMap(item_map);
// Prints top level Extensions
PrintExtensions(*file_);
PrintMessages();
PrintMessages(import_map);
PrintTopLevelEnums();
if (HasGenericServices(file)) {
PrintServices();
}

@ -76,37 +76,29 @@ class PROTOC_EXPORT PyiGenerator : public google::protobuf::compiler::CodeGenera
private:
void PrintImportForDescriptor(const FileDescriptor& desc,
std::map<std::string, std::string>* import_map,
std::set<std::string>* seen_aliases) const;
void PrintImports(std::map<std::string, std::string>* item_map,
std::map<std::string, std::string>* import_map) const;
void PrintEnum(const EnumDescriptor& enum_descriptor) const;
void AddEnumValue(const EnumDescriptor& enum_descriptor,
std::map<std::string, std::string>* item_map,
const std::map<std::string, std::string>& import_map) const;
void PrintImports() const;
void PrintTopLevelEnums() const;
void PrintEnum(const EnumDescriptor& enum_descriptor) const;
void PrintEnumValues(const EnumDescriptor& enum_descriptor) const;
template <typename DescriptorT>
void AddExtensions(const DescriptorT& descriptor,
std::map<std::string, std::string>* item_map) const;
void PrintMessages(
const std::map<std::string, std::string>& import_map) const;
void PrintMessage(const Descriptor& message_descriptor, bool is_nested,
const std::map<std::string, std::string>& import_map) const;
void PrintExtensions(const DescriptorT& descriptor) const;
void PrintMessages() const;
void PrintMessage(const Descriptor& message_descriptor, bool is_nested) const;
void PrintServices() const;
void PrintItemMap(const std::map<std::string, std::string>& item_map) const;
std::string GetFieldType(
const FieldDescriptor& field_des, const Descriptor& containing_des,
const std::map<std::string, std::string>& import_map) const;
const FieldDescriptor& field_des, const Descriptor& containing_des) const;
template <typename DescriptorT>
std::string ModuleLevelName(
const DescriptorT& descriptor,
const std::map<std::string, std::string>& import_map) const;
std::string ModuleLevelName(const DescriptorT& descriptor) const;
// Very coarse-grained lock to ensure that Generate() is reentrant.
// Guards file_ and printer_.
// Guards file_, printer_, and import_map_.
mutable Mutex mutex_;
mutable const FileDescriptor* file_; // Set in Generate(). Under mutex_.
mutable io::Printer* printer_; // Set in Generate(). Under mutex_.
// import_map will be a mapping from filename to module alias, e.g.
// "google3/foo/bar.py" -> "_bar"
mutable std::map<std::string, std::string> import_map_;
GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(PyiGenerator);
};

@ -38,7 +38,6 @@
#include <google/protobuf/port.h>
#include <google/protobuf/extension_set.h>
#include <google/protobuf/generated_message_tctable_decl.h>
#include <google/protobuf/message_lite.h>
#include <google/protobuf/metadata_lite.h>
#include <google/protobuf/parse_context.h>
#include <google/protobuf/wire_format_lite.h>
@ -257,16 +256,6 @@ enum FieldType : uint16_t {
// clang-format on
} // namespace field_layout
// PROTOBUF_TC_PARAM_DECL are the parameters for tailcall functions, it is
// defined in port_def.inc.
//
// Note that this is performance sensitive: changing the parameters will change
// the registers used by the ABI calling convention, which subsequently affects
// register selection logic inside the function.
// PROTOBUF_TC_PARAM_PASS passes values to match PROTOBUF_TC_PARAM_DECL.
#define PROTOBUF_TC_PARAM_PASS msg, ptr, ctx, table, hasbits, data
#ifndef NDEBUG
template <size_t align>
void AlignFail(uintptr_t address) {
@ -349,6 +338,28 @@ class PROTOBUF_EXPORT TcParser final {
static const char* FastZ64P1(PROTOBUF_TC_PARAM_DECL);
static const char* FastZ64P2(PROTOBUF_TC_PARAM_DECL);
// Manually unrolled and specialized Varint parsing.
template <typename FieldType, int data_offset, int hasbit_idx>
static const char* SpecializedUnrolledVImpl1(PROTOBUF_TC_PARAM_DECL);
// Picks, at compile time, the parse function to use for a singular varint
// field of type `FieldType` stored at `data_offset`, whose hasbit index is
// `hasbit_idx`.
//
// Returns the specialized unrolled parser (offset and hasbit index baked in as
// template arguments) when `data_offset` is below 100; otherwise returns the
// generic FastV8S1 / FastV32S1 / FastV64S1 according to sizeof(FieldType).
template <typename FieldType, int data_offset, int hasbit_idx>
static constexpr TailCallParseFunc SingularVarintNoZag1() {
// NOTE(review): the rationale for the 100 threshold is not stated here --
// presumably it bounds the number of distinct template instantiations;
// confirm with the generator that calls this.
if (data_offset < 100) {
return &SpecializedUnrolledVImpl1<FieldType, data_offset, hasbit_idx>;
} else if (sizeof(FieldType) == 1) {
return &FastV8S1;
} else if (sizeof(FieldType) == 4) {
return &FastV32S1;
} else if (sizeof(FieldType) == 8) {
return &FastV64S1;
} else {
// These are ordinary `if` statements (not `if constexpr`), so this branch is
// compiled for every instantiation: the assert rejects any FieldType whose
// size is not 1, 4 or 8 even when an earlier branch would have been taken.
static_assert(sizeof(FieldType) == 1 || sizeof(FieldType) == 4 ||
sizeof(FieldType) == 8,
"");
return nullptr;
}
}
// Functions referenced by generated fast tables (closed enum):
// E: closed enum (N.B.: open enums use V32, above)
// r: enum range v: enum validator (_IsValid function)
@ -600,6 +611,135 @@ class PROTOBUF_EXPORT TcParser final {
static const char* MpMap(PROTOBUF_TC_PARAM_DECL);
};
// Parses a singular varint field that has a 1-byte tag, with the field offset
// (`data_offset`) and hasbit index (`hasbit_idx`) fixed at compile time.
//
// On entry `data.data` holds the fast-table entry bits XOR-ed with the 16 bits
// loaded at `ptr` (see TagDispatch), so a zero low byte means the tag matched.
// The decoded value is stored into the FieldType at `msg + data_offset`, the
// hasbit is OR-ed into `hasbits` when `hasbit_idx < 32`, and parsing continues
// through ToTagDispatch. A tag mismatch is forwarded to MiniParse. A malformed
// 10th varint byte makes the function return nullptr.
template <typename FieldType, int data_offset, int hasbit_idx>
const char* TcParser::SpecializedUnrolledVImpl1(PROTOBUF_TC_PARAM_DECL) {
using TagType = uint8_t;
// super-early success test...
// A single mask checks both that the low byte is zero (tag matched) and that
// bit 15 is clear. NOTE(review): bit 15 is presumably the continuation bit of
// the first value byte, which assumes a little-endian 16-bit load and a zero
// second byte in the table entry -- confirm.
if (PROTOBUF_PREDICT_TRUE(((data.data) & 0x80FF) == 0)) {
ptr += sizeof(TagType); // Consume tag
if (hasbit_idx < 32) {
hasbits |= (uint64_t{1} << hasbit_idx);
}
// The whole value is the second byte of `data.data`; one more byte of input
// is consumed for it below.
uint8_t value = data.data >> 8;
RefAt<FieldType>(msg, data_offset) = value;
ptr += 1;
PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS);
}
// The combined test failed: either the tag did not match (handled here) or
// the value needs more than one byte (handled below).
if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS);
}
ptr += sizeof(TagType); // Consume tag
if (hasbit_idx < 32) {
hasbits |= (uint64_t{1} << hasbit_idx);
}
// Few registers
auto* out = &RefAt<FieldType>(msg, data_offset);
// `ptr` now addresses the first value byte, which is also available as the
// second byte of `data.data`.
uint64_t res = 0xFF & (data.data >> 8);
// The guard below is commented out in the original: assuming
// coded_tag<uint8_t>() is the low byte of data.data, reaching this point
// implies bit 0x80 of `res` is set, so the check would always succeed.
//
// Each level reads one more input byte via RotRight7AndReplaceLowByte and, if
// that byte's 0x80 bit is clear, finishes by rotating `res` left by 7 bits per
// previously consumed byte and advancing `ptr` past all value bytes read.
// NOTE(review): RotRight7AndReplaceLowByte is not shown here -- presumably it
// rotates `res` right by 7 and installs the new byte as the low byte; confirm.
/* if (PROTOBUF_PREDICT_FALSE(res & 0x80)) */ {
res = RotRight7AndReplaceLowByte(res, ptr[1]);
if (PROTOBUF_PREDICT_FALSE(res & 0x80)) {
res = RotRight7AndReplaceLowByte(res, ptr[2]);
if (PROTOBUF_PREDICT_FALSE(res & 0x80)) {
res = RotRight7AndReplaceLowByte(res, ptr[3]);
if (PROTOBUF_PREDICT_FALSE(res & 0x80)) {
res = RotRight7AndReplaceLowByte(res, ptr[4]);
if (PROTOBUF_PREDICT_FALSE(res & 0x80)) {
res = RotRight7AndReplaceLowByte(res, ptr[5]);
if (PROTOBUF_PREDICT_FALSE(res & 0x80)) {
res = RotRight7AndReplaceLowByte(res, ptr[6]);
if (PROTOBUF_PREDICT_FALSE(res & 0x80)) {
res = RotRight7AndReplaceLowByte(res, ptr[7]);
if (PROTOBUF_PREDICT_FALSE(res & 0x80)) {
res = RotRight7AndReplaceLowByte(res, ptr[8]);
if (PROTOBUF_PREDICT_FALSE(res & 0x80)) {
// 10th value byte: only its lowest bit may be set; anything else is
// rejected. NOTE(review): this returns nullptr directly rather than
// through Error(), so SyncHasbits is not called on this path --
// confirm that is intended.
if (ptr[9] & 0xFE) return nullptr;
res = RotateLeft(res, -7) & ~1;
res += ptr[9] & 1;
*out = RotateLeft(res, 63);
ptr += 10;
PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS);
}
*out = RotateLeft(res, 56);
ptr += 9;
PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS);
}
*out = RotateLeft(res, 49);
ptr += 8;
PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS);
}
*out = RotateLeft(res, 42);
ptr += 7;
PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS);
}
*out = RotateLeft(res, 35);
ptr += 6;
PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS);
}
*out = RotateLeft(res, 28);
ptr += 5;
PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS);
}
*out = RotateLeft(res, 21);
ptr += 4;
PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS);
}
*out = RotateLeft(res, 14);
ptr += 3;
PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS);
}
*out = RotateLeft(res, 7);
ptr += 2;
PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS);
}
// Single-byte exit matching the commented-out guard above. While that guard
// stays commented out the braced block always returns, so these statements
// are not reached.
*out = res;
ptr += 1;
PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS);
}
// Dispatch to the designated parse function
//
// Loads 16 bits at `ptr`, selects a fast-table entry from them, and tail-calls
// that entry's target with `data` set to the entry bits XOR-ed with the loaded
// bits.
inline PROTOBUF_ALWAYS_INLINE const char* TcParser::TagDispatch(
PROTOBUF_TC_PARAM_DECL) {
// Unaligned 2-byte load: `ptr` is a position in the input stream.
const auto coded_tag = UnalignedLoad<uint16_t>(ptr);
const size_t idx = coded_tag & table->fast_idx_mask;
// Tells the optimizer that the masked index is a multiple of 8, so the shift
// below discards only zero bits.
PROTOBUF_ASSUME((idx & 7) == 0);
auto* fast_entry = table->fast_entry(idx >> 3);
data = fast_entry->bits;
// After the XOR, bits where the input equals the entry become zero, which
// lets the target test for a tag match by comparing against zero.
data.data ^= coded_tag;
PROTOBUF_MUSTTAIL return fast_entry->target(PROTOBUF_TC_PARAM_PASS);
}
// We can only safely call from field to next field if the call is optimized
// to a proper tail call. Otherwise we blow through stack. Clang and gcc
// reliably do this optimization in opt mode, but do not perform this in debug
// mode. Luckily the structure of the algorithm is such that it's always
// possible to just return and use the enclosing parse loop as a trampoline.
//
// Continues with the next field via TagDispatch when tail calls are enabled
// and `ctx` reports data available at `ptr`; otherwise hands control back
// through ToParseLoop.
inline PROTOBUF_ALWAYS_INLINE const char* TcParser::ToTagDispatch(
PROTOBUF_TC_PARAM_DECL) {
// Compile-time switch: without PROTOBUF_TAILCALL every field returns to the
// caller instead of chaining into the next one.
constexpr bool always_return = !PROTOBUF_TAILCALL;
if (always_return || !ctx->DataAvailable(ptr)) {
PROTOBUF_MUSTTAIL return ToParseLoop(PROTOBUF_TC_PARAM_PASS);
}
PROTOBUF_MUSTTAIL return TagDispatch(PROTOBUF_TC_PARAM_PASS);
}
// Leaves the fast-path chain successfully: passes the accumulated `hasbits` to
// SyncHasbits for `msg` and returns the current `ptr` to the caller.
inline PROTOBUF_ALWAYS_INLINE const char* TcParser::ToParseLoop(
PROTOBUF_TC_PARAM_DECL) {
// `data` and `ctx` are part of the common parameter list but unused here.
(void)data;
(void)ctx;
SyncHasbits(msg, hasbits, table);
return ptr;
}
// Leaves the fast-path chain with a failure: passes the accumulated `hasbits`
// to SyncHasbits for `msg` and returns nullptr.
inline PROTOBUF_ALWAYS_INLINE const char* TcParser::Error(
PROTOBUF_TC_PARAM_DECL) {
// `data`, `ctx` and `ptr` are part of the common parameter list but unused
// here.
(void)data;
(void)ctx;
(void)ptr;
SyncHasbits(msg, hasbits, table);
return nullptr;
}
} // namespace internal
} // namespace protobuf
} // namespace google

@ -30,6 +30,7 @@
#include <cstdint>
#include <numeric>
#include <type_traits>
#include <google/protobuf/extension_set.h>
#include <google/protobuf/generated_message_tctable_decl.h>
@ -84,56 +85,13 @@ PROTOBUF_NOINLINE const char* TcParser::ParseLoop(
// TODO(b/64614992): remove this asm
asm("" : "+r"(table));
#endif
ptr = TagDispatch(msg, ptr, ctx, table - 1, 0, {});
ptr = TagDispatch(msg, ptr, ctx, {}, table - 1, 0);
if (ptr == nullptr) break;
if (ctx->LastTag() != 1) break; // Ended on terminating tag
}
return ptr;
}
// Dispatch to the designated parse function
inline PROTOBUF_ALWAYS_INLINE const char* TcParser::TagDispatch(
PROTOBUF_TC_PARAM_DECL) {
const auto coded_tag = UnalignedLoad<uint16_t>(ptr);
const size_t idx = coded_tag & table->fast_idx_mask;
PROTOBUF_ASSUME((idx & 7) == 0);
auto* fast_entry = table->fast_entry(idx >> 3);
data = fast_entry->bits;
data.data ^= coded_tag;
PROTOBUF_MUSTTAIL return fast_entry->target(PROTOBUF_TC_PARAM_PASS);
}
// We can only safely call from field to next field if the call is optimized
// to a proper tail call. Otherwise we blow through stack. Clang and gcc
// reliably do this optimization in opt mode, but do not perform this in debug
// mode. Luckily the structure of the algorithm is such that it's always
// possible to just return and use the enclosing parse loop as a trampoline.
inline PROTOBUF_ALWAYS_INLINE const char* TcParser::ToTagDispatch(
PROTOBUF_TC_PARAM_DECL) {
constexpr bool always_return = !PROTOBUF_TAILCALL;
if (always_return || !ctx->DataAvailable(ptr)) {
PROTOBUF_MUSTTAIL return ToParseLoop(PROTOBUF_TC_PARAM_PASS);
}
PROTOBUF_MUSTTAIL return TagDispatch(PROTOBUF_TC_PARAM_PASS);
}
inline PROTOBUF_ALWAYS_INLINE const char* TcParser::ToParseLoop(
PROTOBUF_TC_PARAM_DECL) {
(void)data;
(void)ctx;
SyncHasbits(msg, hasbits, table);
return ptr;
}
inline PROTOBUF_ALWAYS_INLINE const char* TcParser::Error(
PROTOBUF_TC_PARAM_DECL) {
(void)data;
(void)ctx;
(void)ptr;
SyncHasbits(msg, hasbits, table);
return nullptr;
}
// On the fast path, a (matching) 1-byte tag already has the decoded value.
static uint32_t FastDecodeTag(uint8_t coded_tag) {
return coded_tag;
@ -875,8 +833,31 @@ PROTOBUF_NOINLINE const char* TcParser::SingularVarBigint(
}
const char* TcParser::FastV8S1(PROTOBUF_TC_PARAM_DECL) {
PROTOBUF_MUSTTAIL return SingularVarint<bool, uint8_t>(
PROTOBUF_TC_PARAM_PASS);
// Special case for a varint bool field with a tag of 1 byte:
// The coded_tag() field will actually contain the value too and we can check
// both at the same time.
auto coded_tag = data.coded_tag<uint16_t>();
if (PROTOBUF_PREDICT_TRUE(coded_tag == 0x0000 || coded_tag == 0x0100)) {
auto& field = RefAt<bool>(msg, data.offset());
// Note: we use `data.data` because Clang generates suboptimal code when
// using coded_tag.
// In x86_64 this uses the CH register to read the second byte out of
// `data`.
uint8_t value = data.data >> 8;
// The assume allows using a mov instead of test+setne.
PROTOBUF_ASSUME(value <= 1);
field = static_cast<bool>(value);
ptr += 2; // Consume the tag and the value.
hasbits |= (uint64_t{1} << data.hasbit_idx());
PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS);
}
// If it didn't match above either the tag is wrong, or the value is encoded
// non-canonically.
// Jump to MiniParse as wrong tag is the most probable reason.
PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS);
}
const char* TcParser::FastV8S2(PROTOBUF_TC_PARAM_DECL) {
PROTOBUF_MUSTTAIL return SingularVarint<bool, uint16_t>(

@ -251,7 +251,8 @@ class PROTOBUF_EXPORT Printer {
template <typename... Args>
void Print(const char* text, const Args&... args) {
std::map<std::string, std::string> vars;
PrintInternal(&vars, text, args...);
FillMap(&vars, args...);
Print(vars, text);
}
// Indent text by two spaces. After calling Indent(), two spaces will be
@ -299,18 +300,13 @@ class PROTOBUF_EXPORT Printer {
void Annotate(const char* begin_varname, const char* end_varname,
const std::string& file_path, const std::vector<int>& path);
// Base case
void PrintInternal(std::map<std::string, std::string>* vars,
const char* text) {
Print(*vars, text);
}
void FillMap(std::map<std::string, std::string>* vars) {}
template <typename... Args>
void PrintInternal(std::map<std::string, std::string>* vars, const char* text,
const char* key, const std::string& value,
const Args&... args) {
void FillMap(std::map<std::string, std::string>* vars, const std::string& key,
const std::string& value, const Args&... args) {
(*vars)[key] = value;
PrintInternal(vars, text, args...);
FillMap(vars, args...);
}
// Copy size worth of bytes from data to buffer_.

@ -333,6 +333,11 @@ inline size_t SpaceUsedInValues(const void*) { return 0; }
} // namespace internal
#ifdef PROTOBUF_FUTURE_MAP_PAIR_UPGRADE
// This is the class for Map's internal value_type.
template <typename Key, typename T>
using MapPair = std::pair<const Key, T>;
#else
// This is the class for Map's internal value_type. Instead of using
// std::pair as value_type, we use this class which provides us more control of
// its process of construction and destruction.
@ -363,6 +368,7 @@ struct PROTOBUF_ATTRIBUTE_STANDALONE_DEBUG MapPair {
friend class Arena;
friend class Map<Key, T>;
};
#endif
// Map is an associative container type used to store protobuf map
// fields. Each Map instance may or may not use a different hash function, a

@ -84,3 +84,5 @@ TEST(MapTest, Aligned8OnArena) { MapTest_Aligned<AlignedAs8, true>(); }
} // namespace internal
} // namespace protobuf
} // namespace google
#include <google/protobuf/port_undef.inc>

@ -810,6 +810,8 @@ TEST_F(MapImplTest, Emplace) {
m, UnorderedElementsAre(Pair(1, "one"), Pair(2, "two"), Pair(42, "aaa")));
}
#ifndef PROTOBUF_FUTURE_MAP_PAIR_UPGRADE
TEST_F(MapImplTest, EmplaceKeyOnly) {
using ::testing::Pair;
using ::testing::UnorderedElementsAre;
@ -824,6 +826,43 @@ TEST_F(MapImplTest, EmplaceKeyOnly) {
EXPECT_THAT(m, UnorderedElementsAre(Pair(1, ""), Pair(42, "")));
}
#else
// Checks that the value_type of Map<const char*, int> is not implicitly
// convertible to std::pair<std::string, std::vector<std::string>>.
TEST_F(MapImplTest, ValueTypeNoImplicitConversion) {
using vt = typename Map<const char*, int>::value_type;
// The extra parentheses keep the commas in the template argument list from
// being parsed as macro argument separators.
EXPECT_FALSE((std::is_convertible<
vt, std::pair<std::string, std::vector<std::string>>>::value));
}
// Names the constructor that produced a given ConstructorTag instance.
enum class ConstructorType {
  kDefault,
  kCopy,
  kMove,
};

// Probe type whose only state records which of its constructors ran, so a test
// can tell whether a value was default-constructed, copied, or moved.
struct ConstructorTag {
  // Set once by whichever constructor created this object.
  ConstructorType invoked_constructor = ConstructorType::kDefault;

  // Default construction keeps the kDefault marker from the member initializer.
  ConstructorTag() {}

  // Copying never inspects the source; it only records that a copy happened.
  ConstructorTag(const ConstructorTag&)
      : invoked_constructor{ConstructorType::kCopy} {}

  // Moving never inspects the source; it only records that a move happened.
  ConstructorTag(ConstructorTag&&)
      : invoked_constructor{ConstructorType::kMove} {}
};
// Checks that constructing Map's value_type from an lvalue and an rvalue
// copy-constructs the first member and move-constructs the second, as recorded
// by ConstructorTag::invoked_constructor.
TEST_F(MapImplTest, ValueTypeHasMoveConstructor) {
using vt = typename Map<ConstructorTag, ConstructorTag>::value_type;
ConstructorTag l, r;
// `l` is passed as an lvalue (expect a copy); `r` is passed as an rvalue
// (expect a move).
vt pair(l, std::move(r));
EXPECT_EQ(pair.first.invoked_constructor, ConstructorType::kCopy);
EXPECT_EQ(pair.second.invoked_constructor, ConstructorType::kMove);
}
#endif // !PROTOBUF_FUTURE_MAP_PAIR_UPGRADE
struct CountedInstance {
CountedInstance() { ++num_created; }
CountedInstance(const CountedInstance&) : CountedInstance() {}

@ -214,7 +214,7 @@ uint64_t Message::GetInvariantPerBuild(uint64_t salt) {
}
namespace internal {
void* CreateSplitMessageGeneric(Arena* arena, void* default_split,
void* CreateSplitMessageGeneric(Arena* arena, const void* default_split,
size_t size) {
void* split =
(arena == nullptr) ? ::operator new(size) : arena->AllocateAligned(size);

@ -411,7 +411,8 @@ class PROTOBUF_EXPORT Message : public MessageLite {
namespace internal {
// Creates and returns an allocation for a split message.
void* CreateSplitMessageGeneric(Arena* arena, void* default_split, size_t size);
void* CreateSplitMessageGeneric(Arena* arena, const void* default_split,
size_t size);
// Forward-declare interfaces used to implement RepeatedFieldRef.
// These are protobuf internals that users shouldn't care about.

@ -1174,11 +1174,12 @@ TEST(MESSAGE_TEST_NAME, PreservesFloatingPointNegative0) {
std::signbit(out_message.optional_double()));
}
std::string EncodeEnumValue(int number, int value, int non_canonical_bytes) {
uint8_t buf[100];
uint8_t* p = buf;
p = internal::WireFormatLite::WriteEnumToArray(number, value, p);
// Adds `non_canonical_bytes` bytes to the varint representation at the tail of
// the buffer.
// `buf` points to the start of the buffer, `p` points to one-past-the-end.
// Returns the new one-past-the-end pointer.
uint8_t* AddNonCanonicalBytes(const uint8_t* buf, uint8_t* p,
int non_canonical_bytes) {
// varint can have a max of 10 bytes.
while (non_canonical_bytes-- > 0 && p - buf < 10) {
// Add a dummy byte at the end.
@ -1186,7 +1187,15 @@ std::string EncodeEnumValue(int number, int value, int non_canonical_bytes) {
p[0] = 0;
++p;
}
return p;
}
// Serializes enum field `number` holding `value` into a scratch buffer, passes
// the result through AddNonCanonicalBytes with `non_canonical_bytes`, and
// returns the written bytes as a string.
std::string EncodeEnumValue(int number, int value, int non_canonical_bytes) {
  uint8_t buf[100];
  uint8_t* const canonical_end =
      internal::WireFormatLite::WriteEnumToArray(number, value, buf);
  uint8_t* const padded_end =
      AddNonCanonicalBytes(buf, canonical_end, non_canonical_bytes);
  return std::string(buf, padded_end);
}
@ -1201,6 +1210,16 @@ TEST(MESSAGE_TEST_NAME, TestEnumParsers) {
const auto other_field = EncodeOtherField();
// Encode an enum field for many different cases and verify that it can be
// parsed as expected.
// There are:
// - optional/repeated/packed fields
// - field tags that encode in 1/2/3 bytes
// - canonical and non-canonical encodings of the varint
// - last vs not last field
// - label combinations to trigger different parsers: sequential, small
// sequential, non-validated.
constexpr int kInvalidValue = 0x900913;
auto* ref = obj.GetReflection();
auto* descriptor = obj.descriptor();
@ -1226,6 +1245,8 @@ TEST(MESSAGE_TEST_NAME, TestEnumParsers) {
auto encoded = EncodeEnumValue(field->number(), value_desc->number(),
non_canonical_bytes);
if (use_tail_field) {
// Make sure that fields after this one can be parsed too. ie test
// that the "next" jump is correct too.
encoded += other_field;
}
@ -1263,5 +1284,64 @@ TEST(MESSAGE_TEST_NAME, TestEnumParsers) {
}
}
// Serializes bool field `number` holding `value` into a scratch buffer, passes
// the result through AddNonCanonicalBytes with `non_canonical_bytes`, and
// returns the written bytes as a string.
std::string EncodeBoolValue(int number, bool value, int non_canonical_bytes) {
  uint8_t buf[100];
  uint8_t* const canonical_end =
      internal::WireFormatLite::WriteBoolToArray(number, value, buf);
  uint8_t* const padded_end =
      AddNonCanonicalBytes(buf, canonical_end, non_canonical_bytes);
  return std::string(buf, padded_end);
}
// Round-trips a single bool through every field of BoolParseTester (except
// `other_field`) and checks that the parsed message holds exactly that value
// and no unknown fields.
TEST(MESSAGE_TEST_NAME, TestBoolParsers) {
  UNITTEST::BoolParseTester obj;

  const auto other_field = EncodeOtherField();

  // The loops below cover the cross product of:
  //  - with / without a trailing `other_field` after the bool
  //  - 0..9 extra non-canonical bytes requested for the encoding
  //  - every bool field declared on the message (singular and repeated,
  //    at low, mid and high field numbers)
  //  - both bool values
  auto* reflection = obj.GetReflection();
  auto* descriptor = obj.descriptor();

  for (bool use_tail_field : {false, true}) {
    SCOPED_TRACE(use_tail_field);

    for (int non_canonical_bytes = 0; non_canonical_bytes < 10;
         ++non_canonical_bytes) {
      SCOPED_TRACE(non_canonical_bytes);

      const int field_count = descriptor->field_count();
      for (int field_index = 0; field_index < field_count; ++field_index) {
        const auto* field = descriptor->field(field_index);
        // `other_field` is only used as the trailing field, never as the
        // field under test.
        if (field->name() == "other_field") continue;
        SCOPED_TRACE(field->full_name());

        for (bool value : {false, true}) {
          SCOPED_TRACE(value);

          auto wire_bytes =
              EncodeBoolValue(field->number(), value, non_canonical_bytes);
          if (use_tail_field) {
            // Append another field so that parsing has to continue past the
            // bool, i.e. the jump to the "next" field is exercised as well.
            wire_bytes += other_field;
          }

          EXPECT_TRUE(obj.ParseFromString(wire_bytes));

          if (!field->is_repeated()) {
            EXPECT_TRUE(reflection->HasField(obj, field));
            EXPECT_EQ(reflection->GetBool(obj, field), value);
          } else {
            ASSERT_EQ(reflection->FieldSize(obj, field), 1);
            EXPECT_EQ(reflection->GetRepeatedBool(obj, field, 0), value);
          }

          // Nothing may have been diverted into the unknown field set.
          ASSERT_EQ(reflection->GetUnknownFields(obj).field_count(), 0);
        }
      }
    }
  }
}
} // namespace protobuf
} // namespace google

@ -617,6 +617,7 @@ PROTOBUF_NODISCARD PROTOBUF_ALWAYS_INLINE constexpr T RotateLeft(
PROTOBUF_NODISCARD inline PROTOBUF_ALWAYS_INLINE uint64_t
RotRight7AndReplaceLowByte(uint64_t res, const char& byte) {
// TODO(b/239808098): remove the inline assembly
#if defined(__x86_64__) && defined(__GNUC__)
// This will only use one register for `res`.
// `byte` comes as a reference to allow the compiler to generate code like:

@ -181,11 +181,15 @@
// Future versions of protobuf will include breaking changes to some APIs.
// This macro can be set to enable these API changes ahead of time, so that
// user code can be updated before upgrading versions of protobuf.
// PROTOBUF_FUTURE_FINAL is used on classes that are historically not marked as
// final, but that may be marked final in future (breaking) releases.
#ifdef PROTOBUF_FUTURE_BREAKING_CHANGES
// Used on classes that are historically not marked as final.
// Used to upgrade google::protobuf::MapPair<K, V> to std::pair<const K, V>.
// Owner: mordberg@
#define PROTOBUF_FUTURE_MAP_PAIR_UPGRADE 1
// Used on classes that are historically not marked as final, but that may be
// marked final in future (breaking) releases.
// Owner: kfm@
#define PROTOBUF_FUTURE_FINAL final
@ -678,7 +682,8 @@
#error PROTOBUF_CONSTINIT was previously defined
#endif
#if defined(_MSC_VER)
// Lexan sets both _MSC_VER and clang, so handle it with the clang path.
#if defined(_MSC_VER) && !defined(__clang__)
// MSVC 17 currently seems to raise an error about constant-initialized pointers.
# if PROTOBUF_MSC_VER_MIN(1930)
# define PROTOBUF_CONSTINIT
@ -827,11 +832,22 @@
#define PROTOBUF_TAIL_CALL_TABLE_PARSER_ENABLED 1
#endif
#define PROTOBUF_TC_PARAM_DECL \
::google::protobuf::MessageLite *msg, const char *ptr, \
::google::protobuf::internal::ParseContext *ctx, \
const ::google::protobuf::internal::TcParseTableBase *table, \
uint64_t hasbits, ::google::protobuf::internal::TcFieldData data
// Note that this is performance sensitive: changing the parameters will change
// the registers used by the ABI calling convention, which subsequently affects
// register selection logic inside the function.
// Arguments `msg`, `ptr` and `ctx` are the 1st/2nd/3rd argument to match the
// signature of ParseLoop.
//
// Note for x86_64: `data` must be the third or fourth argument for performance
// reasons. In order to efficiently read the second byte of `data` we need it to
// be passed in RDX or RCX.
#define PROTOBUF_TC_PARAM_DECL \
::google::protobuf::MessageLite *msg, const char *ptr, \
::google::protobuf::internal::ParseContext *ctx, \
::google::protobuf::internal::TcFieldData data, \
const ::google::protobuf::internal::TcParseTableBase *table, uint64_t hasbits
// PROTOBUF_TC_PARAM_PASS passes values to match PROTOBUF_TC_PARAM_DECL.
#define PROTOBUF_TC_PARAM_PASS msg, ptr, ctx, data, table, hasbits
#ifdef PROTOBUF_UNUSED
#error PROTOBUF_UNUSED was previously defined
@ -953,6 +969,21 @@
#pragma warning(disable: 4125)
#endif
#if PROTOBUF_ENABLE_DEBUG_LOGGING_MAY_LEAK_PII
#define PROTOBUF_DEBUG true
#else
#define PROTOBUF_DEBUG false
#endif
// This `for` allows us to condition the `GOOGLE_LOG` on the define above, so that
// code can write `PROTOBUF_DLOG(INFO) << ...;` and have it turned off when
// debug logging is off.
//
// This is a `for`, not an `if`, to avoid it accidentally chaining with an
// `else` below it.
#define PROTOBUF_DLOG(x) \
for (bool b = PROTOBUF_DEBUG; b; b = false) GOOGLE_LOG(x)
// We don't want code outside port_def doing complex testing, so
// remove our portable condition test macros to nudge folks away from
// using it themselves.

@ -108,9 +108,12 @@
#undef PROTOBUF_LOCKS_EXCLUDED
#undef PROTOBUF_NO_THREAD_SAFETY_ANALYSIS
#undef PROTOBUF_GUARDED_BY
#undef PROTOBUF_DEBUG
#undef PROTOBUF_DLOG
#ifdef PROTOBUF_FUTURE_BREAKING_CHANGES
#undef PROTOBUF_FUTURE_BREAKING_CHANGES
#undef PROTOBUF_FUTURE_MAP_PAIR_UPGRADE
#undef PROTOBUF_FUTURE_REMOVE_DEFAULT_FIELD_COMPARATOR
#undef PROTOBUF_FUTURE_REMOVE_CLEARED_API
#endif

@ -233,12 +233,6 @@ class RepeatedField final {
// copies data between each other.
void Swap(RepeatedField* other);
// Swaps entire contents with "other". Should be called only if the caller can
// guarantee that both repeated fields are on the same arena or are on the
// heap. Swapping between different arenas is disallowed and caught by a
// GOOGLE_DCHECK (see API docs for details).
void UnsafeArenaSwap(RepeatedField* other);
// Swaps two elements.
void SwapElements(int index1, int index2);
@ -321,6 +315,12 @@ class RepeatedField final {
: rep()->arena;
}
// Swaps entire contents with "other". Should be called only if the caller can
// guarantee that both repeated fields are on the same arena or are on the
// heap. Swapping between different arenas is disallowed and caught by a
// GOOGLE_DCHECK (see API docs for details).
void UnsafeArenaSwap(RepeatedField* other);
static constexpr int kInitialSize = 0;
// A note on the representation here (see also comment below for
// RepeatedPtrFieldBase's struct Rep):

@ -1527,3 +1527,20 @@ message EnumParseTester {
optional int32 other_field = 99;
};
// This message contains different kinds of bool fields to exercise the
// different table-driven parsers.
message BoolParseTester {
// Singular bool fields at low, mid and high field numbers.
optional bool optional_bool_lowfield = 1;
optional bool optional_bool_midfield = 1001;
optional bool optional_bool_hifield = 1000001;
// Repeated bool fields without the packed option, at low, mid and high field
// numbers.
repeated bool repeated_bool_lowfield = 2;
repeated bool repeated_bool_midfield = 1002;
repeated bool repeated_bool_hifield = 1000002;
// Repeated bool fields declared with [packed = true], at low, mid and high
// field numbers.
repeated bool packed_bool_lowfield = 3 [packed = true];
repeated bool packed_bool_midfield = 1003 [packed = true];
repeated bool packed_bool_hifield = 1000003 [packed = true];
// An arbitrary field we can append to break the runs of repeated fields.
optional int32 other_field = 99;
};

@ -33,6 +33,7 @@
#ifndef GOOGLE_PROTOBUF_UTIL_JSON_UTIL_H__
#define GOOGLE_PROTOBUF_UTIL_JSON_UTIL_H__
#include <google/protobuf/stubs/bytestream.h>
#include <google/protobuf/stubs/status.h>
#include <google/protobuf/stubs/strutil.h>

@ -42,6 +42,7 @@
#include <google/protobuf/struct.pb.h>
#include <google/protobuf/timestamp.pb.h>
#include <google/protobuf/wrappers.pb.h>
#include <google/protobuf/unittest.pb.h>
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include <google/protobuf/stubs/status.h>
@ -155,7 +156,7 @@ class JsonTest : public testing::TestWithParam<Codec> {
util::StatusOr<Proto> ToProto(StringPiece json,
JsonParseOptions options = {}) {
Proto proto;
RETURN_IF_ERROR(JsonStringToMessage(json, &proto, options));
RETURN_IF_ERROR(ToProto(proto, json, options));
return proto;
}
@ -270,13 +271,26 @@ TEST_P(JsonTest, TestDefaultValues) {
R"("defaultString":"hello","defaultBytes":"d29ybGQ=","defaultNestedEnum":"BAR",)"
R"("defaultForeignEnum":"FOREIGN_BAR","defaultImportEnum":"IMPORT_BAR",)"
R"("defaultStringPiece":"abc","defaultCord":"123"})"));
// The ESF parser actually gets this wrong, and serializes floats whose
// default value is non-finite as 0. We make sure to reproduce this bug.
EXPECT_THAT(
ToJson(protobuf_unittest::TestExtremeDefaultValues(), options),
IsOkAndHolds(
R"({"escapedBytes":"XDAwMFwwMDFcMDA3XDAxMFwwMTRcblxyXHRcMDEzXFxcJ1wiXDM3Ng==")"
R"(,"largeUint32":4294967295,"largeUint64":"18446744073709551615",)"
R"("smallInt32":-2147483647,"smallInt64":"-9223372036854775807")"
R"(,"reallySmallInt32":-2147483648,"reallySmallInt64":"-9223372036854775808",)"
R"("utf8String":"","zeroFloat":0,"oneFloat":1,"smallFloat":1.5,)"
R"("negativeOneFloat":-1,"negativeFloat":-1.5,"largeFloat":2e+08,)"
R"("smallNegativeFloat":-8e-28,"infDouble":0,"negInfDouble":0)"
R"(,"nanDouble":0,"infFloat":0,"negInfFloat":0,"nanFloat":0)"
R"(,"cppTrigraph":"? ? ?? ?? ??? ??/ ??-","stringWithZero":"hel\u0000lo")"
R"(,"bytesWithZero":"d29yXDAwMGxk","stringPieceWithZero":"ab\u0000c")"
R"(,"cordWithZero":"12\u00003","replacementString":"${unknown}"})"));
}
TEST_P(JsonTest, TestPreserveProtoFieldNames) {
if (GetParam() == Codec::kResolver) {
GTEST_SKIP();
}
TestMessage m;
m.mutable_message_value();
@ -286,6 +300,22 @@ TEST_P(JsonTest, TestPreserveProtoFieldNames) {
}
// Serializes a TestCamelCaseFieldNames message with `stringfield` set and
// checks the JSON key that is emitted for it.
TEST_P(JsonTest, Camels) {
  protobuf_unittest::TestCamelCaseFieldNames message;
  message.set_stringfield("sTRINGfIELD");

  const auto json = ToJson(message);
  EXPECT_THAT(json, IsOkAndHolds(R"({"StringField":"sTRINGfIELD"})"));
}
// Parses a JSON document whose string value contains raw control characters
// (the C++ escapes below put the actual bytes into the JSON text) and checks
// that parsing succeeds and that the bytes arrive unchanged in string_value.
TEST_P(JsonTest, EvilString) {
auto m = ToProto<TestMessage>(R"json(
{"string_value": ")json"
"\n\r\b\f\1\2\3"
"\"}");
ASSERT_OK(m);
EXPECT_EQ(m->string_value(), "\n\r\b\f\1\2\3");
}
TEST_P(JsonTest, TestAlwaysPrintEnumsAsInts) {
TestMessage orig;
orig.set_enum_value(proto3::BAR);
@ -378,6 +408,7 @@ TEST_P(JsonTest, ParseMessage) {
"repeatedEnumValue": [1, "FOO"],
"repeatedMessageValue": [
{"value": 40},
{},
{"value": 96}
]
}
@ -406,9 +437,10 @@ TEST_P(JsonTest, ParseMessage) {
EXPECT_THAT(m->repeated_string_value(), ElementsAre("foo", "bar ", ""));
EXPECT_THAT(m->repeated_enum_value(), ElementsAre(proto3::BAR, proto3::FOO));
ASSERT_THAT(m->repeated_message_value(), SizeIs(2));
ASSERT_THAT(m->repeated_message_value(), SizeIs(3));
EXPECT_EQ(m->repeated_message_value(0).value(), 40);
EXPECT_EQ(m->repeated_message_value(1).value(), 96);
EXPECT_EQ(m->repeated_message_value(1).value(), 0);
EXPECT_EQ(m->repeated_message_value(2).value(), 96);
EXPECT_THAT(
ToJson(*m),
@ -419,7 +451,7 @@ TEST_P(JsonTest, ParseMessage) {
R"("messageValue":{"value":2048},"repeatedBoolValue":[true],"repeatedInt32Value":[0,-42])"
R"(,"repeatedUint64Value":["1","2"],"repeatedDoubleValue":[1.5,-2],)"
R"("repeatedStringValue":["foo","bar ",""],"repeatedEnumValue":["BAR","FOO"],)"
R"("repeatedMessageValue":[{"value":40},{"value":96}]})"));
R"("repeatedMessageValue":[{"value":40},{},{"value":96}]})"));
}
TEST_P(JsonTest, CurseOfAtob) {
@ -434,6 +466,15 @@ TEST_P(JsonTest, CurseOfAtob) {
false, true));
}
// Serializes a Value holding a list of two doubles and checks the exact
// digits that are printed for each of them.
TEST_P(JsonTest, FloatPrecision) {
  google::protobuf::Value value;
  auto* numbers = value.mutable_list_value();
  numbers->add_values()->set_number_value(0.9900000095367432);
  numbers->add_values()->set_number_value(0.8799999952316284);

  const auto json = ToJson(value);
  EXPECT_THAT(json, IsOkAndHolds("[0.99000000953674316,0.87999999523162842]"));
}
TEST_P(JsonTest, ParseLegacySingleRepeatedField) {
auto m = ToProto<TestMessage>(R"json({
"repeatedInt32Value": 1997,
@ -731,6 +772,24 @@ TEST_P(JsonTest, TestFlatList) {
)json");
ASSERT_OK(m);
EXPECT_THAT(m->repeated_int32_value(), ElementsAre(5, 6));
// The above flattening behavior is suppressed for google::protobuf::ListValue.
auto m2 = ToProto<google::protobuf::Value>(R"json(
{
"repeatedInt32Value": [[[5]], [6]]
}
)json");
ASSERT_OK(m2);
auto fields = m2->struct_value().fields();
auto list = fields["repeatedInt32Value"].list_value();
EXPECT_EQ(list.values(0)
.list_value()
.values(0)
.list_value()
.values(0)
.number_value(),
5);
EXPECT_EQ(list.values(1).list_value().values(0).number_value(), 6);
}
TEST_P(JsonTest, ParseWrappers) {
@ -1065,13 +1124,40 @@ TEST_P(JsonTest, TestLegalNullsInArray) {
ASSERT_THAT(m2->repeated_value(), SizeIs(1));
EXPECT_TRUE(m2->repeated_value(0).has_null_value());
m2->Clear();
m2->mutable_repeated_value(); // Materialize an empty singular Value.
m2->add_repeated_value();
m2->add_repeated_value()->set_string_value("solitude");
m2->add_repeated_value();
EXPECT_THAT(ToJson(*m2), IsOkAndHolds(R"({"repeatedValue":["solitude"]})"));
}
// Checks that an element of TestListValue's `repeated_value` can be parsed
// from two JSON spellings and yields the same two strings in both cases:
// first from a bare nested array, then from an object with an explicit
// "values" key.
TEST_P(JsonTest, ListList) {
// Spelling 1: the element is written directly as a JSON array.
auto m = ToProto<proto3::TestListValue>(R"json({
"repeated_value": [["ayy", "lmao"]]
})json");
ASSERT_OK(m);
EXPECT_EQ(m->repeated_value(0).values(0).string_value(), "ayy");
EXPECT_EQ(m->repeated_value(0).values(1).string_value(), "lmao");
// Spelling 2: the element is written as an object wrapping the array.
m = ToProto<proto3::TestListValue>(R"json({
"repeated_value": [{
"values": ["ayy", "lmao"]
}]
})json");
ASSERT_OK(m);
EXPECT_EQ(m->repeated_value(0).values(0).string_value(), "ayy");
EXPECT_EQ(m->repeated_value(0).values(1).string_value(), "lmao");
}
TEST_P(JsonTest, DISABLED_HtmlEscape) {
TEST_P(JsonTest, HtmlEscape) {
TestMessage m;
m.set_string_value("</script>");
EXPECT_THAT(ToJson(m),
IsOkAndHolds("{\"stringValue\":\"\\u003c/script\\u003e\"}"));
IsOkAndHolds(R"({"stringValue":"\u003c/script\u003e"})"));
}
} // namespace

@ -242,17 +242,17 @@ class PROTOBUF_EXPORT MessageDifferencer {
// Reports that a field has been added into Message2.
virtual void ReportAdded(const Message& message1, const Message& message2,
const std::vector<SpecificField>& field_path) = 0;
const std::vector<SpecificField>& field_path) {}
// Reports that a field has been deleted from Message1.
virtual void ReportDeleted(
const Message& message1, const Message& message2,
const std::vector<SpecificField>& field_path) = 0;
const std::vector<SpecificField>& field_path) {}
// Reports that the value of a field has been modified.
virtual void ReportModified(
const Message& message1, const Message& message2,
const std::vector<SpecificField>& field_path) = 0;
const std::vector<SpecificField>& field_path) {}
// Reports that a repeated field has been moved to another location. This
// only applies when using TreatAsSet or TreatAsMap() -- see below. Also

@ -53,7 +53,6 @@
#include <google/protobuf/message_lite.h>
#include <google/protobuf/repeated_field.h>
// Do UTF-8 validation on string type in Debug build only
#ifndef NDEBUG
#define GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
#endif

@ -1 +1 @@
Subproject commit 5b7683f49e1e9223cf9927b24f6fd3d6bd82e3f8
Subproject commit 0baacde3618ca617da95375e0af13ce1baadea47
Loading…
Cancel
Save