Merge pull request #10388 from mkruskal-google/sync-stage
Integrate from Piper for C++, Java, and Python
commit 13b3647016
52 changed files with 2119 additions and 1004 deletions
@@ -0,0 +1,779 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include <google/protobuf/generated_message_tctable_gen.h>

#include <algorithm>
#include <limits>
#include <string>
#include <utility>
#include <vector>

#include <google/protobuf/descriptor.h>
#include <google/protobuf/descriptor.pb.h>
#include <google/protobuf/generated_message_tctable_decl.h>
#include <google/protobuf/generated_message_tctable_impl.h>
#include <google/protobuf/wire_format.h>

// Must come last:
#include <google/protobuf/port_def.inc>

namespace google {
namespace protobuf {
namespace internal {

namespace {

bool GetEnumValidationRange(const EnumDescriptor* enum_type, int16_t& start,
                            uint16_t& size) {
  GOOGLE_CHECK_GT(enum_type->value_count(), 0) << enum_type->DebugString();

  // Check if the enum values are a single, contiguous range.
  std::vector<int> enum_values;
  for (int i = 0, N = static_cast<int>(enum_type->value_count()); i < N; ++i) {
    enum_values.push_back(enum_type->value(i)->number());
  }
  auto values_begin = enum_values.begin();
  auto values_end = enum_values.end();
  std::sort(values_begin, values_end);
  enum_values.erase(std::unique(values_begin, values_end), values_end);

  if (std::numeric_limits<int16_t>::min() <= enum_values[0] &&
      enum_values[0] <= std::numeric_limits<int16_t>::max() &&
      enum_values.size() <= std::numeric_limits<uint16_t>::max() &&
      static_cast<int>(enum_values[0] + enum_values.size() - 1) ==
          enum_values.back()) {
    start = static_cast<int16_t>(enum_values[0]);
    size = static_cast<uint16_t>(enum_values.size());
    return true;
  } else {
    return false;
  }
}
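
// Illustration: for an enum whose declared values are {0, 1, 2, 3}, the
// function above yields start = 0, size = 4 and returns true; for values
// {0, 2, 3} the last value (3) does not equal start + size - 1 (2), so the
// range is not contiguous and the function returns false.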

void PopulateFastFieldEntry(const TailCallTableInfo::FieldEntryInfo& entry,
                            const TailCallTableInfo::PerFieldOptions& options,
                            TailCallTableInfo::FastFieldInfo& info) {
  const FieldDescriptor* field = entry.field;
  std::string name = "::_pbi::TcParser::Fast";
  uint8_t aux_idx = static_cast<uint8_t>(entry.aux_idx);

  static const char* kPrefix[] = {
      nullptr,  // 0
      "F64",    // TYPE_DOUBLE = 1,
      "F32",    // TYPE_FLOAT = 2,
      "V64",    // TYPE_INT64 = 3,
      "V64",    // TYPE_UINT64 = 4,
      "V32",    // TYPE_INT32 = 5,
      "F64",    // TYPE_FIXED64 = 6,
      "F32",    // TYPE_FIXED32 = 7,
      "V8",     // TYPE_BOOL = 8,
      "",       // TYPE_STRING = 9,
      "G",      // TYPE_GROUP = 10,
      "M",      // TYPE_MESSAGE = 11,
      "B",      // TYPE_BYTES = 12,
      "V32",    // TYPE_UINT32 = 13,
      "",       // TYPE_ENUM = 14,
      "F32",    // TYPE_SFIXED32 = 15,
      "F64",    // TYPE_SFIXED64 = 16,
      "Z32",    // TYPE_SINT32 = 17,
      "Z64",    // TYPE_SINT64 = 18,
  };
  name.append(kPrefix[field->type()]);

  if (field->type() == field->TYPE_ENUM) {
    // Enums are handled as:
    //  - V32 for open enums
    //  - Er (and Er0/Er1) for sequential enums
    //  - Ev for the rest
    if (cpp::HasPreservingUnknownEnumSemantics(field)) {
      name.append("V32");
    } else if (field->is_repeated() && field->is_packed()) {
      GOOGLE_LOG(DFATAL) << "Enum validation not handled: " << field->DebugString();
      return;
    } else {
      int16_t start;
      uint16_t size;
      if (GetEnumValidationRange(field->enum_type(), start, size)) {
        name.append("Er");
        int max_value = start + size - 1;
        if (max_value <= 127 && (start == 0 || start == 1)) {
          name.append(1, '0' + start);
          aux_idx = max_value;
        }
      } else {
        name.append("Ev");
      }
    }
  }
  if (field->type() == field->TYPE_STRING) {
    switch (internal::cpp::GetUtf8CheckMode(field, options.is_lite)) {
      case internal::cpp::Utf8CheckMode::kStrict:
        name.append("U");
        break;
      case internal::cpp::Utf8CheckMode::kVerify:
        name.append("S");
        break;
      case internal::cpp::Utf8CheckMode::kNone:
        name.append("B");
        break;
    }
  }
  if (field->type() == field->TYPE_STRING ||
      field->type() == field->TYPE_BYTES) {
    if (options.is_string_inlined) {
      name.append("i");
      GOOGLE_CHECK(!field->is_repeated());
      aux_idx = static_cast<uint8_t>(entry.inlined_string_idx);
    }
  }
  if (field->type() == field->TYPE_MESSAGE ||
      field->type() == field->TYPE_GROUP) {
    name.append(options.use_direct_tcparser_table ? "t" : "d");
  }

  // The field implementation functions are prefixed by cardinality:
  //   `S` for optional or implicit fields.
  //   `R` for non-packed repeated.
  //   `P` for packed repeated.
  //   `O` for fields in a real oneof.
  name.append(field->is_packed()               ? "P"
              : field->is_repeated()           ? "R"
              : field->real_containing_oneof() ? "O"
                                               : "S");

  // Append the tag length. Fast parsing only handles 1- or 2-byte tags.
  name.append(field->number() < 16 ? "1" : "2");

  info.func_name = std::move(name);
  info.aux_idx = aux_idx;
}
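
// Example of the resulting names: a singular int32 field with a one-byte tag
// produces "::_pbi::TcParser::FastV32S1"; a singular message field with a
// two-byte tag and a direct child table produces "::_pbi::TcParser::FastMtS2".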

bool IsFieldEligibleForFastParsing(
    const TailCallTableInfo::FieldEntryInfo& entry,
    const TailCallTableInfo::OptionProvider& option_provider) {
  const auto* field = entry.field;
  const auto options = option_provider.GetForField(field);
  // Map, oneof, weak, and lazy fields are not handled on the fast path.
  if (field->is_map() || field->real_containing_oneof() ||
      field->options().weak() || options.is_implicitly_weak ||
      options.is_lazy || options.should_split) {
    return false;
  }

  // We will check for a valid auxiliary index range later. However, we might
  // want to change the value we check for inlined string fields.
  int aux_idx = entry.aux_idx;

  switch (field->type()) {
    case FieldDescriptor::TYPE_ENUM:
      // If enum values are not validated at parse time, then this field can be
      // handled on the fast path like an int32.
      if (cpp::HasPreservingUnknownEnumSemantics(field)) {
        break;
      }
      if (field->is_repeated() && field->is_packed()) {
        return false;
      }
      break;

      // Some bytes fields can be handled on fast path.
    case FieldDescriptor::TYPE_STRING:
    case FieldDescriptor::TYPE_BYTES:
      if (field->options().ctype() != FieldOptions::STRING) {
        return false;
      }
      if (options.is_string_inlined) {
        GOOGLE_CHECK(!field->is_repeated());
        // For inlined strings, the donation state index is stored in the
        // `aux_idx` field of the fast parsing info. We need to check the range
        // of that value instead of the auxiliary index.
        aux_idx = entry.inlined_string_idx;
      }
      break;

    default:
      break;
  }

  if (cpp::HasHasbit(field)) {
    // The tailcall parser can only update the first 32 hasbits. Fields with
    // has-bits beyond the first 32 are handled by mini parsing/fallback.
    GOOGLE_CHECK_GE(entry.hasbit_idx, 0) << field->DebugString();
    if (entry.hasbit_idx >= 32) return false;
  }

  // If the field needs auxiliary data, then the aux index is needed. This
  // must fit in a uint8_t.
  if (aux_idx > std::numeric_limits<uint8_t>::max()) {
    return false;
  }

  // The largest tag that can be read by the tailcall parser is two bytes
  // when varint-coded. This allows 14 bits for the numeric tag value:
  //   byte 0   byte 1
  //   1nnnnttt 0nnnnnnn
  //    ^^^^^^^  ^^^^^^^
  if (field->number() >= 1 << 11) return false;

  return true;
}
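
// For instance, a singular bool field numbered 3 whose hasbit index is below
// 32 passes every check above, while a map field, a field in a oneof, or any
// field numbered 2048 or higher falls back to mini parsing.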

std::vector<TailCallTableInfo::FastFieldInfo> SplitFastFieldsForSize(
    const std::vector<TailCallTableInfo::FieldEntryInfo>& field_entries,
    int table_size_log2,
    const TailCallTableInfo::OptionProvider& option_provider) {
  std::vector<TailCallTableInfo::FastFieldInfo> result(1 << table_size_log2);
  const uint32_t idx_mask = static_cast<uint32_t>(result.size() - 1);

  for (const auto& entry : field_entries) {
    if (!IsFieldEligibleForFastParsing(entry, option_provider)) {
      continue;
    }

    const auto* field = entry.field;
    const auto options = option_provider.GetForField(field);
    uint32_t tag = WireFormat::MakeTag(field);

    // Construct the varint-coded tag. If it is more than 7 bits, we need to
    // shift the high bits and add a continue bit.
    if (uint32_t hibits = tag & 0xFFFFFF80) {
      tag = tag + hibits + 128;  // tag = lobits + 2*hibits + 128
    }
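
    // For example, field number 16 with varint wire type has tag
    // (16 << 3) | 0 = 0x80; the high bits are 0x80, so the coded tag becomes
    // 0x80 + 0x80 + 128 = 0x0180, i.e. the wire bytes 0x80 0x01 read as a
    // little-endian uint16_t.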

    // The field index is determined by the low bits of the field number, where
    // the table size determines the width of the mask. The largest table
    // supported is 32 entries. The parse loop uses these bits directly, so
    // that the dispatch does not require arithmetic:
    //        byte 0   byte 1
    //   tag: 1nnnnttt 0nnnnnnn
    //        ^^^^^
    //         idx (table_size_log2=5)
    // This means that any field number that does not fit in the lower 4 bits
    // will always have the top bit of its table index asserted.
    const uint32_t fast_idx = (tag >> 3) & idx_mask;
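
    // E.g. with a 32-entry table, field number 33 lands at index
    // (33 & 15) | 16 = 17, regardless of wire type.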

    TailCallTableInfo::FastFieldInfo& info = result[fast_idx];
    if (info.field != nullptr) {
      // This field entry is already filled.
      continue;
    }

    // Fill in this field's entry:
    GOOGLE_CHECK(info.func_name.empty()) << info.func_name;
    PopulateFastFieldEntry(entry, options, info);
    info.field = field;
    info.coded_tag = tag;
    // If this field does not have presence, then it can set an out-of-bounds
    // bit (tailcall parsing uses a uint64_t for hasbits, but only stores 32).
    info.hasbit_idx = cpp::HasHasbit(field) ? entry.hasbit_idx : 63;
  }
  return result;
}

// Filter out fields that will be handled by mini parsing.
std::vector<const FieldDescriptor*> FilterMiniParsedFields(
    const std::vector<const FieldDescriptor*>& fields,
    const TailCallTableInfo::OptionProvider& option_provider) {
  std::vector<const FieldDescriptor*> generated_fallback_fields;

  for (const auto* field : fields) {
    auto options = option_provider.GetForField(field);

    bool handled = false;
    switch (field->type()) {
      case FieldDescriptor::TYPE_DOUBLE:
      case FieldDescriptor::TYPE_FLOAT:
      case FieldDescriptor::TYPE_FIXED32:
      case FieldDescriptor::TYPE_SFIXED32:
      case FieldDescriptor::TYPE_FIXED64:
      case FieldDescriptor::TYPE_SFIXED64:
      case FieldDescriptor::TYPE_BOOL:
      case FieldDescriptor::TYPE_UINT32:
      case FieldDescriptor::TYPE_SINT32:
      case FieldDescriptor::TYPE_INT32:
      case FieldDescriptor::TYPE_UINT64:
      case FieldDescriptor::TYPE_SINT64:
      case FieldDescriptor::TYPE_INT64:
        // These are handled by MiniParse, so we don't need any generated
        // fallback code.
        handled = true;
        break;

      case FieldDescriptor::TYPE_ENUM:
        if (field->is_repeated() &&
            !cpp::HasPreservingUnknownEnumSemantics(field)) {
          // TODO(b/206890171): handle packed repeated closed enums
          // Non-packed repeated can be handled using tables, but we still
          // need to generate fallback code for all repeated enums in order to
          // handle packed encoding. This is because of the lite/full split
          // when handling invalid enum values in a packed field.
          handled = false;
        } else {
          handled = true;
        }
        break;

      case FieldDescriptor::TYPE_BYTES:
      case FieldDescriptor::TYPE_STRING:
        if (options.is_string_inlined) {
          // TODO(b/198211897): support InlinedStringField.
          handled = false;
        } else {
          handled = true;
        }
        break;

      case FieldDescriptor::TYPE_MESSAGE:
      case FieldDescriptor::TYPE_GROUP:
        // TODO(b/210762816): support remaining field types.
        if (field->is_map() || field->options().weak() ||
            options.is_implicitly_weak || options.is_lazy) {
          handled = false;
        } else {
          handled = true;
        }
        break;

      default:
        handled = false;
        break;
    }
    if (!handled) generated_fallback_fields.push_back(field);
  }

  return generated_fallback_fields;
}

std::vector<uint8_t> GenerateFieldNames(
    const Descriptor* descriptor,
    const std::vector<const FieldDescriptor*>& fields) {
  static constexpr int kMaxNameLength = 255;
  std::vector<uint8_t> out;
  // First, we output the size of each string, as an unsigned byte. The first
  // string is the message name.
  int count = 1;
  out.push_back(std::min(static_cast<int>(descriptor->full_name().size()),
                         kMaxNameLength));
  for (const auto* field : fields) {
    out.push_back(field->name().size());
    ++count;
  }
  while (count & 7) {  // align to an 8-byte boundary
    out.push_back(0);
    ++count;
  }
  // The message name is stored at the beginning of the string
  std::string message_name = descriptor->full_name();
  if (message_name.size() > kMaxNameLength) {
    static constexpr int kNameHalfLength = (kMaxNameLength - 3) / 2;
    message_name = StrCat(
        message_name.substr(0, kNameHalfLength), "...",
        message_name.substr(message_name.size() - kNameHalfLength));
  }
  out.insert(out.end(), message_name.begin(), message_name.end());
  // Then we output the actual field names
  for (const auto* field : fields) {
    out.insert(out.end(), field->name().begin(), field->name().end());
  }

  return out;
}
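
// Illustration of the layout: for a message "pkg.Foo" with fields "a" and
// "bb", the output is the length bytes {7, 1, 2} padded with zeros to eight
// bytes, followed by the characters of "pkg.Foo", "a", and "bb".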

TailCallTableInfo::NumToEntryTable MakeNumToEntryTable(
    const std::vector<const FieldDescriptor*>& field_descriptors) {
  TailCallTableInfo::NumToEntryTable num_to_entry_table;
  num_to_entry_table.skipmap32 = static_cast<uint32_t>(-1);

  // skip_entry_block is the current block of SkipEntries that we're
  // appending to.  cur_block_first_fnum is the number of the first
  // field represented by the block.
  uint16_t field_entry_index = 0;
  uint16_t N = field_descriptors.size();
  // First, handle field numbers 1-32, which affect only the initial
  // skipmap32 and don't generate additional skip-entry blocks.
  for (; field_entry_index != N; ++field_entry_index) {
    auto* field_descriptor = field_descriptors[field_entry_index];
    if (field_descriptor->number() > 32) break;
    auto skipmap32_index = field_descriptor->number() - 1;
    num_to_entry_table.skipmap32 -= 1 << skipmap32_index;
  }
  // If all the field numbers were less than or equal to 32, we will have
  // no further entries to process, and we are already done.
  if (field_entry_index == N) return num_to_entry_table;

  TailCallTableInfo::SkipEntryBlock* block = nullptr;
  bool start_new_block = true;
  // To determine sparseness, track the field number corresponding to
  // the start of the most recent skip entry.
  uint32_t last_skip_entry_start = 0;
  for (; field_entry_index != N; ++field_entry_index) {
    auto* field_descriptor = field_descriptors[field_entry_index];
    uint32_t fnum = static_cast<uint32_t>(field_descriptor->number());
    GOOGLE_CHECK_GT(fnum, last_skip_entry_start);
    if (start_new_block == false) {
      // If the next field number is within 15 of the last_skip_entry_start, we
      // continue writing just to that entry. If it's between 16 and 31 more,
      // then we just extend the current block by one. If it's more than 31
      // more, we have to add empty skip entries in order to continue using the
      // existing block. Obviously, if it's just 32 more, it doesn't make sense
      // to start a whole new block, since new blocks mean having to write out
      // their starting field number, which is 32 bits, as well as the size of
      // the additional block, which is 16... while an empty SkipEntry16 only
      // costs 32 bits. So if it was 48 more, it's a slight space win; we save
      // 16 bits, but probably at the cost of slower run time. We're choosing
      // 96 for now.
      if (fnum - last_skip_entry_start > 96) start_new_block = true;
    }
    if (start_new_block) {
      num_to_entry_table.blocks.push_back({fnum});
      block = &num_to_entry_table.blocks.back();
      start_new_block = false;
    }

    auto skip_entry_num = (fnum - block->first_fnum) / 16;
    auto skip_entry_index = (fnum - block->first_fnum) % 16;
    while (skip_entry_num >= block->entries.size())
      block->entries.push_back({0xFFFF, field_entry_index});
    block->entries[skip_entry_num].skipmap -= 1 << (skip_entry_index);

    last_skip_entry_start = fnum - skip_entry_index;
  }
  return num_to_entry_table;
}
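
// As an example of the skipmap encoding: for a message whose fields are
// numbered 1, 2, and 5, skipmap32 starts at all ones and bits 0, 1, and 4 are
// cleared, so a cleared bit marks a field number that has an entry.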

uint16_t MakeTypeCardForField(
    const FieldDescriptor* field,
    const TailCallTableInfo::PerFieldOptions& options) {
  uint16_t type_card;
  namespace fl = internal::field_layout;
  if (internal::cpp::HasHasbit(field)) {
    type_card = fl::kFcOptional;
  } else if (field->is_repeated()) {
    type_card = fl::kFcRepeated;
  } else if (field->real_containing_oneof()) {
    type_card = fl::kFcOneof;
  } else {
    type_card = fl::kFcSingular;
  }

  // The rest of the type uses convenience aliases:
  switch (field->type()) {
    case FieldDescriptor::TYPE_DOUBLE:
      type_card |= field->is_repeated() && field->is_packed()
                       ? fl::kPackedDouble
                       : fl::kDouble;
      break;
    case FieldDescriptor::TYPE_FLOAT:
      type_card |= field->is_repeated() && field->is_packed() ? fl::kPackedFloat
                                                              : fl::kFloat;
      break;
    case FieldDescriptor::TYPE_FIXED32:
      type_card |= field->is_repeated() && field->is_packed()
                       ? fl::kPackedFixed32
                       : fl::kFixed32;
      break;
    case FieldDescriptor::TYPE_SFIXED32:
      type_card |= field->is_repeated() && field->is_packed()
                       ? fl::kPackedSFixed32
                       : fl::kSFixed32;
      break;
    case FieldDescriptor::TYPE_FIXED64:
      type_card |= field->is_repeated() && field->is_packed()
                       ? fl::kPackedFixed64
                       : fl::kFixed64;
      break;
    case FieldDescriptor::TYPE_SFIXED64:
      type_card |= field->is_repeated() && field->is_packed()
                       ? fl::kPackedSFixed64
                       : fl::kSFixed64;
      break;
    case FieldDescriptor::TYPE_BOOL:
      type_card |= field->is_repeated() && field->is_packed() ? fl::kPackedBool
                                                              : fl::kBool;
      break;
    case FieldDescriptor::TYPE_ENUM:
      if (internal::cpp::HasPreservingUnknownEnumSemantics(field)) {
        // No validation is required.
        type_card |= field->is_repeated() && field->is_packed()
                         ? fl::kPackedOpenEnum
                         : fl::kOpenEnum;
      } else {
        int16_t start;
        uint16_t size;
        if (GetEnumValidationRange(field->enum_type(), start, size)) {
          // Validation is done by range check (start/length in FieldAux).
          type_card |= field->is_repeated() && field->is_packed()
                           ? fl::kPackedEnumRange
                           : fl::kEnumRange;
        } else {
          // Validation uses the generated _IsValid function.
          type_card |= field->is_repeated() && field->is_packed()
                           ? fl::kPackedEnum
                           : fl::kEnum;
        }
      }
      break;
    case FieldDescriptor::TYPE_UINT32:
      type_card |= field->is_repeated() && field->is_packed()
                       ? fl::kPackedUInt32
                       : fl::kUInt32;
      break;
    case FieldDescriptor::TYPE_SINT32:
      type_card |= field->is_repeated() && field->is_packed()
                       ? fl::kPackedSInt32
                       : fl::kSInt32;
      break;
    case FieldDescriptor::TYPE_INT32:
      type_card |= field->is_repeated() && field->is_packed() ? fl::kPackedInt32
                                                              : fl::kInt32;
      break;
    case FieldDescriptor::TYPE_UINT64:
      type_card |= field->is_repeated() && field->is_packed()
                       ? fl::kPackedUInt64
                       : fl::kUInt64;
      break;
    case FieldDescriptor::TYPE_SINT64:
      type_card |= field->is_repeated() && field->is_packed()
                       ? fl::kPackedSInt64
                       : fl::kSInt64;
      break;
    case FieldDescriptor::TYPE_INT64:
      type_card |= field->is_repeated() && field->is_packed() ? fl::kPackedInt64
                                                              : fl::kInt64;
      break;

    case FieldDescriptor::TYPE_BYTES:
      type_card |= fl::kBytes;
      break;
    case FieldDescriptor::TYPE_STRING: {
      switch (internal::cpp::GetUtf8CheckMode(field, options.is_lite)) {
        case internal::cpp::Utf8CheckMode::kStrict:
          type_card |= fl::kUtf8String;
          break;
        case internal::cpp::Utf8CheckMode::kVerify:
          type_card |= fl::kRawString;
          break;
        case internal::cpp::Utf8CheckMode::kNone:
          type_card |= fl::kBytes;
          break;
      }
      break;
    }

    case FieldDescriptor::TYPE_GROUP:
      type_card |= 0 | fl::kMessage | fl::kRepGroup;
      if (options.use_direct_tcparser_table) {
        type_card |= fl::kTvTable;
      } else {
        type_card |= fl::kTvDefault;
      }
      break;
    case FieldDescriptor::TYPE_MESSAGE:
      if (field->is_map()) {
        type_card |= fl::kMap;
      } else {
        type_card |= fl::kMessage;
        if (options.is_lazy) {
          type_card |= fl::kRepLazy;
        } else if (options.is_implicitly_weak) {
          type_card |= fl::kRepIWeak;
        }

        if (options.use_direct_tcparser_table) {
          type_card |= fl::kTvTable;
        } else {
          type_card |= fl::kTvDefault;
        }
      }
      break;
  }

  // Fill in extra information about string and bytes field representations.
  if (field->type() == FieldDescriptor::TYPE_BYTES ||
      field->type() == FieldDescriptor::TYPE_STRING) {
    if (field->is_repeated()) {
      type_card |= fl::kRepSString;
    } else {
      type_card |= fl::kRepAString;
    }
  }

  if (options.should_split) {
    type_card |= fl::kSplitTrue;
  }

  return type_card;
}
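
// For example, an optional (hasbit-tracked) int32 field yields
// fl::kFcOptional | fl::kInt32, while a packed repeated int32 field yields
// fl::kFcRepeated | fl::kPackedInt32.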

}  // namespace

TailCallTableInfo::TailCallTableInfo(
    const Descriptor* descriptor,
    const std::vector<const FieldDescriptor*>& ordered_fields,
    const OptionProvider& option_provider,
    const std::vector<int>& has_bit_indices,
    const std::vector<int>& inlined_string_indices) {
  // If this message has any inlined string fields, store the donation state
  // offset in the first auxiliary entry.
  if (!inlined_string_indices.empty()) {
    aux_entries.resize(1);  // pad if necessary
    aux_entries[0] = {kInlinedStringDonatedOffset};
  }

  // If this message is split, store the split pointer offset and size in the
  // second and third auxiliary entries.
  for (auto* field : ordered_fields) {
    if (option_provider.GetForField(field).should_split) {
      aux_entries.resize(3);  // pad if necessary
      aux_entries[1] = {kSplitOffset};
      aux_entries[2] = {kSplitSizeof};
      break;
    }
  }

  // Fill in mini table entries.
  for (const FieldDescriptor* field : ordered_fields) {
    auto options = option_provider.GetForField(field);
    field_entries.push_back(
        {field, internal::cpp::HasHasbit(field)
                    ? has_bit_indices[static_cast<size_t>(field->index())]
                    : -1});
    auto& entry = field_entries.back();
    entry.type_card = MakeTypeCardForField(field, options);

    if (field->type() == FieldDescriptor::TYPE_MESSAGE ||
        field->type() == FieldDescriptor::TYPE_GROUP) {
      // Message-typed fields have a FieldAux with the default instance
      // pointer.
      if (field->is_map()) {
        // TODO(b/205904770): generate aux entries for maps
      } else if (field->options().weak()) {
        // Don't generate anything for weak fields. They are handled by the
        // generated fallback.
      } else if (options.is_implicitly_weak) {
        // Implicit weak fields don't need to store a default instance
        // pointer.
      } else if (options.is_lazy) {
        // Lazy fields are handled by the generated fallback function.
      } else {
        field_entries.back().aux_idx = aux_entries.size();
        aux_entries.push_back(
            {options.use_direct_tcparser_table ? kSubTable : kSubMessage,
             {field}});
      }
    } else if (field->type() == FieldDescriptor::TYPE_ENUM &&
               !cpp::HasPreservingUnknownEnumSemantics(field)) {
      // Enum fields which preserve unknown values (proto3 behavior) are
      // effectively int32 fields with respect to parsing -- i.e., the value
      // does not need to be validated at parse time.
      //
      // Enum fields which do not preserve unknown values (proto2 behavior)
      // use a FieldAux to store validation information. If the enum values
      // are sequential (and within a range we can represent), then the
      // FieldAux entry represents the range using the minimum value (which
      // must fit in an int16_t) and count (a uint16_t). Otherwise, the entry
      // holds a pointer to the generated Name_IsValid function.

      entry.aux_idx = aux_entries.size();
      aux_entries.push_back({});
      auto& aux_entry = aux_entries.back();

      if (GetEnumValidationRange(field->enum_type(), aux_entry.enum_range.start,
                                 aux_entry.enum_range.size)) {
        aux_entry.type = kEnumRange;
      } else {
        aux_entry.type = kEnumValidator;
        aux_entry.field = field;
      }

    } else if ((field->type() == FieldDescriptor::TYPE_STRING ||
                field->type() == FieldDescriptor::TYPE_BYTES) &&
               options.is_string_inlined) {
      GOOGLE_CHECK(!field->is_repeated());
      // Inlined strings have an extra marker to represent their donation
      // state.
      int idx = inlined_string_indices[static_cast<size_t>(field->index())];
      // For mini parsing, the donation state index is stored as an `offset`
      // auxiliary entry.
      entry.aux_idx = aux_entries.size();
      aux_entries.push_back({kNumericOffset});
      aux_entries.back().offset = idx;
      // For fast table parsing, the donation state index is stored instead of
      // the aux_idx (this will limit the range to 8 bits).
      entry.inlined_string_idx = idx;
    }
  }

  table_size_log2 = 0;  // fallback value
  int num_fast_fields = -1;
  for (int try_size_log2 : {0, 1, 2, 3, 4, 5}) {
    size_t try_size = 1 << try_size_log2;
    auto split_fields =
        SplitFastFieldsForSize(field_entries, try_size_log2, option_provider);
    GOOGLE_CHECK_EQ(split_fields.size(), try_size);
    int try_num_fast_fields = 0;
    for (const auto& info : split_fields) {
      if (info.field != nullptr) ++try_num_fast_fields;
    }
    // Use this size if (and only if) it covers more fields.
    if (try_num_fast_fields > num_fast_fields) {
      fast_path_fields = std::move(split_fields);
      table_size_log2 = try_size_log2;
      num_fast_fields = try_num_fast_fields;
    }
    // The largest table we allow has the same number of entries as the
    // message has fields, rounded up to the next power of 2 (e.g., a message
    // with 5 fields can have a fast table of size 8). A larger table *might*
    // cover more fields in certain cases, but a larger table in that case
    // would have mostly empty entries; so, we cap the size to avoid
    // pathologically sparse tables.
    if (try_size > ordered_fields.size()) {
      break;
    }
  }
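
  // A hypothetical illustration of the size selection above: for a message
  // whose only fields are fast-path-eligible int32 fields numbered 1, 2, and
  // 3, a 2-entry table covers two of them (numbers 1 and 3 collide), while a
  // 4-entry table covers all three, so table_size_log2 ends up as 2.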

  // Filter out fields that are handled by MiniParse. We don't need to
  // generate a fallback for these, which saves code size.
  fallback_fields = FilterMiniParsedFields(ordered_fields, option_provider);

  num_to_entry_table = MakeNumToEntryTable(ordered_fields);
  field_name_data = GenerateFieldNames(descriptor, ordered_fields);

  // If there are no fallback fields, and at most one extension range, the
  // parser can use a generic fallback function. Otherwise, a message-specific
  // fallback routine is needed.
  use_generated_fallback =
      !fallback_fields.empty() || descriptor->extension_range_count() > 1;
}

}  // namespace internal
}  // namespace protobuf
}  // namespace google

#include <google/protobuf/port_undef.inc>
@@ -0,0 +1,162 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// This file contains routines to generate tail-call table parsing tables.
// Everything in this file is for internal use only.

#ifndef GOOGLE_PROTOBUF_GENERATED_MESSAGE_TCTABLE_GEN_H__
#define GOOGLE_PROTOBUF_GENERATED_MESSAGE_TCTABLE_GEN_H__

#include <cstdint>
#include <functional>
#include <string>
#include <vector>

#include <google/protobuf/descriptor.h>
#include <google/protobuf/descriptor.pb.h>
#include <google/protobuf/generated_message_tctable_decl.h>

// Must come last:
#include <google/protobuf/port_def.inc>

namespace google {
namespace protobuf {
namespace internal {

// Helper class for generating tailcall parsing functions.
struct PROTOBUF_EXPORT TailCallTableInfo {
  struct PerFieldOptions {
    bool is_lazy;
    bool is_string_inlined;
    bool is_implicitly_weak;
    bool use_direct_tcparser_table;
    bool is_lite;
    bool should_split;
  };
  class OptionProvider {
   public:
    virtual PerFieldOptions GetForField(const FieldDescriptor*) const = 0;

   protected:
    ~OptionProvider() = default;
  };
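
  // Purely illustrative sketch of an OptionProvider implementation (code
  // generators supply their own, typically with per-field logic):
  //
  //   class AllDefaultOptions final
  //       : public TailCallTableInfo::OptionProvider {
  //    public:
  //     TailCallTableInfo::PerFieldOptions GetForField(
  //         const FieldDescriptor*) const override {
  //       return {/*is_lazy=*/false, /*is_string_inlined=*/false,
  //               /*is_implicitly_weak=*/false,
  //               /*use_direct_tcparser_table=*/true,
  //               /*is_lite=*/false, /*should_split=*/false};
  //     }
  //   };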

  TailCallTableInfo(const Descriptor* descriptor,
                    const std::vector<const FieldDescriptor*>& ordered_fields,
                    const OptionProvider& option_provider,
                    const std::vector<int>& has_bit_indices,
                    const std::vector<int>& inlined_string_indices);

  // Fields parsed by the table fast-path.
  struct FastFieldInfo {
    std::string func_name;
    const FieldDescriptor* field;
    uint16_t coded_tag;
    uint8_t hasbit_idx;
    uint8_t aux_idx;
  };
  std::vector<FastFieldInfo> fast_path_fields;

  // Fields parsed by mini parsing routines.
  struct FieldEntryInfo {
    const FieldDescriptor* field;
    int hasbit_idx;
    int inlined_string_idx;
    uint16_t aux_idx;
    uint16_t type_card;
  };
  std::vector<FieldEntryInfo> field_entries;

  enum AuxType {
    kNothing = 0,
    kInlinedStringDonatedOffset,
    kSplitOffset,
    kSplitSizeof,
    kSubMessage,
    kSubTable,
    kEnumRange,
    kEnumValidator,
    kNumericOffset,
  };
  struct AuxEntry {
    AuxType type;
    struct EnumRange {
      int16_t start;
      uint16_t size;
    };
    union {
      const FieldDescriptor* field;
      uint32_t offset;
      EnumRange enum_range;
    };
  };
  std::vector<AuxEntry> aux_entries;

  // Fields parsed by generated fallback function.
  std::vector<const FieldDescriptor*> fallback_fields;

  struct SkipEntry16 {
    uint16_t skipmap;
    uint16_t field_entry_offset;
  };
  struct SkipEntryBlock {
    uint32_t first_fnum;
    std::vector<SkipEntry16> entries;
  };
  struct NumToEntryTable {
    uint32_t skipmap32;  // for fields #1 - #32
    std::vector<SkipEntryBlock> blocks;
    // Compute the number of uint16_t required to represent this table.
    int size16() const {
      int size = 2;  // for the termination field#
      for (const auto& block : blocks) {
        // 2 for the field#, 1 for a count of skip entries, 2 for each entry.
        size += static_cast<int>(3 + block.entries.size() * 2);
      }
      return size;
    }
  };
  NumToEntryTable num_to_entry_table;

  std::vector<uint8_t> field_name_data;

  // Table size.
  int table_size_log2;
  // True if a generated fallback function is required instead of generic.
  bool use_generated_fallback;
};

}  // namespace internal
}  // namespace protobuf
}  // namespace google

#include <google/protobuf/port_undef.inc>

#endif  // GOOGLE_PROTOBUF_GENERATED_MESSAGE_TCTABLE_GEN_H__