Refactor the codepath for unknown field handling.

This simplifies adding more operations without widening the side channel.

PiperOrigin-RevId: 494200747
pull/11223/head
Protobuf Team Bot 2 years ago committed by Copybara-Service
parent 176e67b2e6
commit 5e43ea27cb
  1. 14
      src/google/protobuf/generated_message_tctable_decl.h
  2. 8
      src/google/protobuf/generated_message_tctable_full.cc
  3. 53
      src/google/protobuf/generated_message_tctable_impl.h
  4. 43
      src/google/protobuf/generated_message_tctable_lite.cc

@ -122,20 +122,6 @@ struct TcFieldData {
// Returns the low 32 bits of `data`.
uint32_t tag() const {
  const uint64_t low_half = data & uint64_t{0xFFFFFFFF};
  return static_cast<uint32_t>(low_half);
}
// Returns the high 32 bits of `data`.
uint32_t entry_offset() const {
  const uint64_t high_half = data >> 32;
  return static_cast<uint32_t>(high_half);
}
// Fields used for passing unknown enum values to the generic fallback:
//     Bit:
//     +---------------+---------------+
//     |63    ..     32|31     ..     0|
//     +---------------+---------------+
//     :   .   :   .   |===============| [32] tag() (decoded)
//     |===============|   .   :   .   : [32] unknown_enum_value()
//     +---------------+---------------+
//     |63    ..     32|31     ..     0|
//     +---------------+---------------+
// Returns the high 32 bits of `data`, converted to a signed 32-bit integer.
int32_t unknown_enum_value() const {
  const uint64_t high_half = data >> 32;
  return static_cast<int32_t>(high_half);
}
uint64_t data;
};

@ -51,6 +51,10 @@ const char* TcParser::GenericFallback(PROTOBUF_TC_PARAM_DECL) {
}
const char* TcParser::ReflectionFallback(PROTOBUF_TC_PARAM_DECL) {
if (PROTOBUF_PREDICT_FALSE(MustFallbackToGeneric(PROTOBUF_TC_PARAM_PASS))) {
PROTOBUF_MUSTTAIL return GenericFallback(PROTOBUF_TC_PARAM_PASS);
}
SyncHasbits(msg, hasbits, table);
uint32_t tag = data.tag();
if (tag == 0 || (tag & 7) == WireFormatLite::WIRETYPE_END_GROUP) {
@ -58,10 +62,6 @@ const char* TcParser::ReflectionFallback(PROTOBUF_TC_PARAM_DECL) {
return ptr;
}
if (MustFallbackToGeneric(PROTOBUF_TC_PARAM_PASS)) {
PROTOBUF_MUSTTAIL return GenericFallback(PROTOBUF_TC_PARAM_PASS);
}
auto* full_msg = DownCast<Message*>(msg);
auto* descriptor = full_msg->GetDescriptor();
auto* reflection = full_msg->GetReflection();

@ -297,9 +297,8 @@ class PROTOBUF_EXPORT TcParser final {
// - MpXXX functions expect `data` with a mini table ABI.
// - The fallback functions (both GenericFallbackXXX and the codegen ones)
// expect only the tag in `data`. In addition, if a null `ptr` is passed,
// the function is used to push an unknown enum value into the
// UnknownFieldSet. The function then expects `data` to use the unknown
// enum ABI, as described in `struct TcFieldData`.
// the function is used as a way to get an UnknownFieldOps vtable, returned
// via the `const char*` return type. See `GetUnknownFieldOps()`.
static bool MustFallbackToGeneric(PROTOBUF_TC_PARAM_DECL) {
return ptr == nullptr;
@ -533,8 +532,7 @@ class PROTOBUF_EXPORT TcParser final {
private:
friend class GeneratedTcTableLiteTest;
static void* MaybeGetSplitBase(MessageLite* msg, const bool is_split,
const TcParseTableBase* table,
google::protobuf::internal::ParseContext* ctx);
const TcParseTableBase* table);
// Test only access to verify that the right function is being called via
// MiniParse.
@ -577,8 +575,39 @@ class PROTOBUF_EXPORT TcParser final {
class ScopedArenaSwap;
struct UnknownFieldOps {
void (*write_varint)(MessageLite* msg, int number, int value);
void (*write_length_delimited)(MessageLite* msg, int number,
absl::string_view value);
};
// Returns the UnknownFieldOps table associated with `table`. The out-of-line
// definition obtains it by calling `table->fallback` with a null `ptr` and
// reinterpreting the returned `const char*` as a `const UnknownFieldOps*`.
static const UnknownFieldOps& GetUnknownFieldOps(
const TcParseTableBase* table);
// Appends `value` as a varint entry for field `number` to the unknown fields
// of `msg`, accessed as `UnknownFieldsT` through the message's internal
// metadata.
template <typename UnknownFieldsT>
static void WriteVarintToUnknown(MessageLite* msg, int number, int value) {
  auto* unknown_fields =
      msg->_internal_metadata_.mutable_unknown_fields<UnknownFieldsT>();
  internal::WriteVarint(number, value, unknown_fields);
}
// Appends `value` as a length-delimited entry for field `number` to the
// unknown fields of `msg`, accessed as `UnknownFieldsT` through the message's
// internal metadata.
template <typename UnknownFieldsT>
static void WriteLengthDelimitedToUnknown(MessageLite* msg, int number,
                                          absl::string_view value) {
  auto* unknown_fields =
      msg->_internal_metadata_.mutable_unknown_fields<UnknownFieldsT>();
  internal::WriteLengthDelimited(number, value, unknown_fields);
}
template <class MessageBaseT, class UnknownFieldsT>
static const char* GenericFallbackImpl(PROTOBUF_TC_PARAM_DECL) {
if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) {
// This is the ABI used by GetUnknownFieldOps(). Return the vtable.
static constexpr UnknownFieldOps kOps = {
WriteVarintToUnknown<UnknownFieldsT>,
WriteLengthDelimitedToUnknown<UnknownFieldsT>};
return reinterpret_cast<const char*>(&kOps);
}
SyncHasbits(msg, hasbits, table);
uint32_t tag = data.tag();
if ((tag & 7) == WireFormatLite::WIRETYPE_END_GROUP || tag == 0) {
@ -595,15 +624,6 @@ class PROTOBUF_EXPORT TcParser final {
&msg->_internal_metadata_, ctx);
}
// Side channel for handling unknown data. Currently, only used for unknown
// enums. The value is in the 32 high bits of data.
if (ptr == nullptr) {
internal::WriteVarint(
num, data.unknown_enum_value(),
msg->_internal_metadata_.mutable_unknown_fields<UnknownFieldsT>());
return nullptr;
}
return UnknownFieldParse(
tag, msg->_internal_metadata_.mutable_unknown_fields<UnknownFieldsT>(),
ptr, ctx);
@ -657,9 +677,8 @@ class PROTOBUF_EXPORT TcParser final {
const char* ptr, Arena* arena, SerialArena* serial_arena,
ParseContext* ctx, RepeatedPtrField<std::string>& field);
static void UnknownPackedEnum(MessageLite* msg, ParseContext* ctx,
const TcParseTableBase* table, uint32_t tag,
int32_t enum_value);
static void UnknownPackedEnum(MessageLite* msg, const TcParseTableBase* table,
uint32_t tag, int32_t enum_value);
// Mini field lookup:
static const TcParseTableBase::FieldEntry* FindFieldEntry(

@ -30,7 +30,9 @@
#include <cstdint>
#include <numeric>
#include <string>
#include <type_traits>
#include <utility>
#include "absl/cleanup/cleanup.h"
#include "google/protobuf/generated_message_tctable_decl.h"
@ -1260,16 +1262,21 @@ const char* TcParser::RepeatedEnum(PROTOBUF_TC_PARAM_DECL) {
return ToParseLoop(PROTOBUF_TC_PARAM_PASS);
}
PROTOBUF_NOINLINE void TcParser::UnknownPackedEnum(
MessageLite* msg, ParseContext* ctx, const TcParseTableBase* table,
uint32_t tag, int32_t enum_value) {
const TcParser::UnknownFieldOps& TcParser::GetUnknownFieldOps(
const TcParseTableBase* table) {
// Call the fallback function in a special mode to only act as a
// way to push the unknown enum into the UnknownFieldSet. This is
// _not_ a tail call, and we should continue processing the packed
// input.
TcFieldData data;
data.data = (uint64_t{static_cast<uint32_t>(enum_value)} << 32) | tag;
table->fallback(msg, nullptr, ctx, data, table, 0);
// way to return the ops.
// Hiding the unknown fields vtable behind the fallback function avoids adding
// more pointers in TcParseTableBase, and the extra runtime jumps are not
// relevant because unknown fields are rare.
const char* ptr = table->fallback(nullptr, nullptr, nullptr, {}, nullptr, 0);
return *reinterpret_cast<const UnknownFieldOps*>(ptr);
}
// Stores `enum_value` in the unknown fields of `msg` as a varint. The packed
// enum parsers call this for values that fail enum validation. `tag` is the
// decoded tag; the write operation is looked up through `table`.
PROTOBUF_NOINLINE void TcParser::UnknownPackedEnum(
    MessageLite* msg, const TcParseTableBase* table, uint32_t tag,
    int32_t enum_value) {
  const auto& ops = GetUnknownFieldOps(table);
  // Dropping the low three bits of the tag leaves the field number.
  const uint32_t field_number = tag >> 3;
  ops.write_varint(msg, field_number, enum_value);
}
template <typename TagType, uint16_t xform_val>
@ -1292,7 +1299,7 @@ const char* TcParser::PackedEnum(PROTOBUF_TC_PARAM_DECL) {
const TcParseTableBase::FieldAux aux = *table->field_aux(data.aux_idx());
return ctx->ReadPackedVarint(ptr, [=](int32_t value) {
if (!EnumIsValidAux(value, xform_val, aux)) {
UnknownPackedEnum(msg, ctx, table, FastDecodeTag(saved_tag), value);
UnknownPackedEnum(msg, table, FastDecodeTag(saved_tag), value);
} else {
field->Add(value);
}
@ -1455,7 +1462,7 @@ const char* TcParser::PackedEnumSmallRange(PROTOBUF_TC_PARAM_DECL) {
}
int32_t v32 = static_cast<int32_t>(tmp);
if (PROTOBUF_PREDICT_FALSE(min > v32 || v32 > max)) {
UnknownPackedEnum(msg, ctx, table, FastDecodeTag(saved_tag), v32);
UnknownPackedEnum(msg, table, FastDecodeTag(saved_tag), v32);
} else {
field->Add(v32);
}
@ -1817,8 +1824,7 @@ uint32_t GetSizeofSplit(const TcParseTableBase* table) {
} // namespace
void* TcParser::MaybeGetSplitBase(MessageLite* msg, const bool is_split,
const TcParseTableBase* table,
::google::protobuf::internal::ParseContext* ctx) {
const TcParseTableBase* table) {
void* out = msg;
if (is_split) {
const uint32_t split_offset = GetSplitOffset(table);
@ -1867,7 +1873,7 @@ PROTOBUF_NOINLINE const char* TcParser::MpFixed(PROTOBUF_TC_PARAM_DECL) {
} else if (card == field_layout::kFcOneof) {
ChangeOneof(table, entry, data.tag() >> 3, ctx, msg);
}
void* const base = MaybeGetSplitBase(msg, is_split, table, ctx);
void* const base = MaybeGetSplitBase(msg, is_split, table);
// Copy the value:
if (rep == field_layout::kRep64Bits) {
RefAt<uint64_t>(base, entry.offset) = UnalignedLoad<uint64_t>(ptr);
@ -2008,7 +2014,7 @@ PROTOBUF_NOINLINE const char* TcParser::MpVarint(PROTOBUF_TC_PARAM_DECL) {
ChangeOneof(table, entry, data.tag() >> 3, ctx, msg);
}
void* const base = MaybeGetSplitBase(msg, is_split, table, ctx);
void* const base = MaybeGetSplitBase(msg, is_split, table);
if (rep == field_layout::kRep64Bits) {
RefAt<uint64_t>(base, entry.offset) = tmp;
} else if (rep == field_layout::kRep32Bits) {
@ -2123,7 +2129,7 @@ PROTOBUF_NOINLINE const char* TcParser::MpPackedVarint(PROTOBUF_TC_PARAM_DECL) {
const TcParseTableBase::FieldAux aux = *table->field_aux(entry.aux_idx);
return ctx->ReadPackedVarint(ptr, [=](int32_t value) {
if (!EnumIsValidAux(value, xform_val, aux)) {
UnknownPackedEnum(msg, ctx, table, data.tag(), value);
UnknownPackedEnum(msg, table, data.tag(), value);
} else {
field->Add(value);
}
@ -2197,7 +2203,7 @@ PROTOBUF_NOINLINE const char* TcParser::MpString(PROTOBUF_TC_PARAM_DECL) {
}
bool is_valid = false;
void* const base = MaybeGetSplitBase(msg, is_split, table, ctx);
void* const base = MaybeGetSplitBase(msg, is_split, table);
switch (rep) {
case field_layout::kRepAString: {
auto& field = RefAt<ArenaStringPtr>(base, entry.offset);
@ -2352,7 +2358,7 @@ PROTOBUF_NOINLINE const char* TcParser::MpMessage(PROTOBUF_TC_PARAM_DECL) {
need_init = ChangeOneof(table, entry, data.tag() >> 3, ctx, msg);
}
void* const base = MaybeGetSplitBase(msg, is_split, table, ctx);
void* const base = MaybeGetSplitBase(msg, is_split, table);
SyncHasbits(msg, hasbits, table);
MessageLite*& field = RefAt<MessageLite*>(base, entry.offset);
if ((type_card & field_layout::kTvMask) == field_layout::kTvTable) {
@ -2417,7 +2423,6 @@ const char* TcParser::MpRepeatedMessage(PROTOBUF_TC_PARAM_DECL) {
const auto aux = *table->field_aux(&entry);
if ((type_card & field_layout::kTvMask) == field_layout::kTvTable) {
auto* inner_table = aux.table;
auto& field = RefAt<RepeatedPtrFieldBase>(msg, entry.offset);
MessageLite* value = field.Add<GenericTypeHandler<MessageLite>>(
inner_table->default_instance);
if (is_group) {

Loading…
Cancel
Save