Add v2 FieldEntry #1.

PiperOrigin-RevId: 663974332
pull/17790/head
Protobuf Team Bot 3 months ago committed by Copybara-Service
parent 4e362b66bf
commit a6977fef4c
  1. 18
      src/google/protobuf/BUILD.bazel
  2. 248
      src/google/protobuf/generated_message_table.h
  3. 100
      src/google/protobuf/generated_message_table_gen.cc
  4. 36
      src/google/protobuf/generated_message_table_gen.h
  5. 182
      src/google/protobuf/generated_message_table_gen_test.cc
  6. 5
      src/google/protobuf/message.h

@ -491,6 +491,7 @@ cc_library(
"extension_set.h",
"extension_set_inl.h",
"generated_enum_util.h",
"generated_message_table.h",
"generated_message_tctable_decl.h",
"generated_message_tctable_impl.h",
"generated_message_util.h",
@ -573,6 +574,7 @@ PROTOBUF_HEADERS = [
"generated_enum_reflection.h",
"generated_message_bases.h",
"generated_message_reflection.h",
"generated_message_table_gen.h",
"generated_message_tctable_gen.h",
"map_entry.h",
"map_field.h",
@ -604,6 +606,7 @@ cc_library(
"feature_resolver.cc",
"generated_message_bases.cc",
"generated_message_reflection.cc",
"generated_message_table_gen.cc",
"generated_message_tctable_full.cc",
"generated_message_tctable_gen.cc",
"map_field.cc",
@ -1468,6 +1471,21 @@ cc_test(
],
)
# Unit test for the v2 type-card generation declared in
# generated_message_table_gen.h (source: generated_message_table_gen_test.cc).
cc_test(
name = "generated_message_table_gen_test",
srcs = ["generated_message_table_gen_test.cc"],
deps = [
":cc_test_protos",
":port",
":protobuf",
":protobuf_lite",
"@com_google_absl//absl/algorithm:container",
"@com_google_absl//absl/log:absl_check",
"@com_google_googletest//:gtest",
"@com_google_googletest//:gtest_main",
],
)
cc_test(
name = "inlined_string_field_unittest",
srcs = ["inlined_string_field_unittest.cc"],

@ -0,0 +1,248 @@
#ifndef GOOGLE_PROTOBUF_GENERATED_MESSAGE_TABLE_DECL_H__
#define GOOGLE_PROTOBUF_GENERATED_MESSAGE_TABLE_DECL_H__
#include <cstdint>
#include <limits>
#include "absl/log/absl_check.h"
namespace google {
namespace protobuf {
namespace internal {
namespace v2 {
// Field layout enums.
//
// Structural information about fields is packed into an 8-bit value. The enum
// types below represent bitwise fields, along with their respective widths,
// shifts, and masks. To pack into one byte, some mutually exclusive types share
// bits in [5, 7].
//
// <<Numeric Fields>>
// Bit:
// +---------------+---------------+
// |7 ... 4|3 ... 0|
// +---------------+---------------+
// : . : . : . : . : 3|===========| [3] FieldKind
// : . : . : 5|=======| . : . : . : [2] Cardinality
// : . : 6|===| . : . : . : . : . : [1] NumericKind
// +---------------+---------------+
//
// <<Message Fields>>
// Bit:
// +---------------+---------------+
// |7 ... 4|3 ... 0|
// +---------------+---------------+
// : . : . : . : . : 3|===========| [3] FieldKind
// : . : . : 5|=======| . : . : . : [2] Cardinality
// : 7|=======| . : . : . : . : . : [2] MessageKind
// +---------------+---------------+
//
// <<String Fields>>
// Bit:
// +---------------+---------------+
// |7 ... 4|3 ... 0|
// +---------------+---------------+
// : . : . : . : . : 3|===========| [3] FieldKind
// : . : . : 5|=======| . : . : . : [2] Cardinality
// |===========| . : . : . : . : . : [3] StringKind
// +---------------+---------------+
//
// clang-format off
// FieldKind (3 bits):
// Broad classification of a field, pairing a wire type with an in-memory
// storage class. Stored in the lowest bits of the packed field type.
namespace FieldKind {
constexpr int kBits = 3;
constexpr int kShift = 0;
constexpr int kMask = ((1 << kBits) - 1) << kShift;

enum Kinds : uint8_t {
  kFixed8 = 0,   // bool
  kFixed16 = 1,  // placeholder
  kFixed32 = 2,  // (s|u)?int32, (s)?fixed32, float, enum
  kFixed64 = 3,  // (s|u)?int64, (s)?fixed64, double
  kBytes = 4,    // bytes
  kString = 5,   // string
  kMessage = 6,  // group, message
  kMap = 7,      // map<...>
};

// The largest kind must still be representable in kBits bits.
static_assert(kMap < (1 << kBits), "too many types");
}  // namespace FieldKind
// Cardinality (2 bits):
// These values determine how many values a field can have and its presence.
namespace Cardinality {
constexpr int kShift = FieldKind::kShift + FieldKind::kBits;
constexpr int kBits = 2;
constexpr int kMask = ((1 << kBits) - 1) << kShift;
enum Kinds : uint8_t {
kSingular = 0,
kOptional = 1 << kShift,
kRepeated = 2 << kShift,
kOneof = 3 << kShift,
};
} // namespace Cardinality
// NumericKind, MessageKind, StringKind are mutually exclusive and share the
// same bit-space (i.e. the same shift).
// NumericKind (1 bit):
// Indicates whether a numeric is signed.
namespace NumericKind {
constexpr int kShift = Cardinality::kShift + Cardinality::kBits;
constexpr int kBits = 1;
constexpr int kMask = ((1 << kBits) - 1) << kShift;
enum Kinds : uint8_t {
kUnsigned = 0,
kSigned = 1 << kShift,
};
} // namespace NumericKind
// MessageKind (2 bits):
// Indicates if it's LazyField or eager message / group.
namespace MessageKind {
constexpr int kShift = Cardinality::kShift + Cardinality::kBits;
constexpr int kBits = 2;
constexpr int kMask = ((1 << kBits) - 1) << kShift;
enum Kinds : uint8_t {
kEager = 0,
kLazy = 1 << kShift,
kGroup = 2 << kShift,
};
} // namespace MessageKind
// StringKind (3 bits):
// Indicates if it's LazyField or eager message / group.
namespace StringKind {
constexpr int kShift = Cardinality::kShift + Cardinality::kBits;
constexpr int kBits = 3;
constexpr int kMask = ((1 << kBits) - 1) << kShift;
enum Kinds : uint8_t {
kArenaPtr = 0,
kInlined = 1 << kShift,
kView = 2 << kShift,
kCord = 3 << kShift,
kStringPiece = 4 << kShift,
kStringPtr = 5 << kShift,
};
} // namespace StringKind
// FieldType (8 bits, cardinality bits left clear):
// Convenience aliases that combine a FieldKind with the matching type-specific
// kind (NumericKind or MessageKind). Cardinality bits are not part of these
// values, and bytes/string aliases carry no StringKind bits; both are expected
// to be OR'ed in by whoever builds the final type card.
//
// Several aliases intentionally share one value because they only differ on
// the wire, not in the (kind, signedness) pair encoded here, e.g.
// kInt32 == kSInt32 == kSFixed32 == kEnum.
//
// NOTE(review): the leading `0 |` presumably forces promotion to int before
// enumerators of two different enum types are combined -- confirm before
// removing it.
enum FieldType : uint8_t {
// Numeric types:
kBool = 0 | FieldKind::kFixed8 | NumericKind::kUnsigned,
kInt32 = 0 | FieldKind::kFixed32 | NumericKind::kSigned,
kSInt32 = 0 | FieldKind::kFixed32 | NumericKind::kSigned,
kSFixed32 = 0 | FieldKind::kFixed32 | NumericKind::kSigned,
kUInt32 = 0 | FieldKind::kFixed32 | NumericKind::kUnsigned,
kFixed32 = 0 | FieldKind::kFixed32 | NumericKind::kUnsigned,
kFloat = 0 | FieldKind::kFixed32 | NumericKind::kUnsigned,
kEnum = 0 | FieldKind::kFixed32 | NumericKind::kSigned,
kInt64 = 0 | FieldKind::kFixed64 | NumericKind::kSigned,
kSInt64 = 0 | FieldKind::kFixed64 | NumericKind::kSigned,
kSFixed64 = 0 | FieldKind::kFixed64 | NumericKind::kSigned,
kUInt64 = 0 | FieldKind::kFixed64 | NumericKind::kUnsigned,
kFixed64 = 0 | FieldKind::kFixed64 | NumericKind::kUnsigned,
kDouble = 0 | FieldKind::kFixed64 | NumericKind::kUnsigned,
// String types (no StringKind bits set here):
kBytes = FieldKind::kBytes,
kString = FieldKind::kString,
// Message types:
kMessage = 0 | FieldKind::kMessage | MessageKind::kEager,
kLazyMessage = 0 | FieldKind::kMessage | MessageKind::kLazy,
kGroup = 0 | FieldKind::kMessage | MessageKind::kGroup,
// Map types:
kMap = FieldKind::kMap,
};
// clang-format on
// Compact per-field record (8 bytes). Holds the packed field type together
// with the hasbit index, offset and field number when they all fit; otherwise
// those three carry fallback sentinels and `aux_index` names an aux entry.
struct FieldEntry {
  // Creates an entry that stores every value directly and needs no aux entry.
  // (Should be the common case.)
  constexpr FieldEntry(uint8_t type, uint8_t hasbit_index, uint16_t offset,
                       uint16_t number)
      : field_type(type),
        hasbit_index(hasbit_index),
        offset(offset),
        field_number(number),
        aux_index(kNoAuxIdx) {}

  // Creates an entry that defers to the aux entry at `aux_index`. Used when
  // any of hasbit_index, offset or field_number is too big to fit; all three
  // are then set to their fallback sentinels.
  constexpr FieldEntry(uint8_t type, uint16_t aux_index)
      : field_type(type),
        hasbit_index(kHasbitFallbackToAux),
        offset(kFallbackToAux),
        field_number(kFallbackToAux),
        aux_index(aux_index) {}

  // True when this entry was built with an aux index.
  constexpr bool ShouldLookupAuxEntry() const {
    return kNoAuxIdx != aux_index;
  }

  // Raw (still shifted) FieldKind bits of the field type.
  uint8_t GetFieldKind() const {
    return static_cast<uint8_t>(field_type & FieldKind::kMask);
  }

  // Raw (still shifted) Cardinality bits of the field type.
  uint8_t GetCardinality() const {
    return static_cast<uint8_t>(field_type & Cardinality::kMask);
  }

  // Raw NumericKind bits. Only meaningful for kinds ordered before kBytes.
  uint8_t GetNumericKind() const {
    ABSL_DCHECK_LT(GetFieldKind(), FieldKind::kBytes);
    return static_cast<uint8_t>(field_type & NumericKind::kMask);
  }

  // Raw MessageKind bits. Only meaningful for message fields.
  uint8_t GetMessageKind() const {
    ABSL_DCHECK_EQ(GetFieldKind(), FieldKind::kMessage);
    return static_cast<uint8_t>(field_type & MessageKind::kMask);
  }

  // Raw StringKind bits. Only meaningful for bytes and string fields.
  uint8_t GetStringKind() const {
    ABSL_DCHECK(GetFieldKind() == FieldKind::kBytes ||
                GetFieldKind() == FieldKind::kString);
    return static_cast<uint8_t>(field_type & StringKind::kMask);
  }

  // True when the numeric field is marked signed.
  bool IsSigned() const {
    const uint8_t numeric_kind = GetNumericKind();
    return numeric_kind == NumericKind::kSigned;
  }

  // True for string fields, false for bytes fields.
  bool IsUTF8() const {
    ABSL_DCHECK(GetFieldKind() == FieldKind::kBytes ||
                GetFieldKind() == FieldKind::kString);
    const bool is_string = GetFieldKind() == FieldKind::kString;
    return is_string;
  }

  // True when the cardinality bits say "repeated".
  bool IsRepeated() const {
    const uint8_t cardinality = GetCardinality();
    return cardinality == Cardinality::kRepeated;
  }

  // Packed FieldKind, Cardinality and type-specific kind.
  uint8_t field_type;
  // Hasbit index stored in one byte; 0xFF is reserved as the fallback-to-aux
  // sentinel.
  uint8_t hasbit_index;
  // Field offset stored in two bytes; 0xFFFF is reserved as the
  // fallback-to-aux sentinel.
  uint16_t offset;
  // Field number stored in two bytes; 0xFFFF is reserved as the
  // fallback-to-aux sentinel.
  uint16_t field_number;
  // Index of the aux entry, or kNoAuxIdx when none is used. Only up to 2^16
  // fallback cases are supported.
  uint16_t aux_index;

  // Sentinel values stored in the members above.
  static constexpr uint16_t kHasbitFallbackToAux = 0xFF;
  static constexpr uint16_t kFallbackToAux = 0xFFFF;
  static constexpr uint16_t kNoAuxIdx = 0xFFFF;

  // Same limits as the sentinels, but typed to match the values obtained from
  // reflection or protoc when deciding whether an aux entry is required.
  static constexpr uint32_t kHasbitIdxLimit =
      std::numeric_limits<uint8_t>::max();
  static constexpr uint32_t kOffsetLimit =
      std::numeric_limits<uint16_t>::max();
  static constexpr int kFieldNumberLimit =
      std::numeric_limits<uint16_t>::max();
};

// The entry is meant to occupy exactly one 64-bit word.
static_assert(sizeof(FieldEntry) == sizeof(uint64_t), "");
} // namespace v2
} // namespace internal
} // namespace protobuf
} // namespace google
#endif // GOOGLE_PROTOBUF_GENERATED_MESSAGE_TABLE_DECL_H__

@ -0,0 +1,100 @@
#include "google/protobuf/generated_message_table_gen.h"
#include <cstdint>
#include "absl/log/absl_check.h"
#include "google/protobuf/descriptor.h"
#include "google/protobuf/generated_message_table.h"
#include "google/protobuf/port.h"
namespace google {
namespace protobuf {
namespace internal {
namespace v2 {
using CppStringType = FieldDescriptor::CppStringType;
namespace {
// Picks the StringKind bits for a string/bytes field.
//
// Cord fields map to kCord and must not be inlined. STRING and VIEW fields
// share one path: repeated fields use kStringPtr, inlined fields use kInlined,
// and everything else uses kArenaPtr.
uint8_t GenerateStringKind(const FieldDescriptor* field, bool is_inlined) {
  switch (field->cpp_string_type()) {
    case CppStringType::kCord:
      ABSL_CHECK(!is_inlined);
      return StringKind::kCord;

    // VIEW fields are treated as strings for now.
    case CppStringType::kView:
    case CppStringType::kString:
      if (field->is_repeated()) return StringKind::kStringPtr;
      if (is_inlined) return StringKind::kInlined;
      return StringKind::kArenaPtr;

    default:
      break;
  }
  // No other string type is expected.
  Unreachable();
}
} // namespace
// Builds the 8-bit type card for `field`.
//
// Map fields return FieldType::kMap as-is. For every other field the card is
// the base FieldType for the descriptor type (upgraded to kLazyMessage when
// `info.is_lazy` is set, or to kString when UTF-8 validation is required),
// OR'ed with the cardinality bits and, for string-typed fields, the
// StringKind bits chosen from `info.is_inlined`.
uint8_t MakeTypeCardForField(const FieldDescriptor* field, FieldTypeInfo info) {
  // Base type card indexed by FieldDescriptor::Type.
  constexpr uint8_t field_type_to_type_card[] = {
      0,                     // placeholder as type starts from 1.
      FieldType::kDouble,    // TYPE_DOUBLE
      FieldType::kFloat,     // TYPE_FLOAT
      FieldType::kInt64,     // TYPE_INT64
      FieldType::kUInt64,    // TYPE_UINT64
      FieldType::kInt32,     // TYPE_INT32
      FieldType::kFixed64,   // TYPE_FIXED64
      FieldType::kFixed32,   // TYPE_FIXED32
      FieldType::kBool,      // TYPE_BOOL
      FieldType::kBytes,     // TYPE_STRING
      FieldType::kGroup,     // TYPE_GROUP
      FieldType::kMessage,   // TYPE_MESSAGE
      FieldType::kBytes,     // TYPE_BYTES
      FieldType::kUInt32,    // TYPE_UINT32
      FieldType::kEnum,      // TYPE_ENUM
      FieldType::kSFixed32,  // TYPE_SFIXED32
      FieldType::kSFixed64,  // TYPE_SFIXED64
      FieldType::kSInt32,    // TYPE_SINT32
      FieldType::kSInt64,    // TYPE_SINT64
  };
  static_assert(
      sizeof(field_type_to_type_card) == (FieldDescriptor::MAX_TYPE + 1), "");

  if (field->is_map()) return FieldType::kMap;

  const auto descriptor_type = field->type();
  uint8_t type_card = field_type_to_type_card[descriptor_type];

  // Lazy messages and UTF-8 validated strings replace the table value.
  if (descriptor_type == FieldDescriptor::TYPE_MESSAGE) {
    if (info.is_lazy) type_card = FieldType::kLazyMessage;
  } else if (descriptor_type == FieldDescriptor::TYPE_STRING) {
    if (field->requires_utf8_validation()) type_card = FieldType::kString;
  }

  // Cardinality: repeated wins over oneof, which wins over explicit presence.
  uint8_t cardinality = Cardinality::kSingular;
  if (field->is_repeated()) {
    cardinality = Cardinality::kRepeated;
  } else if (field->real_containing_oneof()) {
    cardinality = Cardinality::kOneof;
  } else if (field->has_presence()) {
    cardinality = Cardinality::kOptional;
  }
  type_card |= cardinality;

  // Only string-typed fields still need their StringKind; numerics
  // (signedness) and messages (lazy) are already fully specified.
  if (field->cpp_type() == FieldDescriptor::CPPTYPE_STRING) {
    type_card |= GenerateStringKind(field, info.is_inlined);
  }
  return type_card;
}
} // namespace v2
} // namespace internal
} // namespace protobuf
} // namespace google

@ -0,0 +1,36 @@
#ifndef GOOGLE_PROTOBUF_GENERATED_MESSAGE_TABLE_GEN_H__
#define GOOGLE_PROTOBUF_GENERATED_MESSAGE_TABLE_GEN_H__
#include <cstdint>
#include "google/protobuf/descriptor.h"
// Must be included last.
#include "google/protobuf/port_def.inc"
// This file contains types and APIs to generate tables for v2 wireformat.
namespace google {
namespace protobuf {
namespace internal {
namespace v2 {
// Extra per-field inputs to MakeTypeCardForField() that accompany the field
// descriptor.
struct FieldTypeInfo {
// Whether the field is stored inlined.
// NOTE(review): presumably only relevant to string fields -- confirm.
bool is_inlined;
// Whether the field is lazy.
// NOTE(review): presumably only relevant to message fields -- confirm.
bool is_lazy;
};
// Returns the 8-bit type card for a given field. A type card contains the
// field type and cardinality information needed to iterate over the fields of
// a message.
PROTOBUF_EXPORT uint8_t MakeTypeCardForField(const FieldDescriptor* field,
FieldTypeInfo info);
} // namespace v2
} // namespace internal
} // namespace protobuf
} // namespace google
#include "google/protobuf/port_undef.inc"
#endif // GOOGLE_PROTOBUF_GENERATED_MESSAGE_TABLE_GEN_H__

@ -0,0 +1,182 @@
#include "google/protobuf/generated_message_table_gen.h"
#include <cctype>
#include <cstdint>
#include <string>
#include <gtest/gtest.h>
#include "absl/algorithm/container.h"
#include "absl/log/absl_check.h"
#include "google/protobuf/generated_message_table.h"
#include "google/protobuf/port.h"
#include "google/protobuf/unittest.pb.h"
#include "google/protobuf/unittest_mset.pb.h"
namespace google {
namespace protobuf {
namespace internal {
namespace v2 {
// Test-only accessor that Reflection befriends, giving this test access to
// the schema queries it needs.
class V2TableGenTester {
 public:
  // Returns the schema's hasbit index for `field`.
  static uint32_t HasBitIndex(const Reflection* reflection,
                              const FieldDescriptor* field) {
    const auto& schema = reflection->schema_;
    return schema.HasBitIndex(field);
  }

  // Returns the schema's field offset for `field`.
  static uint32_t GetFieldOffset(const Reflection* reflection,
                                 const FieldDescriptor* field) {
    const auto& schema = reflection->schema_;
    return schema.GetFieldOffset(field);
  }

  // Returns whether reflection treats `field` as lazy. Extensions are
  // rejected.
  static bool IsLazyField(const Reflection* reflection,
                          const FieldDescriptor* field) {
    ABSL_CHECK(!field->is_extension());
    const bool is_lazy = reflection->IsLazyField(field);
    return is_lazy;
  }

  // Returns whether the schema marks `field` as inlined.
  static bool IsInlined(const Reflection* reflection,
                        const FieldDescriptor* field) {
    const auto& schema = reflection->schema_;
    return schema.IsFieldInlined(field);
  }
};
namespace {
using ::protobuf_unittest::TestAllTypes;
using ::protobuf_unittest::TestMessageSetExtension1;
// Builds a FieldEntry that needs no AuxEntry. This requires every value to
// fit the smaller (but common) inline limits: one byte for the hasbit index,
// two bytes each for the offset and the field number. Aborts otherwise.
FieldEntry CreateFieldEntryWithoutAux(const Reflection* reflection,
                                      const Message* message,
                                      const FieldDescriptor* field) {
  ABSL_CHECK_EQ(reflection, message->GetReflection());

  const uint32_t hasbit_index =
      V2TableGenTester::HasBitIndex(reflection, field);
  const uint32_t offset = V2TableGenTester::GetFieldOffset(reflection, field);

  // CHECK that "field" fits into a FieldEntry alone, i.e. needs no AuxEntry.
  static constexpr uint32_t kNoHasbit = static_cast<uint32_t>(-1);
  ABSL_CHECK(hasbit_index == kNoHasbit ||
             hasbit_index < FieldEntry::kHasbitIdxLimit);
  ABSL_CHECK_LT(offset, FieldEntry::kOffsetLimit);
  ABSL_CHECK_LT(field->number(), FieldEntry::kFieldNumberLimit);

  FieldTypeInfo type_info{};
  type_info.is_lazy = V2TableGenTester::IsLazyField(reflection, field);
  type_info.is_inlined = V2TableGenTester::IsInlined(reflection, field);
  const uint8_t type_card = MakeTypeCardForField(field, type_info);

  // The values are narrowed to the widths FieldEntry stores; kNoHasbit
  // becomes 0xFF.
  return FieldEntry(type_card, static_cast<uint8_t>(hasbit_index),
                    static_cast<uint16_t>(offset),
                    static_cast<uint16_t>(field->number()));
}
// Fixture parameterized on a message instance; caches the message together
// with its reflection object for the test bodies.
class TableGenTest : public testing::TestWithParam<const Message*> {
 public:
  TableGenTest() {
    message_ = GetParam();
    reflection_ = message_->GetReflection();
  }

 protected:
  const Message* message_ = nullptr;
  const Reflection* reflection_ = nullptr;
};
// Builds a FieldEntry for every field of the parameterized message and checks
// that the cardinality, field kind, signedness and string kind encoded in its
// type card agree with the field's descriptor.
TEST_P(TableGenTest, ValidateTypeCardForField) {
  const Descriptor* desc = message_->GetDescriptor();
  for (int i = 0, count = desc->field_count(); i < count; ++i) {
    const FieldDescriptor* field = desc->field(i);
    auto field_entry = CreateFieldEntryWithoutAux(reflection_, message_, field);

    // Validate cardinality.
    EXPECT_EQ(field->is_repeated(), field_entry.IsRepeated());
    uint8_t cardinality = field_entry.GetCardinality();
    switch (cardinality) {
      case Cardinality::kRepeated:
        EXPECT_TRUE(field->is_repeated());
        break;
      case Cardinality::kOptional:
        EXPECT_FALSE(field->is_repeated());
        EXPECT_TRUE(field->has_presence());
        break;
      case Cardinality::kSingular:
        EXPECT_FALSE(field->is_repeated());
        EXPECT_FALSE(field->has_presence());
        break;
      case Cardinality::kOneof:
        EXPECT_FALSE(field->is_repeated());
        EXPECT_TRUE(field->real_containing_oneof());
        break;
      default:
        Unreachable();
        break;
    }

    // Validate field types, etc.
    switch (field->cpp_type()) {
      case FieldDescriptor::CPPTYPE_ENUM:
      case FieldDescriptor::CPPTYPE_INT32:
        EXPECT_EQ(field_entry.GetFieldKind(), FieldKind::kFixed32);
        EXPECT_TRUE(field_entry.IsSigned());
        break;
      case FieldDescriptor::CPPTYPE_INT64:
        EXPECT_EQ(field_entry.GetFieldKind(), FieldKind::kFixed64);
        EXPECT_TRUE(field_entry.IsSigned());
        break;
      case FieldDescriptor::CPPTYPE_FLOAT:
      case FieldDescriptor::CPPTYPE_UINT32:
        EXPECT_EQ(field_entry.GetFieldKind(), FieldKind::kFixed32);
        EXPECT_FALSE(field_entry.IsSigned());
        break;
      case FieldDescriptor::CPPTYPE_DOUBLE:
      case FieldDescriptor::CPPTYPE_UINT64:
        EXPECT_EQ(field_entry.GetFieldKind(), FieldKind::kFixed64);
        EXPECT_FALSE(field_entry.IsSigned());
        break;
      case FieldDescriptor::CPPTYPE_BOOL:
        EXPECT_EQ(field_entry.GetFieldKind(), FieldKind::kFixed8);
        EXPECT_FALSE(field_entry.IsSigned());
        break;
      case FieldDescriptor::CPPTYPE_STRING:
        EXPECT_EQ(field->requires_utf8_validation(), field_entry.IsUTF8())
            << field->full_name();
        switch (field->cpp_string_type()) {
          case FieldDescriptor::CppStringType::kCord:
            EXPECT_EQ(field_entry.GetStringKind(), StringKind::kCord);
            break;
          // MakeTypeCardForField() currently encodes VIEW fields exactly like
          // regular string fields, so both share the same expectations.
          case FieldDescriptor::CppStringType::kView:
          case FieldDescriptor::CppStringType::kString:
            if (field->is_repeated()) {
              EXPECT_EQ(field_entry.GetStringKind(), StringKind::kStringPtr);
            } else if (V2TableGenTester::IsInlined(reflection_, field)) {
              EXPECT_EQ(field_entry.GetStringKind(), StringKind::kInlined);
            } else {
              EXPECT_EQ(field_entry.GetStringKind(), StringKind::kArenaPtr);
            }
            break;
        }
        break;
      case FieldDescriptor::CPPTYPE_MESSAGE:
        // No message-specific checks yet.
        break;
      default:
        Unreachable();
        break;
    }
  }
}
// Runs the suite against each listed message's default instance. The
// test-name suffix is the message's type name with every non-alphanumeric
// character replaced by '_'.
INSTANTIATE_TEST_SUITE_P(
    V2, TableGenTest,
    testing::Values(&TestAllTypes::default_instance(),
                    &TestMessageSetExtension1::default_instance()),
    [](const testing::TestParamInfo<TableGenTest::ParamType>& info) {
      std::string name = info.param->GetTypeName();
      // std::isalnum() has undefined behavior for negative arguments other
      // than EOF, so convert through unsigned char before the call.
      absl::c_replace_if(
          name,
          [](char c) { return !std::isalnum(static_cast<unsigned char>(c)); },
          '_');
      return name;
    });
} // namespace
} // namespace v2
} // namespace internal
} // namespace protobuf
} // namespace google

@ -154,6 +154,9 @@ namespace field_layout {
enum TransformValidation : uint16_t;
} // namespace field_layout
// Forward declaration of the v2 table-gen test helper so that Reflection can
// name it in a friend declaration.
namespace v2 {
class V2TableGenTester;
} // namespace v2
} // namespace internal
class UnknownFieldSet; // unknown_field_set.h
namespace io {
@ -1133,6 +1136,8 @@ class PROTOBUF_EXPORT Reflection final {
bool is_string) const;
friend class MapReflectionTester;
friend class internal::v2::V2TableGenTester;
// Returns true if key is in map. Returns false if key is not in map field.
bool ContainsMapKey(const Message& message, const FieldDescriptor* field,
const MapKey& key) const;

Loading…
Cancel
Save