// Copyright (c) 2009-2021, Google LLC
// All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of Google LLC nor the
// names of its contributors may be used to endorse or promote products
// derived from this software without specific prior written permission.
Refactored message accessors to share a common set of functions instead of duplicating logic.
Prior to this CL, there were several different code paths for reading/writing message data. Generated code, MiniTable accessors, and reflection all performed direct manipulation of the bits and bytes in a message, but they all had distinct implementations that shared little, if any, code. This divergence meant that they could easily have different behavior, bugs could creep into one but not another, and we would need three different sets of tests to get full test coverage. This also made it very difficult to change the internal representation in any way, since it would require updating many places in the code.
With this CL, the three different APIs for accessing message data now all share a common set of functions. The common functions all take a `upb_MiniTableField` as the canonical description of a field's type and layout. The lowest-level functions are very branchy, as they must test for every possible variation in the field type (field vs. oneof, hasbit vs. no-hasbit, different field sizes, whether a nonzero default value exists, extension vs. regular field). However, these functions are declared inline and designed to be very optimizable when values are known at compile time.
In generated accessors, for example, we can declare constant `upb_MiniTableField` instances so that all values can constant-propagate, and we can get fully specialized code even though we are calling a generic function. On the other hand, when we use the generic functions from reflection, we get runtime branches since values are not known at compile time. Even so, the generic functions are written to be as efficient as possible when used from reflection. For example, we use memcpy() calls with constant length so that the compiler can optimize these into inline loads/stores without having to make an out-of-line call to memcpy().
In this way, this CL should be a benefit to both correctness and performance. It will also make it easier to change the message representation, for example to optimize the encoder by giving hasbits to all fields.
Note that we have not completely consolidated all access in this CL:
1. Some functions outside of get/set such as clear and hazzers are not yet unified.
2. The encoder and decoder still touch the message without going through the common functions. The encoder and decoder require a bit more specialized code to get good performance when reading/writing fields en masse.
PiperOrigin-RevId: 490016095
2 years ago
#include <cstdint>
#include <memory>
#include "google/protobuf/descriptor.pb.h"
#include "absl/container/flat_hash_map.h"
#include "absl/container/flat_hash_set.h"
#include "absl/strings/substitute.h"
#include "google/protobuf/compiler/code_generator.h"
#include "google/protobuf/compiler/plugin.h"
#include "google/protobuf/descriptor.h"
#include "google/protobuf/wire_format.h"
#include "upb/mini_table/encode_internal.hpp"
#include "upb/mini_table/enum_internal.h"
#include "upb/mini_table/extension_internal.h"
#include "upbc/common.h"
#include "upbc/file_layout.h"
#include "upbc/names.h"
// Must be last.
#include "upb/port/def.inc"
namespace upbc {
namespace {
namespace protoc = ::google::protobuf::compiler;
namespace protobuf = ::google::protobuf;
// Returns fields in order of "hotness", i.e. how frequently they appear in
// serialized payloads. Ideally this will use a profile. When we don't have
// that, we assume that fields with smaller numbers are used more frequently.
//
// The comparator below compares the pair (!is_required(), number()), so
// required fields sort first and, within each group, fields are ordered by
// ascending field number.
// NOTE(review): the statements that populate `fields` inside the loop and that
// open the sort call are not visible in this view -- confirm against the full
// file.
inline std::vector<const google::protobuf::FieldDescriptor*> FieldHotnessOrder(
const google::protobuf::Descriptor* message) {
std::vector<const google::protobuf::FieldDescriptor*> fields;
size_t field_count = message->field_count();
for (size_t i = 0; i < field_count; i++) {
fields.begin(), fields.end(),
[](const google::protobuf::FieldDescriptor* a, const google::protobuf::FieldDescriptor* b) {
return std::make_pair(!a->is_required(), a->number()) <
std::make_pair(!b->is_required(), b->number());
return fields;
// Returns the name of the generated C source file for `file`: the file's name
// with its extension stripped (via StripExtension) and ".upb.c" appended.
std::string SourceFilename(const google::protobuf::FileDescriptor* file) {
  return StripExtension(file->name()) + ".upb.c";
}
// Returns MessageName(descriptor) with the "_msg_init" suffix appended.
// NOTE(review): presumably the symbol name of the message's generated layout
// table -- confirm against the code that emits it.
std::string MessageInit(const protobuf::Descriptor* descriptor) {
  return MessageName(descriptor) + "_msg_init";
}
// Returns the enum's full name converted to a C identifier (via ToCIdent) with
// the "_enum_init" suffix appended.
std::string EnumInit(const protobuf::EnumDescriptor* descriptor) {
  return ToCIdent(descriptor->full_name()) + "_enum_init";
}
// Returns the identifier prefix used for symbols generated for extension
// `ext`.
//
// If the extension is declared inside a message (extension_scope() is
// non-null), the prefix is that message's C name; otherwise it is the package
// of the file declaring the extension, converted to a C identifier.
std::string ExtensionIdentBase(const protobuf::FieldDescriptor* ext) {
  if (const protobuf::Descriptor* scope = ext->extension_scope()) {
    return MessageName(scope);
  }
  return ToCIdent(ext->file()->package());
}
// Returns "<ExtensionIdentBase(ext)>_<ext name>_ext". The generated extension
// accessors take the address of a symbol with this name.
std::string ExtensionLayout(const google::protobuf::FieldDescriptor* ext) {
  return absl::StrCat(ExtensionIdentBase(ext), "_", ext->name(), "_ext");
}
// Fixed identifier strings for the enum, extension and message layout groups.
// NOTE(review): presumably the names of the generated layout arrays -- confirm
// against the code that uses these constants.
const char* kEnumsInit = "enums_layout";
const char* kExtensionsInit = "extensions_layout";
const char* kMessagesInit = "messages_layout";
// Returns the C identifier for an enum value: its fully-qualified name
// converted with ToCIdent.
std::string EnumValueSymbol(const protobuf::EnumValueDescriptor* value) {
  return ToCIdent(value->full_name());
}
// Returns the C type spelling used for `field` in generated code, selected by
// the field's cpp_type().
//
// `is_const` only affects message-typed fields, where it prepends "const ";
// every scalar case returns a fixed type name. Enum and int32 fields both map
// to "int32_t", and string fields map to "upb_StringView".
std::string CTypeInternal(const protobuf::FieldDescriptor* field,
bool is_const) {
std::string maybe_const = is_const ? "const " : "";
switch (field->cpp_type()) {
case protobuf::FieldDescriptor::CPPTYPE_MESSAGE: {
// Prefix with "struct " when the message type is defined in a different file
// than the field itself.
std::string maybe_struct =
field->file() != field->message_type()->file() ? "struct " : "";
// NOTE(review): the expression below ends with a dangling '+'; its final
// operand is not visible in this view -- confirm against the full file.
return maybe_const + maybe_struct + MessageName(field->message_type()) +
case protobuf::FieldDescriptor::CPPTYPE_BOOL:
return "bool";
case protobuf::FieldDescriptor::CPPTYPE_FLOAT:
return "float";
case protobuf::FieldDescriptor::CPPTYPE_INT32:
case protobuf::FieldDescriptor::CPPTYPE_ENUM:
return "int32_t";
case protobuf::FieldDescriptor::CPPTYPE_UINT32:
return "uint32_t";
case protobuf::FieldDescriptor::CPPTYPE_DOUBLE:
return "double";
case protobuf::FieldDescriptor::CPPTYPE_INT64:
return "int64_t";
case protobuf::FieldDescriptor::CPPTYPE_UINT64:
return "uint64_t";
case protobuf::FieldDescriptor::CPPTYPE_STRING:
return "upb_StringView";
// NOTE(review): no label is visible ahead of this statement; presumably it
// belongs to a `default:` branch -- confirm against the full file.
fprintf(stderr, "Unexpected type");
// Returns, as a string, a size exponent for `field` selected by its
// cpp_type(): "0" for bool, "2" for enum/float/int32/uint32, and "3" for
// double/int64/uint64. These values are consistent with log2 of the value's
// size in bytes.
//
// Message and string fields return a UPB_SIZE(a, b) macro expression instead
// of a literal number.
// NOTE(review): presumably UPB_SIZE selects between 32-bit and 64-bit
// targets -- confirm against the macro's definition.
std::string SizeLg2(const protobuf::FieldDescriptor* field) {
switch (field->cpp_type()) {
case protobuf::FieldDescriptor::CPPTYPE_MESSAGE:
return "UPB_SIZE(2, 3)";
case protobuf::FieldDescriptor::CPPTYPE_ENUM:
return std::to_string(2);
case protobuf::FieldDescriptor::CPPTYPE_BOOL:
return std::to_string(0);
case protobuf::FieldDescriptor::CPPTYPE_FLOAT:
return std::to_string(2);
case protobuf::FieldDescriptor::CPPTYPE_INT32:
return std::to_string(2);
case protobuf::FieldDescriptor::CPPTYPE_UINT32:
return std::to_string(2);
case protobuf::FieldDescriptor::CPPTYPE_DOUBLE:
return std::to_string(3);
case protobuf::FieldDescriptor::CPPTYPE_INT64:
return std::to_string(3);
case protobuf::FieldDescriptor::CPPTYPE_UINT64:
return std::to_string(3);
case protobuf::FieldDescriptor::CPPTYPE_STRING:
return "UPB_SIZE(3, 4)";
// NOTE(review): no label is visible ahead of this statement; presumably it
// belongs to a `default:` branch -- confirm against the full file.
fprintf(stderr, "Unexpected type");
// Returns the text used to spell a float value in generated C code.
//
// +infinity and -infinity map to "kUpb_FltInfinity" and "-kUpb_FltInfinity",
// NaN maps to "kUpb_NaN", and every other value is formatted with
// absl::StrCat.
std::string FloatToCLiteral(float value) {
if (value == std::numeric_limits<float>::infinity()) {
return "kUpb_FltInfinity";
} else if (value == -std::numeric_limits<float>::infinity()) {
return "-kUpb_FltInfinity";
Refactored message accessors to share a common set of functions instead of duplicating logic.
Prior to this CL, there were several different code paths for reading/writing message data. Generated code, MiniTable accessors, and reflection all performed direct manipulation of the bits and bytes in a message, but they all had distinct implementations that did not share much of any code. This divergence meant that they could easily have different behavior, bugs could creep into one but not another, and we would need three different sets of tests to get full test coverage. This also made it very difficult to change the internal representation in any way, since it would require updating many places in the code.
With this CL, the three different APIs for accessing message data now all share a common set of functions. The common functions all take a `upb_MiniTableField` as the canonical description of a field's type and layout. The lowest-level functions are very branchy, as they must test for every possible variation in the field type (field vs oneof, hasbit vs no-hasbit, different field sizes, whether a nonzero default value exists, extension vs. regular field), however these functions are declared inline and designed to be very optimizable when values are known at compile time.
In generated accessors, for example, we can declare constant `upb_MiniTableField` instances so that all values can constant-propagate, and we can get fully specialized code even though we are calling a generic function. On the other hand, when we use the generic functions from reflection, we get runtime branches since values are not known at compile time. But even the function is written to still be as efficient as possible even when used from reflection. For example, we use memcpy() calls with constant length so that the compiler can optimize these into inline loads/stores without having to make an out-of-line call to memcpy().
In this way, this CL should be a benefit to both correctness and performance. It will also make it easier to change the message representation, for example to optimize the encoder by giving hasbits to all fields.
Note that we have not completely consolidated all access in this CL:
1. Some functions outside of get/set such as clear and hazzers are not yet unified.
2. The encoder and decoder still touch the message without going through the common functions. The encoder and decoder require a bit more specialized code to get good performance when reading/writing fields en masse.
PiperOrigin-RevId: 490016095
2 years ago
} else if (std::isnan(value)) {
return "kUpb_NaN";
} else {
return absl::StrCat(value);
// Returns the text used to spell a double value in generated C code.
//
// +infinity and -infinity map to "kUpb_Infinity" and "-kUpb_Infinity", NaN
// maps to "kUpb_NaN", and every other value is formatted with absl::StrCat.
std::string DoubleToCLiteral(double value) {
if (value == std::numeric_limits<double>::infinity()) {
return "kUpb_Infinity";
} else if (value == -std::numeric_limits<double>::infinity()) {
return "-kUpb_Infinity";
Refactored message accessors to share a common set of functions instead of duplicating logic.
Prior to this CL, there were several different code paths for reading/writing message data. Generated code, MiniTable accessors, and reflection all performed direct manipulation of the bits and bytes in a message, but they all had distinct implementations that did not share much of any code. This divergence meant that they could easily have different behavior, bugs could creep into one but not another, and we would need three different sets of tests to get full test coverage. This also made it very difficult to change the internal representation in any way, since it would require updating many places in the code.
With this CL, the three different APIs for accessing message data now all share a common set of functions. The common functions all take a `upb_MiniTableField` as the canonical description of a field's type and layout. The lowest-level functions are very branchy, as they must test for every possible variation in the field type (field vs oneof, hasbit vs no-hasbit, different field sizes, whether a nonzero default value exists, extension vs. regular field), however these functions are declared inline and designed to be very optimizable when values are known at compile time.
In generated accessors, for example, we can declare constant `upb_MiniTableField` instances so that all values can constant-propagate, and we can get fully specialized code even though we are calling a generic function. On the other hand, when we use the generic functions from reflection, we get runtime branches since values are not known at compile time. But even the function is written to still be as efficient as possible even when used from reflection. For example, we use memcpy() calls with constant length so that the compiler can optimize these into inline loads/stores without having to make an out-of-line call to memcpy().
In this way, this CL should be a benefit to both correctness and performance. It will also make it easier to change the message representation, for example to optimize the encoder by giving hasbits to all fields.
Note that we have not completely consolidated all access in this CL:
1. Some functions outside of get/set such as clear and hazzers are not yet unified.
2. The encoder and decoder still touch the message without going through the common functions. The encoder and decoder require a bit more specialized code to get good performance when reading/writing fields en masse.
PiperOrigin-RevId: 490016095
2 years ago
} else if (std::isnan(value)) {
return "kUpb_NaN";
} else {
return absl::StrCat(value);
// Returns a C expression, as a string, for the default value of `field`,
// selected by the field's cpp_type().
//
// Message fields default to "NULL". Integer defaults are emitted with an
// explicit cast and, for every type except int32, a literal suffix (ll, u,
// ull). Float and double defaults go through FloatToCLiteral and
// DoubleToCLiteral. If no case returns, the function returns "XXX".
std::string FieldDefault(const protobuf::FieldDescriptor* field) {
switch (field->cpp_type()) {
case protobuf::FieldDescriptor::CPPTYPE_MESSAGE:
return "NULL";
case protobuf::FieldDescriptor::CPPTYPE_STRING:
// NOTE(review): the argument list of this call is not visible in this view --
// confirm against the full file.
return absl::Substitute("upb_StringView_FromString(\"$0\")",
case protobuf::FieldDescriptor::CPPTYPE_INT32:
return absl::Substitute("(int32_t)$0", field->default_value_int32());
case protobuf::FieldDescriptor::CPPTYPE_INT64:
return absl::Substitute("(int64_t)$0ll", field->default_value_int64());
case protobuf::FieldDescriptor::CPPTYPE_UINT32:
return absl::Substitute("(uint32_t)$0u", field->default_value_uint32());
case protobuf::FieldDescriptor::CPPTYPE_UINT64:
return absl::Substitute("(uint64_t)$0ull", field->default_value_uint64());
case protobuf::FieldDescriptor::CPPTYPE_FLOAT:
return FloatToCLiteral(field->default_value_float());
case protobuf::FieldDescriptor::CPPTYPE_DOUBLE:
return DoubleToCLiteral(field->default_value_double());
case protobuf::FieldDescriptor::CPPTYPE_BOOL:
return field->default_value_bool() ? "true" : "false";
case protobuf::FieldDescriptor::CPPTYPE_ENUM:
// Use a number instead of a symbolic name so that we don't require
// this enum's header to be included.
return absl::StrCat(field->default_value_enum()->number());
return "XXX";
// Returns the C type spelling for `field` without a const qualifier; thin
// wrapper around CTypeInternal(field, /*is_const=*/false).
std::string CType(const protobuf::FieldDescriptor* field) {
  return CTypeInternal(field, /*is_const=*/false);
}
// Returns the C type spelling for `field` with const requested; thin wrapper
// around CTypeInternal(field, /*is_const=*/true).
std::string CTypeConst(const protobuf::FieldDescriptor* field) {
  return CTypeInternal(field, /*is_const=*/true);
}
// Returns the C type of the key of map field `map_field`, i.e. CType() applied
// to the map_key() field of the field's message type.
std::string MapKeyCType(const protobuf::FieldDescriptor* map_field) {
  return CType(map_field->message_type()->map_key());
}
// Returns the C type of the value of map field `map_field`, i.e. CType()
// applied to the map_value() field of the field's message type.
std::string MapValueCType(const protobuf::FieldDescriptor* map_field) {
  return CType(map_field->message_type()->map_value());
}
// Returns the size expression to use for the key of map field `map_field`:
// either the literal "0" or "sizeof(<expr>)", depending on the key's
// cpp_type().
// NOTE(review): the right-hand operand of the `==` comparison is not visible
// in this view, so which key type yields "0" cannot be determined here --
// confirm against the full file.
std::string MapKeySize(const protobuf::FieldDescriptor* map_field,
absl::string_view expr) {
return map_field->message_type()->map_key()->cpp_type() ==
? "0"
: absl::StrCat("sizeof(", expr, ")");
// Returns the size expression to use for the value of map field `map_field`:
// either the literal "0" or "sizeof(<expr>)", depending on the value's
// cpp_type().
// NOTE(review): the right-hand operand of the `==` comparison is not visible
// in this view, so which value type yields "0" cannot be determined here --
// confirm against the full file.
std::string MapValueSize(const protobuf::FieldDescriptor* map_field,
absl::string_view expr) {
return map_field->message_type()->map_value()->cpp_type() ==
? "0"
: absl::StrCat("sizeof(", expr, ")");
// Forward declaration; the definition is not visible in this portion of the
// file.
std::string FieldInitializer(const FileLayout& layout,
const protobuf::FieldDescriptor* field);
// Writes the values of enum `desc` to `output`, one " <symbol> = <number>"
// entry per value, in ascending numeric order (the values are sorted by
// number() before being emitted).
// NOTE(review): the body of the first loop (which presumably fills `values`)
// and the body of the final `if` are not visible in this view -- confirm
// against the full file.
void DumpEnumValues(const protobuf::EnumDescriptor* desc, Output& output) {
std::vector<const protobuf::EnumValueDescriptor*> values;
for (int i = 0; i < desc->value_count(); i++) {
std::sort(values.begin(), values.end(),
[](const protobuf::EnumValueDescriptor* a,
const protobuf::EnumValueDescriptor* b) {
return a->number() < b->number();
for (size_t i = 0; i < values.size(); i++) {
auto value = values[i];
output(" $0 = $1", EnumValueSymbol(value), value->number());
// True for every entry except the last one.
if (i != values.size() - 1) {
Refactored message accessors to share a common set of functions instead of duplicating logic.
Prior to this CL, there were several different code paths for reading/writing message data. Generated code, MiniTable accessors, and reflection all performed direct manipulation of the bits and bytes in a message, but they all had distinct implementations that did not share much of any code. This divergence meant that they could easily have different behavior, bugs could creep into one but not another, and we would need three different sets of tests to get full test coverage. This also made it very difficult to change the internal representation in any way, since it would require updating many places in the code.
With this CL, the three different APIs for accessing message data now all share a common set of functions. The common functions all take a `upb_MiniTableField` as the canonical description of a field's type and layout. The lowest-level functions are very branchy, as they must test for every possible variation in the field type (field vs oneof, hasbit vs no-hasbit, different field sizes, whether a nonzero default value exists, extension vs. regular field), however these functions are declared inline and designed to be very optimizable when values are known at compile time.
In generated accessors, for example, we can declare constant `upb_MiniTableField` instances so that all values can constant-propagate, and we can get fully specialized code even though we are calling a generic function. On the other hand, when we use the generic functions from reflection, we get runtime branches since values are not known at compile time. But even the function is written to still be as efficient as possible even when used from reflection. For example, we use memcpy() calls with constant length so that the compiler can optimize these into inline loads/stores without having to make an out-of-line call to memcpy().
In this way, this CL should be a benefit to both correctness and performance. It will also make it easier to change the message representation, for example to optimize the encoder by giving hasbits to all fields.
Note that we have not completely consolidated all access in this CL:
1. Some functions outside of get/set such as clear and hazzers are not yet unified.
2. The encoder and decoder still touch the message without going through the common functions. The encoder and decoder require a bit more specialized code to get good performance when reading/writing fields en masse.
PiperOrigin-RevId: 490016095
2 years ago
// Forward declaration so that GenerateExtensionInHeader below can call it; the
// definition is not visible in this portion of the file.
std::string GetFieldRep(const FileLayout& layout,
const protobuf::FieldDescriptor* field);
void GenerateExtensionInHeader(const protobuf::FieldDescriptor* ext,
Refactored message accessors to share a common set of functions instead of duplicating logic.
Prior to this CL, there were several different code paths for reading/writing message data. Generated code, MiniTable accessors, and reflection all performed direct manipulation of the bits and bytes in a message, but they all had distinct implementations that did not share much of any code. This divergence meant that they could easily have different behavior, bugs could creep into one but not another, and we would need three different sets of tests to get full test coverage. This also made it very difficult to change the internal representation in any way, since it would require updating many places in the code.
With this CL, the three different APIs for accessing message data now all share a common set of functions. The common functions all take a `upb_MiniTableField` as the canonical description of a field's type and layout. The lowest-level functions are very branchy, as they must test for every possible variation in the field type (field vs oneof, hasbit vs no-hasbit, different field sizes, whether a nonzero default value exists, extension vs. regular field), however these functions are declared inline and designed to be very optimizable when values are known at compile time.
In generated accessors, for example, we can declare constant `upb_MiniTableField` instances so that all values can constant-propagate, and we can get fully specialized code even though we are calling a generic function. On the other hand, when we use the generic functions from reflection, we get runtime branches since values are not known at compile time. But even the function is written to still be as efficient as possible even when used from reflection. For example, we use memcpy() calls with constant length so that the compiler can optimize these into inline loads/stores without having to make an out-of-line call to memcpy().
In this way, this CL should be a benefit to both correctness and performance. It will also make it easier to change the message representation, for example to optimize the encoder by giving hasbits to all fields.
Note that we have not completely consolidated all access in this CL:
1. Some functions outside of get/set such as clear and hazzers are not yet unified.
2. The encoder and decoder still touch the message without going through the common functions. The encoder and decoder require a bit more specialized code to get good performance when reading/writing fields en masse.
PiperOrigin-RevId: 490016095
2 years ago
const FileLayout& layout, Output& output) {
UPB_INLINE bool $0_has_$1(const struct $2* msg) {
return _upb_Message_HasExtensionField(msg, &$3);
ExtensionIdentBase(ext), ext->name(), MessageName(ext->containing_type()),
UPB_INLINE void $0_clear_$1(struct $2* msg) {
_upb_Message_ClearExtensionField(msg, &$3);
ExtensionIdentBase(ext), ext->name(), MessageName(ext->containing_type()),
if (ext->is_repeated()) {
Refactored message accessors to share a common set of functions instead of duplicating logic.
Prior to this CL, there were several different code paths for reading/writing message data. Generated code, MiniTable accessors, and reflection all performed direct manipulation of the bits and bytes in a message, but they all had distinct implementations that did not share much of any code. This divergence meant that they could easily have different behavior, bugs could creep into one but not another, and we would need three different sets of tests to get full test coverage. This also made it very difficult to change the internal representation in any way, since it would require updating many places in the code.
With this CL, the three different APIs for accessing message data now all share a common set of functions. The common functions all take a `upb_MiniTableField` as the canonical description of a field's type and layout. The lowest-level functions are very branchy, as they must test for every possible variation in the field type (field vs oneof, hasbit vs no-hasbit, different field sizes, whether a nonzero default value exists, extension vs. regular field), however these functions are declared inline and designed to be very optimizable when values are known at compile time.
In generated accessors, for example, we can declare constant `upb_MiniTableField` instances so that all values can constant-propagate, and we can get fully specialized code even though we are calling a generic function. On the other hand, when we use the generic functions from reflection, we get runtime branches since values are not known at compile time. But even the function is written to still be as efficient as possible even when used from reflection. For example, we use memcpy() calls with constant length so that the compiler can optimize these into inline loads/stores without having to make an out-of-line call to memcpy().
In this way, this CL should be a benefit to both correctness and performance. It will also make it easier to change the message representation, for example to optimize the encoder by giving hasbits to all fields.
Note that we have not completely consolidated all access in this CL:
1. Some functions outside of get/set such as clear and hazzers are not yet unified.
2. The encoder and decoder still touch the message without going through the common functions. The encoder and decoder require a bit more specialized code to get good performance when reading/writing fields en masse.
PiperOrigin-RevId: 490016095
2 years ago
// TODO(b/259861668): We need generated accessors for repeated extensions.
} else {
UPB_INLINE $0 $1_$2(const struct $3* msg) {
Refactored message accessors to share a common set of functions instead of duplicating logic.
Prior to this CL, there were several different code paths for reading/writing message data. Generated code, MiniTable accessors, and reflection all performed direct manipulation of the bits and bytes in a message, but they all had distinct implementations that did not share much of any code. This divergence meant that they could easily have different behavior, bugs could creep into one but not another, and we would need three different sets of tests to get full test coverage. This also made it very difficult to change the internal representation in any way, since it would require updating many places in the code.
With this CL, the three different APIs for accessing message data now all share a common set of functions. The common functions all take a `upb_MiniTableField` as the canonical description of a field's type and layout. The lowest-level functions are very branchy, as they must test for every possible variation in the field type (field vs oneof, hasbit vs no-hasbit, different field sizes, whether a nonzero default value exists, extension vs. regular field), however these functions are declared inline and designed to be very optimizable when values are known at compile time.
In generated accessors, for example, we can declare constant `upb_MiniTableField` instances so that all values can constant-propagate, and we can get fully specialized code even though we are calling a generic function. On the other hand, when we use the generic functions from reflection, we get runtime branches since values are not known at compile time. But even the function is written to still be as efficient as possible even when used from reflection. For example, we use memcpy() calls with constant length so that the compiler can optimize these into inline loads/stores without having to make an out-of-line call to memcpy().
In this way, this CL should be a benefit to both correctness and performance. It will also make it easier to change the message representation, for example to optimize the encoder by giving hasbits to all fields.
Note that we have not completely consolidated all access in this CL:
1. Some functions outside of get/set such as clear and hazzers are not yet unified.
2. The encoder and decoder still touch the message without going through the common functions. The encoder and decoder require a bit more specialized code to get good performance when reading/writing fields en masse.
PiperOrigin-RevId: 490016095
2 years ago
const upb_MiniTableExtension* ext = &$4;
UPB_ASSUME(_upb_MiniTableField_GetRep(&ext->field) == $5);
$0 default_val = $6;
$0 ret;
_upb_Message_GetExtensionField(msg, ext, &default_val, &ret);
Refactored message accessors to share a common set of functions instead of duplicating logic.
Prior to this CL, there were several different code paths for reading/writing message data. Generated code, MiniTable accessors, and reflection all performed direct manipulation of the bits and bytes in a message, but they all had distinct implementations that did not share much of any code. This divergence meant that they could easily have different behavior, bugs could creep into one but not another, and we would need three different sets of tests to get full test coverage. This also made it very difficult to change the internal representation in any way, since it would require updating many places in the code.
With this CL, the three different APIs for accessing message data now all share a common set of functions. The common functions all take a `upb_MiniTableField` as the canonical description of a field's type and layout. The lowest-level functions are very branchy, as they must test for every possible variation in the field type (field vs oneof, hasbit vs no-hasbit, different field sizes, whether a nonzero default value exists, extension vs. regular field), however these functions are declared inline and designed to be very optimizable when values are known at compile time.
In generated accessors, for example, we can declare constant `upb_MiniTableField` instances so that all values can constant-propagate, and we can get fully specialized code even though we are calling a generic function. On the other hand, when we use the generic functions from reflection, we get runtime branches since values are not known at compile time. But even the function is written to still be as efficient as possible even when used from reflection. For example, we use memcpy() calls with constant length so that the compiler can optimize these into inline loads/stores without having to make an out-of-line call to memcpy().
In this way, this CL should be a benefit to both correctness and performance. It will also make it easier to change the message representation, for example to optimize the encoder by giving hasbits to all fields.
Note that we have not completely consolidated all access in this CL:
1. Some functions outside of get/set such as clear and hazzers are not yet unified.
2. The encoder and decoder still touch the message without going through the common functions. The encoder and decoder require a bit more specialized code to get good performance when reading/writing fields en masse.
PiperOrigin-RevId: 490016095
2 years ago
return ret;
CTypeConst(ext), ExtensionIdentBase(ext), ext->name(),
MessageName(ext->containing_type()), ExtensionLayout(ext),
Refactored message accessors to share a common set of functions instead of duplicating logic.
Prior to this CL, there were several different code paths for reading/writing message data. Generated code, MiniTable accessors, and reflection all performed direct manipulation of the bits and bytes in a message, but they all had distinct implementations that did not share much of any code. This divergence meant that they could easily have different behavior, bugs could creep into one but not another, and we would need three different sets of tests to get full test coverage. This also made it very difficult to change the internal representation in any way, since it would require updating many places in the code.
With this CL, the three different APIs for accessing message data now all share a common set of functions. The common functions all take a `upb_MiniTableField` as the canonical description of a field's type and layout. The lowest-level functions are very branchy, as they must test for every possible variation in the field type (field vs oneof, hasbit vs no-hasbit, different field sizes, whether a nonzero default value exists, extension vs. regular field), however these functions are declared inline and designed to be very optimizable when values are known at compile time.
In generated accessors, for example, we can declare constant `upb_MiniTableField` instances so that all values can constant-propagate, and we can get fully specialized code even though we are calling a generic function. On the other hand, when we use the generic functions from reflection, we get runtime branches since values are not known at compile time. But even the function is written to still be as efficient as possible even when used from reflection. For example, we use memcpy() calls with constant length so that the compiler can optimize these into inline loads/stores without having to make an out-of-line call to memcpy().
In this way, this CL should be a benefit to both correctness and performance. It will also make it easier to change the message representation, for example to optimize the encoder by giving hasbits to all fields.
Note that we have not completely consolidated all access in this CL:
1. Some functions outside of get/set such as clear and hazzers are not yet unified.
2. The encoder and decoder still touch the message without going through the common functions. The encoder and decoder require a bit more specialized code to get good performance when reading/writing fields en masse.
PiperOrigin-RevId: 490016095
2 years ago
GetFieldRep(layout, ext), FieldDefault(ext));
Refactored message accessors to share a common set of functions instead of duplicating logic.
Prior to this CL, there were several different code paths for reading/writing message data. Generated code, MiniTable accessors, and reflection all performed direct manipulation of the bits and bytes in a message, but they all had distinct implementations that did not share much of any code. This divergence meant that they could easily have different behavior, bugs could creep into one but not another, and we would need three different sets of tests to get full test coverage. This also made it very difficult to change the internal representation in any way, since it would require updating many places in the code.
With this CL, the three different APIs for accessing message data now all share a common set of functions. The common functions all take a `upb_MiniTableField` as the canonical description of a field's type and layout. The lowest-level functions are very branchy, as they must test for every possible variation in the field type (field vs oneof, hasbit vs no-hasbit, different field sizes, whether a nonzero default value exists, extension vs. regular field), however these functions are declared inline and designed to be very optimizable when values are known at compile time.
In generated accessors, for example, we can declare constant `upb_MiniTableField` instances so that all values can constant-propagate, and we can get fully specialized code even though we are calling a generic function. On the other hand, when we use the generic functions from reflection, we get runtime branches since values are not known at compile time. But even the function is written to still be as efficient as possible even when used from reflection. For example, we use memcpy() calls with constant length so that the compiler can optimize these into inline loads/stores without having to make an out-of-line call to memcpy().
In this way, this CL should be a benefit to both correctness and performance. It will also make it easier to change the message representation, for example to optimize the encoder by giving hasbits to all fields.
Note that we have not completely consolidated all access in this CL:
1. Some functions outside of get/set such as clear and hazzers are not yet unified.
2. The encoder and decoder still touch the message without going through the common functions. The encoder and decoder require a bit more specialized code to get good performance when reading/writing fields en masse.
PiperOrigin-RevId: 490016095
2 years ago
UPB_INLINE void $1_set_$2(struct $3* msg, $0 val, upb_Arena* arena) {
const upb_MiniTableExtension* ext = &$4;
UPB_ASSUME(_upb_MiniTableField_GetRep(&ext->field) == $5);
bool ok = _upb_Message_SetExtensionField(msg, ext, &val, arena);
Refactored message accessors to share a common set of functions instead of duplicating logic.
Prior to this CL, there were several different code paths for reading/writing message data. Generated code, MiniTable accessors, and reflection all performed direct manipulation of the bits and bytes in a message, but they all had distinct implementations that did not share much of any code. This divergence meant that they could easily have different behavior, bugs could creep into one but not another, and we would need three different sets of tests to get full test coverage. This also made it very difficult to change the internal representation in any way, since it would require updating many places in the code.
With this CL, the three different APIs for accessing message data now all share a common set of functions. The common functions all take a `upb_MiniTableField` as the canonical description of a field's type and layout. The lowest-level functions are very branchy, as they must test for every possible variation in the field type (field vs oneof, hasbit vs no-hasbit, different field sizes, whether a nonzero default value exists, extension vs. regular field), however these functions are declared inline and designed to be very optimizable when values are known at compile time.
In generated accessors, for example, we can declare constant `upb_MiniTableField` instances so that all values can constant-propagate, and we can get fully specialized code even though we are calling a generic function. On the other hand, when we use the generic functions from reflection, we get runtime branches since values are not known at compile time. Even so, the functions are written to be as efficient as possible when used from reflection. For example, we use memcpy() calls with constant length so that the compiler can optimize these into inline loads/stores without having to make an out-of-line call to memcpy().
In this way, this CL should be a benefit to both correctness and performance. It will also make it easier to change the message representation, for example to optimize the encoder by giving hasbits to all fields.
Note that we have not completely consolidated all access in this CL:
1. Some functions outside of get/set such as clear and hazzers are not yet unified.
2. The encoder and decoder still touch the message without going through the common functions. The encoder and decoder require a bit more specialized code to get good performance when reading/writing fields en masse.
PiperOrigin-RevId: 490016095
2 years ago
CTypeConst(ext), ExtensionIdentBase(ext), ext->name(),
MessageName(ext->containing_type()), ExtensionLayout(ext),
Refactored message accessors to share a common set of functions instead of duplicating logic.
Prior to this CL, there were several different code paths for reading/writing message data. Generated code, MiniTable accessors, and reflection all performed direct manipulation of the bits and bytes in a message, but they all had distinct implementations that did not share much of any code. This divergence meant that they could easily have different behavior, bugs could creep into one but not another, and we would need three different sets of tests to get full test coverage. This also made it very difficult to change the internal representation in any way, since it would require updating many places in the code.
With this CL, the three different APIs for accessing message data now all share a common set of functions. The common functions all take a `upb_MiniTableField` as the canonical description of a field's type and layout. The lowest-level functions are very branchy, as they must test for every possible variation in the field type (field vs oneof, hasbit vs no-hasbit, different field sizes, whether a nonzero default value exists, extension vs. regular field), however these functions are declared inline and designed to be very optimizable when values are known at compile time.
In generated accessors, for example, we can declare constant `upb_MiniTableField` instances so that all values can constant-propagate, and we can get fully specialized code even though we are calling a generic function. On the other hand, when we use the generic functions from reflection, we get runtime branches since values are not known at compile time. Even so, the functions are written to be as efficient as possible when used from reflection. For example, we use memcpy() calls with constant length so that the compiler can optimize these into inline loads/stores without having to make an out-of-line call to memcpy().
In this way, this CL should be a benefit to both correctness and performance. It will also make it easier to change the message representation, for example to optimize the encoder by giving hasbits to all fields.
Note that we have not completely consolidated all access in this CL:
1. Some functions outside of get/set such as clear and hazzers are not yet unified.
2. The encoder and decoder still touch the message without going through the common functions. The encoder and decoder require a bit more specialized code to get good performance when reading/writing fields en masse.
PiperOrigin-RevId: 490016095
2 years ago
GetFieldRep(layout, ext));
// Emits the per-message lifecycle helpers into the generated header:
// `$0_new`, `$0_parse`, `$0_parse_ex`, `$0_serialize` and `$0_serialize_ex`.
//
// Template arguments: $0 = MessageName(message), $1 = MessageInit(message).
//
// Behavior of the emitted C code, as written in the template below:
//  * `_parse` / `_parse_ex` create the message with `$0_new` and return NULL
//    when that returns NULL or when upb_Decode() reports anything other than
//    kUpb_DecodeStatus_Ok. `_parse` passes NULL/0 for the extension registry
//    and options; `_parse_ex` forwards the caller's `extreg` and `options`.
//  * `_serialize` / `_serialize_ex` explicitly discard the upb_Encode() status
//    (see the TODO) and return `ptr` regardless of it.
void GenerateMessageFunctionsInHeader(const protobuf::Descriptor* message,
Output& output) {
// TODO(b/235839510): The generated code here does not check the return values
// from upb_Encode(). How can we even fix this without breaking other things?
UPB_INLINE $0* $0_new(upb_Arena* arena) {
return ($0*)_upb_Message_New(&$1, arena);
UPB_INLINE $0* $0_parse(const char* buf, size_t size, upb_Arena* arena) {
$0* ret = $0_new(arena);
if (!ret) return NULL;
if (upb_Decode(buf, size, ret, &$1, NULL, 0, arena) != kUpb_DecodeStatus_Ok) {
return NULL;
return ret;
UPB_INLINE $0* $0_parse_ex(const char* buf, size_t size,
const upb_ExtensionRegistry* extreg,
int options, upb_Arena* arena) {
$0* ret = $0_new(arena);
if (!ret) return NULL;
if (upb_Decode(buf, size, ret, &$1, extreg, options, arena) !=
kUpb_DecodeStatus_Ok) {
return NULL;
return ret;
UPB_INLINE char* $0_serialize(const $0* msg, upb_Arena* arena, size_t* len) {
char* ptr;
(void)upb_Encode(msg, &$1, 0, arena, &ptr, len);
return ptr;
UPB_INLINE char* $0_serialize_ex(const $0* msg, int options,
upb_Arena* arena, size_t* len) {
char* ptr;
(void)upb_Encode(msg, &$1, options, arena, &ptr, len);
return ptr;
MessageName(message), MessageInit(message));
// Emits the C declarations for one oneof of message `msg_name`:
//  1. A `typedef enum { ... } <fullname>_oneofcases;` with one enumerator per
//     member field, named `<fullname>_<field name>` and valued with the
//     field's number, plus `<fullname>_NOT_SET = 0`.
//  2. A `<msg_name>_<oneof name>_case(msg)` accessor that returns
//     upb_Message_WhichOneofFieldNumber() cast to that enum type.
//
// `fullname` is the oneof's full name converted with ToCIdent(). The accessor
// builds its `upb_MiniTableField` from the oneof's first field
// (`oneof->field(0)`) via FieldInitializer(layout, ...).
void GenerateOneofInHeader(const protobuf::OneofDescriptor* oneof,
const FileLayout& layout, absl::string_view msg_name,
Output& output) {
std::string fullname = ToCIdent(oneof->full_name());
output("typedef enum {\n");
for (int j = 0; j < oneof->field_count(); j++) {
const protobuf::FieldDescriptor* field = oneof->field(j);
output(" $0_$1 = $2,\n", fullname, field->name(), field->number());
" $0_NOT_SET = 0\n"
"} $0_oneofcases;\n",
UPB_INLINE $0_oneofcases $1_$2_case(const $1* msg) {
const upb_MiniTableField field = $3;
return ($0_oneofcases)upb_Message_WhichOneofFieldNumber(msg, &field);
fullname, msg_name, oneof->name(),
FieldInitializer(layout, oneof->field(0)));
// Emits the `<msg_name>_has_<field>` accessor for `field`, where <field> is
// the name returned by ResolveFieldName(field, field_names).
//
// Three shapes are produced, chosen by the field kind:
//  * field->has_presence(): delegates to _upb_Message_HasNonExtensionField()
//    using a `upb_MiniTableField` built by FieldInitializer(layout, field).
//  * map field: true when `<msg_name>_<field>_size(msg)` is nonzero.
//  * repeated field: calls the `<msg_name>_<field>(msg, &size)` accessor and
//    returns true when `size` is nonzero.
// The map and repeated variants are marked for removal (TODO b/259616267).
void GenerateHazzer(const protobuf::FieldDescriptor* field,
const FileLayout& layout, absl::string_view msg_name,
const NameToFieldDescriptorMap& field_names,
Output& output) {
std::string resolved_name = ResolveFieldName(field, field_names);
if (field->has_presence()) {
UPB_INLINE bool $0_has_$1(const $0* msg) {
const upb_MiniTableField field = $2;
return _upb_Message_HasNonExtensionField(msg, &field);
msg_name, resolved_name, FieldInitializer(layout, field));
} else if (field->is_map()) {
// TODO(b/259616267): remove.
UPB_INLINE bool $0_has_$1(const $0* msg) {
return $0_$1_size(msg) != 0;
msg_name, resolved_name);
} else if (field->is_repeated()) {
// TODO(b/259616267): remove.
UPB_INLINE bool $0_has_$1(const $0* msg) {
size_t size;
$0_$1(msg, &size);
return size != 0;
msg_name, resolved_name);
// Emits the `<msg_name>_clear_<field>` function, which clears `field` through
// _upb_Message_ClearNonExtensionField() using a `upb_MiniTableField` built by
// FieldInitializer(layout, field).
//
// Fields that are the map_key() or map_value() of their containing type are
// special-cased by the check below ("Cannot be cleared").
void GenerateClear(const protobuf::FieldDescriptor* field,
const FileLayout& layout, absl::string_view msg_name,
const NameToFieldDescriptorMap& field_names,
Output& output) {
if (field == field->containing_type()->map_key() ||
field == field->containing_type()->map_value()) {
// Cannot be cleared.
std::string resolved_name = ResolveFieldName(field, field_names);
UPB_INLINE void $0_clear_$1($0* msg) {
const upb_MiniTableField field = $2;
_upb_Message_ClearNonExtensionField(msg, &field);
msg_name, resolved_name, FieldInitializer(layout, field));
// Emits the read-only accessors for a map field of message `msg_name`:
//  * `<msg>_<field>_size(msg)`  - _upb_Map_Size() of the map, or 0 when
//    upb_Message_GetMap() returns NULL.
//  * `<msg>_<field>_get(msg, key, val)` - looks `key` up with _upb_Map_Get();
//    returns false when the map is NULL.
//  * `<msg>_<field>_next(msg, iter)` - steps the iteration with
//    _upb_map_next(); returns NULL when the map is NULL.
//
// Key/value C types come from MapKeyCType()/MapValueCType(), and the size
// arguments handed to _upb_Map_Get() come from MapKeySize(field, "key") and
// MapValueSize(field, "*val"). <field> is ResolveFieldName(field, field_names).
void GenerateMapGetters(const protobuf::FieldDescriptor* field,
const FileLayout& layout, absl::string_view msg_name,
const NameToFieldDescriptorMap& field_names,
Output& output) {
std::string resolved_name = ResolveFieldName(field, field_names);
UPB_INLINE size_t $0_$1_size(const $0* msg) {
const upb_MiniTableField field = $2;
const upb_Map* map = upb_Message_GetMap(msg, &field);
return map ? _upb_Map_Size(map) : 0;
msg_name, resolved_name, FieldInitializer(layout, field));
UPB_INLINE bool $0_$1_get(const $0* msg, $2 key, $3* val) {
const upb_MiniTableField field = $4;
const upb_Map* map = upb_Message_GetMap(msg, &field);
if (!map) return false;
return _upb_Map_Get(map, &key, $5, val, $6);
msg_name, resolved_name, MapKeyCType(field), MapValueCType(field),
FieldInitializer(layout, field), MapKeySize(field, "key"),
MapValueSize(field, "*val"));
UPB_INLINE $0 $1_$2_next(const $1* msg, size_t* iter) {
const upb_MiniTableField field = $3;
const upb_Map* map = upb_Message_GetMap(msg, &field);
if (!map) return NULL;
return ($0)_upb_map_next(map, iter);
CTypeConst(field), msg_name, resolved_name,
FieldInitializer(layout, field));
// Emits the getter `<msg_name>_<field name>(msg)` for a field of a map-entry
// message. The emitted function declares a local `ret` of type CType(field),
// fills it through `_upb_msg_map_<field name>(msg, &ret, <size>)` and returns
// it as CTypeConst(field).
//
// <size> is the literal "0" for CPPTYPE_STRING fields and "sizeof(ret)" for
// every other type. Note that this uses field->name() directly rather than a
// resolved name.
void GenerateMapEntryGetters(const protobuf::FieldDescriptor* field,
absl::string_view msg_name, Output& output) {
UPB_INLINE $0 $1_$2(const $1* msg) {
$3 ret;
_upb_msg_map_$2(msg, &ret, $4);
return ret;
CTypeConst(field), msg_name, field->name(), CType(field),
field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_STRING
? "0"
: "sizeof(ret)");
// Emits the read-only accessor for a repeated field:
// `<elem> const* <msg_name>_<field>(const <msg_name>* msg, size_t* len)`,
// which returns the result of _upb_array_accessor(msg, $3, len) cast to a
// pointer to const elements.
//
// <elem> is CTypeConst(field) and <field> is ResolveFieldName(field,
// field_names). NOTE(review): the argument substituted for $3 is not visible
// in this excerpt; presumably the field offset from `layout` - confirm.
void GenerateRepeatedGetters(const protobuf::FieldDescriptor* field,
const FileLayout& layout,
absl::string_view msg_name,
const NameToFieldDescriptorMap& field_names,
Output& output) {
UPB_INLINE $0 const* $1_$2(const $1* msg, size_t* len) {
return ($0 const*)_upb_array_accessor(msg, $3, len);
CTypeConst(field), msg_name, ResolveFieldName(field, field_names),
// Emits the getter `<msg_name>_<field>(msg)` for a singular (non-map,
// non-repeated) field. The emitted function seeds `default_val` with
// FieldDefault(field), builds a `upb_MiniTableField` from
// FieldInitializer(layout, field), reads the value through
// _upb_Message_GetNonExtensionField() and returns it as CTypeConst(field).
// <field> is ResolveFieldName(field, field_names).
void GenerateScalarGetters(const protobuf::FieldDescriptor* field,
const FileLayout& layout, absl::string_view msg_name,
const NameToFieldDescriptorMap& field_names,
Output& output) {
Refactored message accessors to share a common set of functions instead of duplicating logic.
Prior to this CL, there were several different code paths for reading/writing message data. Generated code, MiniTable accessors, and reflection all performed direct manipulation of the bits and bytes in a message, but they all had distinct implementations that did not share much of any code. This divergence meant that they could easily have different behavior, bugs could creep into one but not another, and we would need three different sets of tests to get full test coverage. This also made it very difficult to change the internal representation in any way, since it would require updating many places in the code.
With this CL, the three different APIs for accessing message data now all share a common set of functions. The common functions all take a `upb_MiniTableField` as the canonical description of a field's type and layout. The lowest-level functions are very branchy, as they must test for every possible variation in the field type (field vs oneof, hasbit vs no-hasbit, different field sizes, whether a nonzero default value exists, extension vs. regular field), however these functions are declared inline and designed to be very optimizable when values are known at compile time.
In generated accessors, for example, we can declare constant `upb_MiniTableField` instances so that all values can constant-propagate, and we can get fully specialized code even though we are calling a generic function. On the other hand, when we use the generic functions from reflection, we get runtime branches since values are not known at compile time. Even so, the functions are written to be as efficient as possible when used from reflection. For example, we use memcpy() calls with constant length so that the compiler can optimize these into inline loads/stores without having to make an out-of-line call to memcpy().
In this way, this CL should be a benefit to both correctness and performance. It will also make it easier to change the message representation, for example to optimize the encoder by giving hasbits to all fields.
Note that we have not completely consolidated all access in this CL:
1. Some functions outside of get/set such as clear and hazzers are not yet unified.
2. The encoder and decoder still touch the message without going through the common functions. The encoder and decoder require a bit more specialized code to get good performance when reading/writing fields en masse.
PiperOrigin-RevId: 490016095
2 years ago
std::string field_name = ResolveFieldName(field, field_names);
UPB_INLINE $0 $1_$2(const $1* msg) {
Refactored message accessors to share a common set of functions instead of duplicating logic.
Prior to this CL, there were several different code paths for reading/writing message data. Generated code, MiniTable accessors, and reflection all performed direct manipulation of the bits and bytes in a message, but they all had distinct implementations that did not share much of any code. This divergence meant that they could easily have different behavior, bugs could creep into one but not another, and we would need three different sets of tests to get full test coverage. This also made it very difficult to change the internal representation in any way, since it would require updating many places in the code.
With this CL, the three different APIs for accessing message data now all share a common set of functions. The common functions all take a `upb_MiniTableField` as the canonical description of a field's type and layout. The lowest-level functions are very branchy, as they must test for every possible variation in the field type (field vs oneof, hasbit vs no-hasbit, different field sizes, whether a nonzero default value exists, extension vs. regular field), however these functions are declared inline and designed to be very optimizable when values are known at compile time.
In generated accessors, for example, we can declare constant `upb_MiniTableField` instances so that all values can constant-propagate, and we can get fully specialized code even though we are calling a generic function. On the other hand, when we use the generic functions from reflection, we get runtime branches since values are not known at compile time. Even so, the functions are written to be as efficient as possible when used from reflection. For example, we use memcpy() calls with constant length so that the compiler can optimize these into inline loads/stores without having to make an out-of-line call to memcpy().
In this way, this CL should be a benefit to both correctness and performance. It will also make it easier to change the message representation, for example to optimize the encoder by giving hasbits to all fields.
Note that we have not completely consolidated all access in this CL:
1. Some functions outside of get/set such as clear and hazzers are not yet unified.
2. The encoder and decoder still touch the message without going through the common functions. The encoder and decoder require a bit more specialized code to get good performance when reading/writing fields en masse.
PiperOrigin-RevId: 490016095
2 years ago
$0 default_val = $3;
$0 ret;
const upb_MiniTableField field = $4;
_upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
Refactored message accessors to share a common set of functions instead of duplicating logic.
Prior to this CL, there were several different code paths for reading/writing message data. Generated code, MiniTable accessors, and reflection all performed direct manipulation of the bits and bytes in a message, but they all had distinct implementations that did not share much of any code. This divergence meant that they could easily have different behavior, bugs could creep into one but not another, and we would need three different sets of tests to get full test coverage. This also made it very difficult to change the internal representation in any way, since it would require updating many places in the code.
With this CL, the three different APIs for accessing message data now all share a common set of functions. The common functions all take a `upb_MiniTableField` as the canonical description of a field's type and layout. The lowest-level functions are very branchy, as they must test for every possible variation in the field type (field vs oneof, hasbit vs no-hasbit, different field sizes, whether a nonzero default value exists, extension vs. regular field), however these functions are declared inline and designed to be very optimizable when values are known at compile time.
In generated accessors, for example, we can declare constant `upb_MiniTableField` instances so that all values can constant-propagate, and we can get fully specialized code even though we are calling a generic function. On the other hand, when we use the generic functions from reflection, we get runtime branches since values are not known at compile time. Even so, the functions are written to be as efficient as possible when used from reflection. For example, we use memcpy() calls with constant length so that the compiler can optimize these into inline loads/stores without having to make an out-of-line call to memcpy().
In this way, this CL should be a benefit to both correctness and performance. It will also make it easier to change the message representation, for example to optimize the encoder by giving hasbits to all fields.
Note that we have not completely consolidated all access in this CL:
1. Some functions outside of get/set such as clear and hazzers are not yet unified.
2. The encoder and decoder still touch the message without going through the common functions. The encoder and decoder require a bit more specialized code to get good performance when reading/writing fields en masse.
PiperOrigin-RevId: 490016095
2 years ago
return ret;
CTypeConst(field), msg_name, field_name, FieldDefault(field),
FieldInitializer(layout, field));
// Dispatches to the getter generator that matches the kind of `field`.
// The checks are ordered, and the first match wins:
//  1. map field                                  -> GenerateMapGetters
//  2. field whose containing type is a map entry -> GenerateMapEntryGetters
//  3. repeated field                             -> GenerateRepeatedGetters
//  4. anything else                              -> GenerateScalarGetters
void GenerateGetters(const protobuf::FieldDescriptor* field,
const FileLayout& layout, absl::string_view msg_name,
const NameToFieldDescriptorMap& field_names,
Output& output) {
if (field->is_map()) {
GenerateMapGetters(field, layout, msg_name, field_names, output);
} else if (field->containing_type()->options().map_entry()) {
GenerateMapEntryGetters(field, msg_name, output);
} else if (field->is_repeated()) {
GenerateRepeatedGetters(field, layout, msg_name, field_names, output);
} else {
GenerateScalarGetters(field, layout, msg_name, field_names, output);
// Emits the mutating accessors for a map field of message `msg_name`:
//  * `<msg>_<field>_clear(msg)`            - returns early when the map is NULL.
//  * `<msg>_<field>_set(msg, key, val, a)` - obtains the map through
//    _upb_MiniTable_GetOrCreateMutableMap() and inserts with _upb_Map_Insert().
//  * `<msg>_<field>_delete(msg, key)`      - _upb_Map_Delete(); returns false
//    when the map is NULL.
//  * `<msg>_<field>_nextmutable(msg, iter)` - _upb_map_next(); returns NULL
//    when the map is NULL.
//
// `_clear`, `_delete` and `_nextmutable` obtain the map by casting away the
// const on the result of upb_Message_GetMap(). Key and value sizes come from
// MapKeySize(field, "key") and MapValueSize(field, "val"); <field> is
// ResolveFieldName(field, field_names).
void GenerateMapSetters(const protobuf::FieldDescriptor* field,
const FileLayout& layout, absl::string_view msg_name,
const NameToFieldDescriptorMap& field_names,
Output& output) {
std::string resolved_name = ResolveFieldName(field, field_names);
UPB_INLINE void $0_$1_clear($0* msg) {
const upb_MiniTableField field = $2;
upb_Map* map = (upb_Map*)upb_Message_GetMap(msg, &field);
if (!map) return;
msg_name, resolved_name, FieldInitializer(layout, field));
UPB_INLINE bool $0_$1_set($0* msg, $2 key, $3 val, upb_Arena* a) {
const upb_MiniTableField field = $4;
upb_Map* map = _upb_MiniTable_GetOrCreateMutableMap(msg, &field, $5, $6, a);
return _upb_Map_Insert(map, &key, $5, &val, $6, a) !=
msg_name, resolved_name, MapKeyCType(field), MapValueCType(field),
FieldInitializer(layout, field), MapKeySize(field, "key"),
MapValueSize(field, "val"));
UPB_INLINE bool $0_$1_delete($0* msg, $2 key) {
const upb_MiniTableField field = $3;
upb_Map* map = (upb_Map*)upb_Message_GetMap(msg, &field);
if (!map) return false;
return _upb_Map_Delete(map, &key, $4, NULL);
msg_name, resolved_name, MapKeyCType(field),
FieldInitializer(layout, field), MapKeySize(field, "key"));
UPB_INLINE $0 $1_$2_nextmutable($1* msg, size_t* iter) {
const upb_MiniTableField field = $3;
upb_Map* map = (upb_Map*)upb_Message_GetMap(msg, &field);
if (!map) return NULL;
return ($0)_upb_map_next(map, iter);
CType(field), msg_name, resolved_name, FieldInitializer(layout, field));
// Emits the mutating accessors for a repeated field of message `msg_name`:
//  * `<msg>_mutable_<field>(msg, len)`       - _upb_array_mutable_accessor().
//  * `<msg>_resize_<field>(msg, len, arena)` - _upb_Array_Resize_accessor2().
//  * `<msg>_add_<field>(...)`                - appends one element with
//    _upb_Array_Append_accessor2(). For CPPTYPE_MESSAGE fields the emitted
//    function allocates a new sub-message with _upb_Message_New(), appends the
//    pointer, and returns it (NULL when the append fails). For every other
//    type it takes `val` and returns the append result as a bool.
//
// The array is addressed by layout.GetFieldOffset(field); <field> is
// ResolveFieldName(field, field_names).
//
// NOTE(review): in the message-typed `_add_` template, `sub` is appended
// without being checked for NULL first - confirm whether _upb_Message_New()
// can fail here, and whether a NULL element could end up in the array.
void GenerateRepeatedSetters(const protobuf::FieldDescriptor* field,
const FileLayout& layout,
absl::string_view msg_name,
const NameToFieldDescriptorMap& field_names,
Output& output) {
std::string resolved_name = ResolveFieldName(field, field_names);
UPB_INLINE $0* $1_mutable_$2($1* msg, size_t* len) {
return ($0*)_upb_array_mutable_accessor(msg, $3, len);
CType(field), msg_name, resolved_name, layout.GetFieldOffset(field));
UPB_INLINE $0* $1_resize_$2($1* msg, size_t len, upb_Arena* arena) {
return ($0*)_upb_Array_Resize_accessor2(msg, $3, len, $4, arena);
CType(field), msg_name, resolved_name, layout.GetFieldOffset(field),
if (field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_MESSAGE) {
UPB_INLINE struct $0* $1_add_$2($1* msg, upb_Arena* arena) {
struct $0* sub = (struct $0*)_upb_Message_New(&$3, arena);
bool ok = _upb_Array_Append_accessor2(msg, $4, $5, &sub, arena);
if (!ok) return NULL;
return sub;
MessageName(field->message_type()), msg_name, resolved_name,
MessageInit(field->message_type()), layout.GetFieldOffset(field),
} else {
UPB_INLINE bool $1_add_$2($1* msg, $0 val, upb_Arena* arena) {
return _upb_Array_Append_accessor2(msg, $3, $4, &val, arena);
CType(field), msg_name, resolved_name, layout.GetFieldOffset(field),
void GenerateNonRepeatedSetters(const protobuf::FieldDescriptor* field,
const FileLayout& layout,
absl::string_view msg_name,
const NameToFieldDescriptorMap& field_names,
Output& output) {
if (field == field->containing_type()->map_key()) {
// Key cannot be mutated.
Refactored message accessors to share a common set of functions instead of duplicating logic.
Prior to this CL, there were several different code paths for reading/writing message data. Generated code, MiniTable accessors, and reflection all performed direct manipulation of the bits and bytes in a message, but they all had distinct implementations that did not share much of any code. This divergence meant that they could easily have different behavior, bugs could creep into one but not another, and we would need three different sets of tests to get full test coverage. This also made it very difficult to change the internal representation in any way, since it would require updating many places in the code.
With this CL, the three different APIs for accessing message data now all share a common set of functions. The common functions all take a `upb_MiniTableField` as the canonical description of a field's type and layout. The lowest-level functions are very branchy, as they must test for every possible variation in the field type (field vs oneof, hasbit vs no-hasbit, different field sizes, whether a nonzero default value exists, extension vs. regular field), however these functions are declared inline and designed to be very optimizable when values are known at compile time.
In generated accessors, for example, we can declare constant `upb_MiniTableField` instances so that all values can constant-propagate, and we can get fully specialized code even though we are calling a generic function. On the other hand, when we use the generic functions from reflection, we get runtime branches since values are not known at compile time. Even so, the functions are written to be as efficient as possible when used from reflection. For example, we use memcpy() calls with constant length so that the compiler can optimize these into inline loads/stores without having to make an out-of-line call to memcpy().
In this way, this CL should be a benefit to both correctness and performance. It will also make it easier to change the message representation, for example to optimize the encoder by giving hasbits to all fields.
Note that we have not completely consolidated all access in this CL:
1. Some functions outside of get/set such as clear and hazzers are not yet unified.
2. The encoder and decoder still touch the message without going through the common functions. The encoder and decoder require a bit more specialized code to get good performance when reading/writing fields en masse.
PiperOrigin-RevId: 490016095
2 years ago
std::string field_name = ResolveFieldName(field, field_names);
if (field == field->containing_type()->map_value()) {
Refactored message accessors to share a common set of functions instead of duplicating logic.
Prior to this CL, there were several different code paths for reading/writing message data. Generated code, MiniTable accessors, and reflection all performed direct manipulation of the bits and bytes in a message, but they all had distinct implementations that did not share much of any code. This divergence meant that they could easily have different behavior, bugs could creep into one but not another, and we would need three different sets of tests to get full test coverage. This also made it very difficult to change the internal representation in any way, since it would require updating many places in the code.
With this CL, the three different APIs for accessing message data now all share a common set of functions. The common functions all take a `upb_MiniTableField` as the canonical description of a field's type and layout. The lowest-level functions are very branchy, as they must test for every possible variation in the field type (field vs oneof, hasbit vs no-hasbit, different field sizes, whether a nonzero default value exists, extension vs. regular field), however these functions are declared inline and designed to be very optimizable when values are known at compile time.
In generated accessors, for example, we can declare constant `upb_MiniTableField` instances so that all values can constant-propagate, and we can get fully specialized code even though we are calling a generic function. On the other hand, when we use the generic functions from reflection, we get runtime branches since values are not known at compile time. Even so, the functions are written to be as efficient as possible when used from reflection. For example, we use memcpy() calls with constant length so that the compiler can optimize these into inline loads/stores without having to make an out-of-line call to memcpy().
In this way, this CL should be a benefit to both correctness and performance. It will also make it easier to change the message representation, for example to optimize the encoder by giving hasbits to all fields.
Note that we have not completely consolidated all access in this CL:
1. Some functions outside of get/set such as clear and hazzers are not yet unified.
2. The encoder and decoder still touch the message without going through the common functions. The encoder and decoder require a bit more specialized code to get good performance when reading/writing fields en masse.
PiperOrigin-RevId: 490016095
2 years ago
UPB_INLINE void $0_set_$1($0 *msg, $2 value) {
_upb_msg_map_set_value(msg, &value, $3);
msg_name, field_name, CType(field),
field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_STRING
? "0"
: "sizeof(" + CType(field) + ")");
} else {
Refactored message accessors to share a common set of functions instead of duplicating logic.
Prior to this CL, there were several different code paths for reading/writing message data. Generated code, MiniTable accessors, and reflection all performed direct manipulation of the bits and bytes in a message, but they all had distinct implementations that did not share much of any code. This divergence meant that they could easily have different behavior, bugs could creep into one but not another, and we would need three different sets of tests to get full test coverage. This also made it very difficult to change the internal representation in any way, since it would require updating many places in the code.
With this CL, the three different APIs for accessing message data now all share a common set of functions. The common functions all take a `upb_MiniTableField` as the canonical description of a field's type and layout. The lowest-level functions are very branchy, as they must test for every possible variation in the field type (field vs oneof, hasbit vs no-hasbit, different field sizes, whether a nonzero default value exists, extension vs. regular field), however these functions are declared inline and designed to be very optimizable when values are known at compile time.
In generated accessors, for example, we can declare constant `upb_MiniTableField` instances so that all values can constant-propagate, and we can get fully specialized code even though we are calling a generic function. On the other hand, when we use the generic functions from reflection, we get runtime branches since values are not known at compile time. Even so, the functions are written to be as efficient as possible when used from reflection. For example, we use memcpy() calls with constant length so that the compiler can optimize these into inline loads/stores without having to make an out-of-line call to memcpy().
In this way, this CL should be a benefit to both correctness and performance. It will also make it easier to change the message representation, for example to optimize the encoder by giving hasbits to all fields.
Note that we have not completely consolidated all access in this CL:
1. Some functions outside of get/set such as clear and hazzers are not yet unified.
2. The encoder and decoder still touch the message without going through the common functions. The encoder and decoder require a bit more specialized code to get good performance when reading/writing fields en masse.
PiperOrigin-RevId: 490016095
2 years ago
UPB_INLINE void $0_set_$1($0 *msg, $2 value) {
const upb_MiniTableField field = $3;
_upb_Message_SetNonExtensionField(msg, &field, &value);
Refactored message accessors to share a common set of functions instead of duplicating logic.
Prior to this CL, there were several different code paths for reading/writing message data. Generated code, MiniTable accessors, and reflection all performed direct manipulation of the bits and bytes in a message, but they all had distinct implementations that did not share much of any code. This divergence meant that they could easily have different behavior, bugs could creep into one but not another, and we would need three different sets of tests to get full test coverage. This also made it very difficult to change the internal representation in any way, since it would require updating many places in the code.
With this CL, the three different APIs for accessing message data now all share a common set of functions. The common functions all take a `upb_MiniTableField` as the canonical description of a field's type and layout. The lowest-level functions are very branchy, as they must test for every possible variation in the field type (field vs oneof, hasbit vs no-hasbit, different field sizes, whether a nonzero default value exists, extension vs. regular field), however these functions are declared inline and designed to be very optimizable when values are known at compile time.
In generated accessors, for example, we can declare constant `upb_MiniTableField` instances so that all values can constant-propagate, and we can get fully specialized code even though we are calling a generic function. On the other hand, when we use the generic functions from reflection, we get runtime branches since values are not known at compile time. But the functions are still written to be as efficient as possible, even when used from reflection. For example, we use memcpy() calls with constant length so that the compiler can optimize these into inline loads/stores without having to make an out-of-line call to memcpy().
In this way, this CL should be a benefit to both correctness and performance. It will also make it easier to change the message representation, for example to optimize the encoder by giving hasbits to all fields.
Note that we have not completely consolidated all access in this CL:
1. Some functions outside of get/set such as clear and hazzers are not yet unified.
2. The encoder and decoder still touch the message without going through the common functions. The encoder and decoder require a bit more specialized code to get good performance when reading/writing fields en masse.
PiperOrigin-RevId: 490016095
2 years ago
msg_name, field_name, CType(field), FieldInitializer(layout, field));
// Message fields also have a Msg_mutable_foo() accessor that will create
// the sub-message if it doesn't already exist.
if (field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_MESSAGE &&
!field->containing_type()->options().map_entry()) {
UPB_INLINE struct $0* $1_mutable_$2($1* msg, upb_Arena* arena) {
struct $0* sub = (struct $0*)$1_$2(msg);
if (sub == NULL) {
sub = (struct $0*)_upb_Message_New(&$3, arena);
Refactored message accessors to share a common set of functions instead of duplicating logic.
Prior to this CL, there were several different code paths for reading/writing message data. Generated code, MiniTable accessors, and reflection all performed direct manipulation of the bits and bytes in a message, but they all had distinct implementations that did not share much of any code. This divergence meant that they could easily have different behavior, bugs could creep into one but not another, and we would need three different sets of tests to get full test coverage. This also made it very difficult to change the internal representation in any way, since it would require updating many places in the code.
With this CL, the three different APIs for accessing message data now all share a common set of functions. The common functions all take a `upb_MiniTableField` as the canonical description of a field's type and layout. The lowest-level functions are very branchy, as they must test for every possible variation in the field type (field vs oneof, hasbit vs no-hasbit, different field sizes, whether a nonzero default value exists, extension vs. regular field), however these functions are declared inline and designed to be very optimizable when values are known at compile time.
In generated accessors, for example, we can declare constant `upb_MiniTableField` instances so that all values can constant-propagate, and we can get fully specialized code even though we are calling a generic function. On the other hand, when we use the generic functions from reflection, we get runtime branches since values are not known at compile time. But the functions are still written to be as efficient as possible, even when used from reflection. For example, we use memcpy() calls with constant length so that the compiler can optimize these into inline loads/stores without having to make an out-of-line call to memcpy().
In this way, this CL should be a benefit to both correctness and performance. It will also make it easier to change the message representation, for example to optimize the encoder by giving hasbits to all fields.
Note that we have not completely consolidated all access in this CL:
1. Some functions outside of get/set such as clear and hazzers are not yet unified.
2. The encoder and decoder still touch the message without going through the common functions. The encoder and decoder require a bit more specialized code to get good performance when reading/writing fields en masse.
PiperOrigin-RevId: 490016095
2 years ago
if (sub) $1_set_$2(msg, sub);
return sub;
Refactored message accessors to share a common set of functions instead of duplicating logic.
Prior to this CL, there were several different code paths for reading/writing message data. Generated code, MiniTable accessors, and reflection all performed direct manipulation of the bits and bytes in a message, but they all had distinct implementations that did not share much of any code. This divergence meant that they could easily have different behavior, bugs could creep into one but not another, and we would need three different sets of tests to get full test coverage. This also made it very difficult to change the internal representation in any way, since it would require updating many places in the code.
With this CL, the three different APIs for accessing message data now all share a common set of functions. The common functions all take a `upb_MiniTableField` as the canonical description of a field's type and layout. The lowest-level functions are very branchy, as they must test for every possible variation in the field type (field vs oneof, hasbit vs no-hasbit, different field sizes, whether a nonzero default value exists, extension vs. regular field), however these functions are declared inline and designed to be very optimizable when values are known at compile time.
In generated accessors, for example, we can declare constant `upb_MiniTableField` instances so that all values can constant-propagate, and we can get fully specialized code even though we are calling a generic function. On the other hand, when we use the generic functions from reflection, we get runtime branches since values are not known at compile time. But the functions are still written to be as efficient as possible, even when used from reflection. For example, we use memcpy() calls with constant length so that the compiler can optimize these into inline loads/stores without having to make an out-of-line call to memcpy().
In this way, this CL should be a benefit to both correctness and performance. It will also make it easier to change the message representation, for example to optimize the encoder by giving hasbits to all fields.
Note that we have not completely consolidated all access in this CL:
1. Some functions outside of get/set such as clear and hazzers are not yet unified.
2. The encoder and decoder still touch the message without going through the common functions. The encoder and decoder require a bit more specialized code to get good performance when reading/writing fields en masse.
PiperOrigin-RevId: 490016095
2 years ago
MessageName(field->message_type()), msg_name, field_name,
// Emits the setter accessors for `field` into `output`. The work is delegated
// to one of three generators depending on the field's shape: map fields,
// repeated (non-map) fields, and everything else. All arguments are forwarded
// unchanged to the chosen generator.
void GenerateSetters(const protobuf::FieldDescriptor* field,
const FileLayout& layout, absl::string_view msg_name,
const NameToFieldDescriptorMap& field_names,
Output& output) {
// Map fields are checked first, before the generic repeated case.
if (field->is_map()) {
GenerateMapSetters(field, layout, msg_name, field_names, output);
} else if (field->is_repeated()) {
GenerateRepeatedSetters(field, layout, msg_name, field_names, output);
} else {
GenerateNonRepeatedSetters(field, layout, msg_name, field_names, output);
// Emits the header-file section for a single message. The output starts with
// a `/* <full name> */` banner, followed by the message-level functions (only
// for messages that are not map entries), the oneof helpers, and then the
// per-field accessors.
// NOTE(review): closing braces are not visible here, so exactly which of the
// statements below sit inside the map_entry check should be confirmed.
void GenerateMessageInHeader(const protobuf::Descriptor* message,
const FileLayout& layout, Output& output) {
output("/* $0 */\n\n", message->full_name());
// C identifier derived from the message's full name; passed to every
// per-field generator below.
std::string msg_name = ToCIdent(message->full_name());
if (!message->options().map_entry()) {
GenerateMessageFunctionsInHeader(message, output);
// Only real oneofs are visited (the count comes from real_oneof_decl_count).
for (int i = 0; i < message->real_oneof_decl_count(); i++) {
GenerateOneofInHeader(message->oneof_decl(i), layout, msg_name, output);
auto field_names = CreateFieldNameMap(message);
// First pass over the fields: clear, getter, and hazzer accessors.
for (auto field : FieldNumberOrder(message)) {
GenerateClear(field, layout, msg_name, field_names, output);
GenerateGetters(field, layout, msg_name, field_names, output);
GenerateHazzer(field, layout, msg_name, field_names, output);
// Second pass over the same field order: setters.
for (auto field : FieldNumberOrder(message)) {
GenerateSetters(field, layout, msg_name, field_names, output);
void WriteHeader(const FileLayout& layout, Output& output) {
const protobuf::FileDescriptor* file = layout.descriptor();
EmitFileWarning(file, output);
"#ifndef $0_UPB_H_\n"
"#define $0_UPB_H_\n\n"
"#include \"upb/collections/array_internal.h\"\n"
"#include \"upb/collections/map_gencode_util.h\"\n"
"#include \"upb/message/accessors.h\"\n"
"#include \"upb/message/internal.h\"\n"
"#include \"upb/mini_table/enum_internal.h\"\n"
"#include \"upb/wire/decode.h\"\n"
"#include \"upb/wire/decode_fast.h\"\n"
"#include \"upb/wire/encode.h\"\n\n",
for (int i = 0; i < file->public_dependency_count(); i++) {
if (i == 0) {
output("/* Public Imports. */\n");
output("#include \"$0\"\n", HeaderFilename(file->public_dependency(i)));
if (i == file->public_dependency_count() - 1) {
"#include \"upb/port/def.inc\"\n"
"#ifdef __cplusplus\n"
"extern \"C\" {\n"
const std::vector<const protobuf::Descriptor*> this_file_messages =
const std::vector<const protobuf::FieldDescriptor*> this_file_exts =
// Forward-declare types defined in this file.
for (auto message : this_file_messages) {
output("typedef struct $0 $0;\n", ToCIdent(message->full_name()));
for (auto message : this_file_messages) {
output("extern const upb_MiniTable $0;\n", MessageInit(message));
for (auto ext : this_file_exts) {
output("extern const upb_MiniTableExtension $0;\n", ExtensionLayout(ext));
// Forward-declare types not in this file, but used as submessages.
// Order by full name for consistent ordering.
std::map<std::string, const protobuf::Descriptor*> forward_messages;
for (auto* message : this_file_messages) {
for (int i = 0; i < message->field_count(); i++) {
const protobuf::FieldDescriptor* field = message->field(i);
if (field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_MESSAGE &&
field->file() != field->message_type()->file()) {
forward_messages[field->message_type()->full_name()] =
for (auto ext : this_file_exts) {
if (ext->file() != ext->containing_type()->file()) {
forward_messages[ext->containing_type()->full_name()] =
for (const auto& pair : forward_messages) {
output("struct $0;\n", MessageName(pair.second));
for (const auto& pair : forward_messages) {
output("extern const upb_MiniTable $0;\n", MessageInit(pair.second));
if (!this_file_messages.empty()) {
std::vector<const protobuf::EnumDescriptor*> this_file_enums =
this_file_enums.begin(), this_file_enums.end(),
[](const protobuf::EnumDescriptor* a, const protobuf::EnumDescriptor* b) {
return a->full_name() < b->full_name();
for (auto enumdesc : this_file_enums) {
output("typedef enum {\n");
DumpEnumValues(enumdesc, output);
output("} $0;\n\n", ToCIdent(enumdesc->full_name()));
if (file->syntax() == protobuf::FileDescriptor::SYNTAX_PROTO2) {
for (const auto* enumdesc : this_file_enums) {
output("extern const upb_MiniTableEnum $0;\n", EnumInit(enumdesc));
for (auto message : this_file_messages) {
GenerateMessageInHeader(message, layout, output);
for (auto ext : this_file_exts) {
Refactored message accessors to share a common set of functions instead of duplicating logic.
Prior to this CL, there were several different code paths for reading/writing message data. Generated code, MiniTable accessors, and reflection all performed direct manipulation of the bits and bytes in a message, but they all had distinct implementations that did not share much of any code. This divergence meant that they could easily have different behavior, bugs could creep into one but not another, and we would need three different sets of tests to get full test coverage. This also made it very difficult to change the internal representation in any way, since it would require updating many places in the code.
With this CL, the three different APIs for accessing message data now all share a common set of functions. The common functions all take a `upb_MiniTableField` as the canonical description of a field's type and layout. The lowest-level functions are very branchy, as they must test for every possible variation in the field type (field vs oneof, hasbit vs no-hasbit, different field sizes, whether a nonzero default value exists, extension vs. regular field), however these functions are declared inline and designed to be very optimizable when values are known at compile time.
In generated accessors, for example, we can declare constant `upb_MiniTableField` instances so that all values can constant-propagate, and we can get fully specialized code even though we are calling a generic function. On the other hand, when we use the generic functions from reflection, we get runtime branches since values are not known at compile time. But the functions are still written to be as efficient as possible, even when used from reflection. For example, we use memcpy() calls with constant length so that the compiler can optimize these into inline loads/stores without having to make an out-of-line call to memcpy().
In this way, this CL should be a benefit to both correctness and performance. It will also make it easier to change the message representation, for example to optimize the encoder by giving hasbits to all fields.
Note that we have not completely consolidated all access in this CL:
1. Some functions outside of get/set such as clear and hazzers are not yet unified.
2. The encoder and decoder still touch the message without going through the common functions. The encoder and decoder require a bit more specialized code to get good performance when reading/writing fields en masse.
PiperOrigin-RevId: 490016095
2 years ago
GenerateExtensionInHeader(ext, layout, output);
output("extern const upb_MiniTableFile $0;\n\n", FileLayoutName(file));
if (file->name() ==
protobuf::FileDescriptorProto::descriptor()->file()->name()) {
// This is gratuitously inefficient with how many times it rebuilds
// MessageLayout objects for the same message. But we only do this for one
// proto (descriptor.proto) so we don't worry about it.
const protobuf::Descriptor* max32_message = nullptr;
const protobuf::Descriptor* max64_message = nullptr;
size_t max32 = 0;
size_t max64 = 0;
for (const auto* message : this_file_messages) {
if (absl::EndsWith(message->name(), "Options")) {
size_t size32 = layout.GetMiniTable32(message)->size;
size_t size64 = layout.GetMiniTable64(message)->size;
if (size32 > max32) {
max32 = size32;
max32_message = message;
if (size64 > max64) {
max64 = size64;
max64_message = message;
output("/* Max size 32 is $0 */\n", max32_message->full_name());
output("/* Max size 64 is $0 */\n", max64_message->full_name());
output("#define _UPB_MAXOPT_SIZE UPB_SIZE($0, $1)\n\n", max32, max64);
"#ifdef __cplusplus\n"
"} /* extern \"C\" */\n"
"#include \"upb/port/undef.inc\"\n"
"#endif /* $0_UPB_H_ */\n",
typedef std::pair<std::string, uint64_t> TableEntry;
// Returns the tag of `field` in its encoded form, packed into a uint64_t.
// The unencoded tag is built with WireFormatLite::MakeTag from the field
// number and wire type; the result is then copied out of `tag_bytes`.
// NOTE(review): the statements that choose `wire_type` and write the encoded
// tag into `tag_bytes` are not visible here — confirm against the full file.
uint64_t GetEncodedTag(const protobuf::FieldDescriptor* field) {
protobuf::internal::WireFormatLite::WireType wire_type =
uint32_t unencoded_tag =
protobuf::internal::WireFormatLite::MakeTag(field->number(), wire_type);
// Zero-initialized so that any bytes not written stay 0 in the result.
uint8_t tag_bytes[10] = {0};
uint64_t encoded_tag = 0;
// Copies the first sizeof(uint64_t) bytes of the buffer in host byte order.
memcpy(&encoded_tag, tag_bytes, sizeof(encoded_tag));
// TODO: byte-swap for big endian.
return encoded_tag;
// Maps `field` to its slot index in the fast-decode table, based on the
// field's encoded tag. Returns -1 when the encoded tag is larger than 0x7fff,
// in which case the field cannot be placed in the table.
int GetTableSlot(const protobuf::FieldDescriptor* field) {
uint64_t tag = GetEncodedTag(field);
if (tag > 0x7fff) {
// Tag must fit within a two-byte varint.
return -1;
// The slot is bits 3..7 of the encoded tag value, giving a range of 0..31.
return (tag & 0xf8) >> 3;
bool TryFillTableEntry(const FileLayout& layout,
const protobuf::FieldDescriptor* field,
TableEntry& ent) {
const upb_MiniTable* mt = layout.GetMiniTable64(field->containing_type());
const upb_MiniTableField* mt_f =
upb_MiniTable_FindFieldByNumber(mt, field->number());
std::string type = "";
std::string cardinality = "";
switch (mt_f->descriptortype) {
case kUpb_FieldType_Bool:
type = "b1";
case kUpb_FieldType_Enum:
// We don't have the means to test proto2 enum fields for valid values.
return false;
case kUpb_FieldType_Int32:
case kUpb_FieldType_UInt32:
type = "v4";
case kUpb_FieldType_Int64:
case kUpb_FieldType_UInt64:
type = "v8";
case kUpb_FieldType_Fixed32:
case kUpb_FieldType_SFixed32:
case kUpb_FieldType_Float:
type = "f4";
case kUpb_FieldType_Fixed64:
case kUpb_FieldType_SFixed64:
case kUpb_FieldType_Double:
type = "f8";
case kUpb_FieldType_SInt32:
type = "z4";
case kUpb_FieldType_SInt64:
type = "z8";
case kUpb_FieldType_String:
type = "s";
case kUpb_FieldType_Bytes:
type = "b";
case kUpb_FieldType_Message:
type = "m";
return false; // Not supported yet.
switch (upb_FieldMode_Get(mt_f)) {
case kUpb_FieldMode_Map:
return false; // Not supported yet (ever?).
case kUpb_FieldMode_Array:
if (mt_f->mode & kUpb_LabelFlags_IsPacked) {
cardinality = "p";
} else {
cardinality = "r";
case kUpb_FieldMode_Scalar:
if (mt_f->presence < 0) {
cardinality = "o";
} else {
cardinality = "s";
uint64_t expected_tag = GetEncodedTag(field);
// Data is:
// 48 32 16 0
// |--------|--------|--------|--------|--------|--------|--------|--------|
// | offset (16) |case offset (16) |presence| submsg | exp. tag (16) |
// |--------|--------|--------|--------|--------|--------|--------|--------|
// - |presence| is either hasbit index or field number for oneofs.
uint64_t data = static_cast<uint64_t>(mt_f->offset) << 48 | expected_tag;
if (field->is_repeated()) {
// No hasbit/oneof-related fields.
if (field->real_containing_oneof()) {
uint64_t case_offset = ~mt_f->presence;
if (case_offset > 0xffff) return false;
assert(field->number() < 256);
data |= field->number() << 24;
data |= case_offset << 32;
} else {
uint64_t hasbit_index = 63; // No hasbit (set a high, unused bit).
if (mt_f->presence) {
hasbit_index = mt_f->presence;
if (hasbit_index > 31) return false;
data |= hasbit_index << 24;
if (field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_MESSAGE) {
uint64_t idx = mt_f->submsg_index;
if (idx > 255) return false;
data |= idx << 16;
I think this may have reached the optimization limit.
Benchmark Time CPU Iterations
BM_ArenaOneAlloc 21 ns 21 ns 32994231
BM_ArenaInitialBlockOneAlloc 6 ns 6 ns 116318005
BM_ParseDescriptorNoHeap 3028 ns 3028 ns 231138 2.34354GB/s
BM_ParseDescriptor 3557 ns 3557 ns 196583 1.99498GB/s
BM_ParseDescriptorProto2NoArena 33228 ns 33226 ns 21196 218.688MB/s
BM_ParseDescriptorProto2WithArena 22863 ns 22861 ns 30666 317.831MB/s
BM_SerializeDescriptorProto2 5444 ns 5444 ns 127368 1.30348GB/s
BM_SerializeDescriptor 12509 ns 12508 ns 55816 580.914MB/s
$ perf stat bazel-bin/benchmark --benchmark_filter=BM_ParseDescriptorNoHeap
2020-10-08 14:07:06
Running bazel-bin/benchmark
Run on (72 X 3700 MHz CPU s)
CPU Caches:
L1 Data 32K (x36)
L1 Instruction 32K (x36)
L2 Unified 1024K (x36)
L3 Unified 25344K (x2)
Benchmark Time CPU Iterations
BM_ParseDescriptorNoHeap 3071 ns 3071 ns 227743 2.31094GB/s
Performance counter stats for 'bazel-bin/benchmark --benchmark_filter=BM_ParseDescriptorNoHeap':
1,050.22 msec task-clock # 0.978 CPUs utilized
4 context-switches # 0.004 K/sec
0 cpu-migrations # 0.000 K/sec
179 page-faults # 0.170 K/sec
3,875,796,334 cycles # 3.690 GHz
13,282,835,967 instructions # 3.43 insn per cycle
2,887,725,848 branches # 2749.627 M/sec
8,324,912 branch-misses # 0.29% of all branches
1.073924364 seconds time elapsed
1.042806000 seconds user
0.008021000 seconds sys
23.96% benchmark benchmark [.] upb_prm_1bt_max192b
22.44% benchmark benchmark [.] fastdecode_dispatch
18.96% benchmark benchmark [.] upb_pss_1bt
14.20% benchmark benchmark [.] upb_psv4_1bt
8.33% benchmark benchmark [.] upb_prm_1bt_max64b
6.66% benchmark benchmark [.] upb_prm_1bt_max128b
1.29% benchmark benchmark [.] upb_psm_1bt_max64b
0.77% benchmark benchmark [.] fastdecode_generic
0.55% benchmark [kernel.kallsyms] [k] smp_call_function_single
0.42% benchmark [kernel.kallsyms] [k] _raw_spin_lock_irqsave
0.42% benchmark benchmark [.] upb_psm_1bt_max256b
0.31% benchmark benchmark [.] upb_psb1_1bt
0.21% benchmark benchmark [.] upb_plv4_5bv
0.14% benchmark benchmark [.] upb_psb1_2bt
0.12% benchmark benchmark [.] decode_longvarint64
0.08% benchmark [kernel.kallsyms] [k] vsnprintf
0.07% benchmark [kernel.kallsyms] [k] _raw_spin_lock
0.07% benchmark benchmark [.] _upb_msg_new
0.06% benchmark ld-2.31.so [.] check_match
4 years ago
std::string size_ceil = "max";
size_t size = SIZE_MAX;
if (field->message_type()->file() == field->file()) {
// We can only be guaranteed the size of the sub-message if it is in the
// same file as us. We could relax this to increase the speed of
// cross-file sub-message parsing if we are comfortable requiring that
// users compile all messages at the same time.
const upb_MiniTable* sub_mt =
size = sub_mt->size + 8;
I think this may have reached the optimization limit.
Benchmark Time CPU Iterations
BM_ArenaOneAlloc 21 ns 21 ns 32994231
BM_ArenaInitialBlockOneAlloc 6 ns 6 ns 116318005
BM_ParseDescriptorNoHeap 3028 ns 3028 ns 231138 2.34354GB/s
BM_ParseDescriptor 3557 ns 3557 ns 196583 1.99498GB/s
BM_ParseDescriptorProto2NoArena 33228 ns 33226 ns 21196 218.688MB/s
BM_ParseDescriptorProto2WithArena 22863 ns 22861 ns 30666 317.831MB/s
BM_SerializeDescriptorProto2 5444 ns 5444 ns 127368 1.30348GB/s
BM_SerializeDescriptor 12509 ns 12508 ns 55816 580.914MB/s
$ perf stat bazel-bin/benchmark --benchmark_filter=BM_ParseDescriptorNoHeap
2020-10-08 14:07:06
Running bazel-bin/benchmark
Run on (72 X 3700 MHz CPU s)
CPU Caches:
L1 Data 32K (x36)
L1 Instruction 32K (x36)
L2 Unified 1024K (x36)
L3 Unified 25344K (x2)
Benchmark Time CPU Iterations
BM_ParseDescriptorNoHeap 3071 ns 3071 ns 227743 2.31094GB/s
Performance counter stats for 'bazel-bin/benchmark --benchmark_filter=BM_ParseDescriptorNoHeap':
1,050.22 msec task-clock # 0.978 CPUs utilized
4 context-switches # 0.004 K/sec
0 cpu-migrations # 0.000 K/sec
179 page-faults # 0.170 K/sec
3,875,796,334 cycles # 3.690 GHz
13,282,835,967 instructions # 3.43 insn per cycle
2,887,725,848 branches # 2749.627 M/sec
8,324,912 branch-misses # 0.29% of all branches
1.073924364 seconds time elapsed
1.042806000 seconds user
0.008021000 seconds sys
23.96% benchmark benchmark [.] upb_prm_1bt_max192b
22.44% benchmark benchmark [.] fastdecode_dispatch
18.96% benchmark benchmark [.] upb_pss_1bt
14.20% benchmark benchmark [.] upb_psv4_1bt
8.33% benchmark benchmark [.] upb_prm_1bt_max64b
6.66% benchmark benchmark [.] upb_prm_1bt_max128b
1.29% benchmark benchmark [.] upb_psm_1bt_max64b
0.77% benchmark benchmark [.] fastdecode_generic
0.55% benchmark [kernel.kallsyms] [k] smp_call_function_single
0.42% benchmark [kernel.kallsyms] [k] _raw_spin_lock_irqsave
0.42% benchmark benchmark [.] upb_psm_1bt_max256b
0.31% benchmark benchmark [.] upb_psb1_1bt
0.21% benchmark benchmark [.] upb_plv4_5bv
0.14% benchmark benchmark [.] upb_psb1_2bt
0.12% benchmark benchmark [.] decode_longvarint64
0.08% benchmark [kernel.kallsyms] [k] vsnprintf
0.07% benchmark [kernel.kallsyms] [k] _raw_spin_lock
0.07% benchmark benchmark [.] _upb_msg_new
0.06% benchmark ld-2.31.so [.] check_match
4 years ago
std::vector<size_t> breaks = {64, 128, 192, 256};
for (auto brk : breaks) {
if (size <= brk) {
size_ceil = std::to_string(brk);
ent.first = absl::Substitute("upb_p$0$1_$2bt_max$3b", cardinality, type,
expected_tag > 0xff ? "2" : "1", size_ceil);
I think this may have reached the optimization limit.
Benchmark Time CPU Iterations
BM_ArenaOneAlloc 21 ns 21 ns 32994231
BM_ArenaInitialBlockOneAlloc 6 ns 6 ns 116318005
BM_ParseDescriptorNoHeap 3028 ns 3028 ns 231138 2.34354GB/s
BM_ParseDescriptor 3557 ns 3557 ns 196583 1.99498GB/s
BM_ParseDescriptorProto2NoArena 33228 ns 33226 ns 21196 218.688MB/s
BM_ParseDescriptorProto2WithArena 22863 ns 22861 ns 30666 317.831MB/s
BM_SerializeDescriptorProto2 5444 ns 5444 ns 127368 1.30348GB/s
BM_SerializeDescriptor 12509 ns 12508 ns 55816 580.914MB/s
$ perf stat bazel-bin/benchmark --benchmark_filter=BM_ParseDescriptorNoHeap
2020-10-08 14:07:06
Running bazel-bin/benchmark
Run on (72 X 3700 MHz CPU s)
CPU Caches:
L1 Data 32K (x36)
L1 Instruction 32K (x36)
L2 Unified 1024K (x36)
L3 Unified 25344K (x2)
Benchmark Time CPU Iterations
BM_ParseDescriptorNoHeap 3071 ns 3071 ns 227743 2.31094GB/s
Performance counter stats for 'bazel-bin/benchmark --benchmark_filter=BM_ParseDescriptorNoHeap':
1,050.22 msec task-clock # 0.978 CPUs utilized
4 context-switches # 0.004 K/sec
0 cpu-migrations # 0.000 K/sec
179 page-faults # 0.170 K/sec
3,875,796,334 cycles # 3.690 GHz
13,282,835,967 instructions # 3.43 insn per cycle
2,887,725,848 branches # 2749.627 M/sec
8,324,912 branch-misses # 0.29% of all branches
1.073924364 seconds time elapsed
1.042806000 seconds user
0.008021000 seconds sys
23.96% benchmark benchmark [.] upb_prm_1bt_max192b
22.44% benchmark benchmark [.] fastdecode_dispatch
18.96% benchmark benchmark [.] upb_pss_1bt
14.20% benchmark benchmark [.] upb_psv4_1bt
8.33% benchmark benchmark [.] upb_prm_1bt_max64b
6.66% benchmark benchmark [.] upb_prm_1bt_max128b
1.29% benchmark benchmark [.] upb_psm_1bt_max64b
0.77% benchmark benchmark [.] fastdecode_generic
0.55% benchmark [kernel.kallsyms] [k] smp_call_function_single
0.42% benchmark [kernel.kallsyms] [k] _raw_spin_lock_irqsave
0.42% benchmark benchmark [.] upb_psm_1bt_max256b
0.31% benchmark benchmark [.] upb_psb1_1bt
0.21% benchmark benchmark [.] upb_plv4_5bv
0.14% benchmark benchmark [.] upb_psb1_2bt
0.12% benchmark benchmark [.] decode_longvarint64
0.08% benchmark [kernel.kallsyms] [k] vsnprintf
0.07% benchmark [kernel.kallsyms] [k] _raw_spin_lock
0.07% benchmark benchmark [.] _upb_msg_new
0.06% benchmark ld-2.31.so [.] check_match
4 years ago
} else {
ent.first = absl::Substitute("upb_p$0$1_$2bt", cardinality, type,
expected_tag > 0xff ? "2" : "1");
I think this may have reached the optimization limit.
Benchmark Time CPU Iterations
BM_ArenaOneAlloc 21 ns 21 ns 32994231
BM_ArenaInitialBlockOneAlloc 6 ns 6 ns 116318005
BM_ParseDescriptorNoHeap 3028 ns 3028 ns 231138 2.34354GB/s
BM_ParseDescriptor 3557 ns 3557 ns 196583 1.99498GB/s
BM_ParseDescriptorProto2NoArena 33228 ns 33226 ns 21196 218.688MB/s
BM_ParseDescriptorProto2WithArena 22863 ns 22861 ns 30666 317.831MB/s
BM_SerializeDescriptorProto2 5444 ns 5444 ns 127368 1.30348GB/s
BM_SerializeDescriptor 12509 ns 12508 ns 55816 580.914MB/s
$ perf stat bazel-bin/benchmark --benchmark_filter=BM_ParseDescriptorNoHeap
2020-10-08 14:07:06
Running bazel-bin/benchmark
Run on (72 X 3700 MHz CPU s)
CPU Caches:
L1 Data 32K (x36)
L1 Instruction 32K (x36)
L2 Unified 1024K (x36)
L3 Unified 25344K (x2)
Benchmark Time CPU Iterations
BM_ParseDescriptorNoHeap 3071 ns 3071 ns 227743 2.31094GB/s
Performance counter stats for 'bazel-bin/benchmark --benchmark_filter=BM_ParseDescriptorNoHeap':
1,050.22 msec task-clock # 0.978 CPUs utilized
4 context-switches # 0.004 K/sec
0 cpu-migrations # 0.000 K/sec
179 page-faults # 0.170 K/sec
3,875,796,334 cycles # 3.690 GHz
13,282,835,967 instructions # 3.43 insn per cycle
2,887,725,848 branches # 2749.627 M/sec
8,324,912 branch-misses # 0.29% of all branches
1.073924364 seconds time elapsed
1.042806000 seconds user
0.008021000 seconds sys
23.96% benchmark benchmark [.] upb_prm_1bt_max192b
22.44% benchmark benchmark [.] fastdecode_dispatch
18.96% benchmark benchmark [.] upb_pss_1bt
14.20% benchmark benchmark [.] upb_psv4_1bt
8.33% benchmark benchmark [.] upb_prm_1bt_max64b
6.66% benchmark benchmark [.] upb_prm_1bt_max128b
1.29% benchmark benchmark [.] upb_psm_1bt_max64b
0.77% benchmark benchmark [.] fastdecode_generic
0.55% benchmark [kernel.kallsyms] [k] smp_call_function_single
0.42% benchmark [kernel.kallsyms] [k] _raw_spin_lock_irqsave
0.42% benchmark benchmark [.] upb_psm_1bt_max256b
0.31% benchmark benchmark [.] upb_psb1_1bt
0.21% benchmark benchmark [.] upb_plv4_5bv
0.14% benchmark benchmark [.] upb_psb1_2bt
0.12% benchmark benchmark [.] decode_longvarint64
0.08% benchmark [kernel.kallsyms] [k] vsnprintf
0.07% benchmark [kernel.kallsyms] [k] _raw_spin_lock
0.07% benchmark benchmark [.] _upb_msg_new
0.06% benchmark ld-2.31.so [.] check_match
4 years ago
ent.second = data;
return true;
// Builds the fast-decode dispatch table for `message` and returns it as a
// vector of (function name, data) entries indexed by table slot. Fields are
// visited in FieldHotnessOrder; slots that no field claims hold the generic
// entry {"_upb_FastDecoder_DecodeGeneric", 0}.
// NOTE(review): the statements that skip a field (after each of the checks
// below) and the closing braces are not visible here — confirm against the
// full file.
std::vector<TableEntry> FastDecodeTable(const protobuf::Descriptor* message,
const FileLayout& layout) {
std::vector<TableEntry> table;
for (const auto field : FieldHotnessOrder(message)) {
TableEntry ent;
int slot = GetTableSlot(field);
// std::cerr << "table slot: " << field->number() << ": " << slot << "\n";
if (slot < 0) {
// Tag can't fit in the table.
if (!TryFillTableEntry(layout, field, ent)) {
// Unsupported field type or offset, hasbit index, etc. doesn't fit.
// Grow the table by doubling (starting from size 1) until `slot` is a valid
// index; new slots are filled with the generic entry.
while ((size_t)slot >= table.size()) {
size_t size = std::max(static_cast<size_t>(1), table.size() * 2);
table.resize(size, TableEntry{"_upb_FastDecoder_DecodeGeneric", 0});
// A slot whose name is no longer the generic one has already been claimed.
if (table[slot].first != "_upb_FastDecoder_DecodeGeneric") {
// A hotter field already filled this slot.
table[slot] = ent;
return table;
Refactored message accessors to share a common set of functions instead of duplicating logic.
Prior to this CL, there were several different code paths for reading/writing message data. Generated code, MiniTable accessors, and reflection all performed direct manipulation of the bits and bytes in a message, but they all had distinct implementations that did not share much of any code. This divergence meant that they could easily have different behavior, bugs could creep into one but not another, and we would need three different sets of tests to get full test coverage. This also made it very difficult to change the internal representation in any way, since it would require updating many places in the code.
With this CL, the three different APIs for accessing message data now all share a common set of functions. The common functions all take a `upb_MiniTableField` as the canonical description of a field's type and layout. The lowest-level functions are very branchy, as they must test for every possible variation in the field type (field vs oneof, hasbit vs no-hasbit, different field sizes, whether a nonzero default value exists, extension vs. regular field), however these functions are declared inline and designed to be very optimizable when values are known at compile time.
In generated accessors, for example, we can declare constant `upb_MiniTableField` instances so that all values can constant-propagate, and we can get fully specialized code even though we are calling a generic function. On the other hand, when we use the generic functions from reflection, we get runtime branches since values are not known at compile time. But even then, the function is written to be as efficient as possible when used from reflection. For example, we use memcpy() calls with constant length so that the compiler can optimize these into inline loads/stores without having to make an out-of-line call to memcpy().
In this way, this CL should be a benefit to both correctness and performance. It will also make it easier to change the message representation, for example to optimize the encoder by giving hasbits to all fields.
Note that we have not completely consolidated all access in this CL:
1. Some functions outside of get/set such as clear and hazzers are not yet unified.
2. The encoder and decoder still touch the message without going through the common functions. The encoder and decoder require a bit more specialized code to get good performance when reading/writing fields en masse.
PiperOrigin-RevId: 490016095
2 years ago
// Returns, as source text, the kUpb_FieldRep_* constant for a field's
// representation.
//
// field32 / field64 are the two MiniTable entries for the same field (see
// FileLayout::GetField32 / GetField64). The switch is driven by field32's rep;
// field64 is consulted only in the 4-byte case. When field32 reports 4 bytes
// and field64 reports 8 bytes, the result is a UPB_SIZE(...) expression naming
// both constants instead of a single constant.
std::string GetFieldRep(const upb_MiniTableField* field32,
const upb_MiniTableField* field64) {
switch (_upb_MiniTableField_GetRep(field32)) {
case kUpb_FieldRep_1Byte:
return "kUpb_FieldRep_1Byte";
case kUpb_FieldRep_4Byte: {
if (_upb_MiniTableField_GetRep(field64) == kUpb_FieldRep_4Byte) {
return "kUpb_FieldRep_4Byte";
} else {
// The only other rep the 64-bit entry is expected to have here is 8 bytes.
assert(_upb_MiniTableField_GetRep(field64) == kUpb_FieldRep_8Byte);
return "UPB_SIZE(kUpb_FieldRep_4Byte, kUpb_FieldRep_8Byte)";
case kUpb_FieldRep_StringView:
return "kUpb_FieldRep_StringView";
case kUpb_FieldRep_8Byte:
return "kUpb_FieldRep_8Byte";
// Convenience overload: fetches both MiniTable entries for `field` from
// `layout` and forwards them to the (field32, field64) overload.
std::string GetFieldRep(const FileLayout& layout,
const protobuf::FieldDescriptor* field) {
return GetFieldRep(layout.GetField32(field), layout.GetField64(field));
// Returns the field mode as a string initializer.
// We could just emit this as a number (and we may yet go in that direction) but
// for now emitting symbolic constants gives this better readability and
// debuggability.
Refactored message accessors to share a common set of functions instead of duplicating logic.
Prior to this CL, there were several different code paths for reading/writing message data. Generated code, MiniTable accessors, and reflection all performed direct manipulation of the bits and bytes in a message, but they all had distinct implementations that did not share much of any code. This divergence meant that they could easily have different behavior, bugs could creep into one but not another, and we would need three different sets of tests to get full test coverage. This also made it very difficult to change the internal representation in any way, since it would require updating many places in the code.
With this CL, the three different APIs for accessing message data now all share a common set of functions. The common functions all take a `upb_MiniTableField` as the canonical description of a field's type and layout. The lowest-level functions are very branchy, as they must test for every possible variation in the field type (field vs oneof, hasbit vs no-hasbit, different field sizes, whether a nonzero default value exists, extension vs. regular field), however these functions are declared inline and designed to be very optimizable when values are known at compile time.
In generated accessors, for example, we can declare constant `upb_MiniTableField` instances so that all values can constant-propagate, and we can get fully specialized code even though we are calling a generic function. On the other hand, when we use the generic functions from reflection, we get runtime branches since values are not known at compile time. But even then, the function is written to be as efficient as possible when used from reflection. For example, we use memcpy() calls with constant length so that the compiler can optimize these into inline loads/stores without having to make an out-of-line call to memcpy().
In this way, this CL should be a benefit to both correctness and performance. It will also make it easier to change the message representation, for example to optimize the encoder by giving hasbits to all fields.
Note that we have not completely consolidated all access in this CL:
1. Some functions outside of get/set such as clear and hazzers are not yet unified.
2. The encoder and decoder still touch the message without going through the common functions. The encoder and decoder require a bit more specialized code to get good performance when reading/writing fields en masse.
PiperOrigin-RevId: 490016095
2 years ago
// field32 / field64: the two MiniTable entries for the same field. The mode
// and label bits are read from field32 only; field64 is used solely for the
// representation part, via GetFieldRep(field32, field64).
std::string GetModeInit(const upb_MiniTableField* field32,
const upb_MiniTableField* field64) {
// Accumulates the " | "-joined expression that is returned.
std::string ret;
Refactored message accessors to share a common set of functions instead of duplicating logic.
Prior to this CL, there were several different code paths for reading/writing message data. Generated code, MiniTable accessors, and reflection all performed direct manipulation of the bits and bytes in a message, but they all had distinct implementations that did not share much of any code. This divergence meant that they could easily have different behavior, bugs could creep into one but not another, and we would need three different sets of tests to get full test coverage. This also made it very difficult to change the internal representation in any way, since it would require updating many places in the code.
With this CL, the three different APIs for accessing message data now all share a common set of functions. The common functions all take a `upb_MiniTableField` as the canonical description of a field's type and layout. The lowest-level functions are very branchy, as they must test for every possible variation in the field type (field vs oneof, hasbit vs no-hasbit, different field sizes, whether a nonzero default value exists, extension vs. regular field), however these functions are declared inline and designed to be very optimizable when values are known at compile time.
In generated accessors, for example, we can declare constant `upb_MiniTableField` instances so that all values can constant-propagate, and we can get fully specialized code even though we are calling a generic function. On the other hand, when we use the generic functions from reflection, we get runtime branches since values are not known at compile time. But even then, the function is written to be as efficient as possible when used from reflection. For example, we use memcpy() calls with constant length so that the compiler can optimize these into inline loads/stores without having to make an out-of-line call to memcpy().
In this way, this CL should be a benefit to both correctness and performance. It will also make it easier to change the message representation, for example to optimize the encoder by giving hasbits to all fields.
Note that we have not completely consolidated all access in this CL:
1. Some functions outside of get/set such as clear and hazzers are not yet unified.
2. The encoder and decoder still touch the message without going through the common functions. The encoder and decoder require a bit more specialized code to get good performance when reading/writing fields en masse.
PiperOrigin-RevId: 490016095
2 years ago
// Every mode/label bit below is taken from the 32-bit entry.
uint8_t mode32 = field32->mode;
// Start the expression with the name of the field mode selected by
// kUpb_FieldMode_Mask.
switch (mode32 & kUpb_FieldMode_Mask) {
case kUpb_FieldMode_Map:
ret = "kUpb_FieldMode_Map";
case kUpb_FieldMode_Array:
ret = "kUpb_FieldMode_Array";
case kUpb_FieldMode_Scalar:
ret = "kUpb_FieldMode_Scalar";
// Each label flag that is set is OR-ed onto the expression by name.
if (mode32 & kUpb_LabelFlags_IsPacked) {
absl::StrAppend(&ret, " | kUpb_LabelFlags_IsPacked");
if (mode32 & kUpb_LabelFlags_IsExtension) {
absl::StrAppend(&ret, " | kUpb_LabelFlags_IsExtension");
Added function for getting the type of a MiniTable field
Prior to this CL, users were relying on `field->descriptortype` to get the field type. This almost works, as `field->descriptortype` is almost, but not quite, the field type of the field. In two special cases we deviate from the true field type, for ease of parsing and serialization:
- For open enums, we use `kUpb_FieldType_Int32` instead of `kUpb_FieldType_Enum`, because from the perspective of the wire format, an open enum field is equivalent to int32.
- For proto2 strings, we use `kUpb_FieldType_Bytes` instead of `kUpb_FieldType_String`, because proto2 strings do not perform UTF-8 validation, which makes them equivalent to bytes.
In this CL we add a public API function:
// Returns the true field type for this field.
upb_FieldType upb_MiniTableField_Type(const upb_MiniTable_Field* f);
This will provide the actual field type for this field.
Note that this CL changes the MiniDescriptor format. Previously MiniDescriptors did not contain enough information to distinguish between Enum/Int32. To remedy this we added a new encoded field type, `kUpb_EncodedType_ClosedEnum`.
PiperOrigin-RevId: 479387672
2 years ago
// Name the IsAlternate label flag in the expression when it is set.
if (mode32 & kUpb_LabelFlags_IsAlternate) {
absl::StrAppend(&ret, " | kUpb_LabelFlags_IsAlternate");
Refactored message accessors to share a common set of functions instead of duplicating logic.
Prior to this CL, there were several different code paths for reading/writing message data. Generated code, MiniTable accessors, and reflection all performed direct manipulation of the bits and bytes in a message, but they all had distinct implementations that did not share much of any code. This divergence meant that they could easily have different behavior, bugs could creep into one but not another, and we would need three different sets of tests to get full test coverage. This also made it very difficult to change the internal representation in any way, since it would require updating many places in the code.
With this CL, the three different APIs for accessing message data now all share a common set of functions. The common functions all take a `upb_MiniTableField` as the canonical description of a field's type and layout. The lowest-level functions are very branchy, as they must test for every possible variation in the field type (field vs oneof, hasbit vs no-hasbit, different field sizes, whether a nonzero default value exists, extension vs. regular field), however these functions are declared inline and designed to be very optimizable when values are known at compile time.
In generated accessors, for example, we can declare constant `upb_MiniTableField` instances so that all values can constant-propagate, and we can get fully specialized code even though we are calling a generic function. On the other hand, when we use the generic functions from reflection, we get runtime branches since values are not known at compile time. But even then, the function is written to be as efficient as possible when used from reflection. For example, we use memcpy() calls with constant length so that the compiler can optimize these into inline loads/stores without having to make an out-of-line call to memcpy().
In this way, this CL should be a benefit to both correctness and performance. It will also make it easier to change the message representation, for example to optimize the encoder by giving hasbits to all fields.
Note that we have not completely consolidated all access in this CL:
1. Some functions outside of get/set such as clear and hazzers are not yet unified.
2. The encoder and decoder still touch the message without going through the common functions. The encoder and decoder require a bit more specialized code to get good performance when reading/writing fields en masse.
PiperOrigin-RevId: 490016095
2 years ago
// Finally OR in the field representation. GetFieldRep() supplies the constant
// name (possibly a UPB_SIZE(...) expression), and the emitted text shifts it
// into place with kUpb_FieldRep_Shift.
absl::StrAppend(&ret, " | (", GetFieldRep(field32, field64),
" << kUpb_FieldRep_Shift)");
return ret;
Refactored message accessors to share a common set of functions instead of duplicating logic.
Prior to this CL, there were several different code paths for reading/writing message data. Generated code, MiniTable accessors, and reflection all performed direct manipulation of the bits and bytes in a message, but they all had distinct implementations that did not share much of any code. This divergence meant that they could easily have different behavior, bugs could creep into one but not another, and we would need three different sets of tests to get full test coverage. This also made it very difficult to change the internal representation in any way, since it would require updating many places in the code.
With this CL, the three different APIs for accessing message data now all share a common set of functions. The common functions all take a `upb_MiniTableField` as the canonical description of a field's type and layout. The lowest-level functions are very branchy, as they must test for every possible variation in the field type (field vs oneof, hasbit vs no-hasbit, different field sizes, whether a nonzero default value exists, extension vs. regular field), however these functions are declared inline and designed to be very optimizable when values are known at compile time.
In generated accessors, for example, we can declare constant `upb_MiniTableField` instances so that all values can constant-propagate, and we can get fully specialized code even though we are calling a generic function. On the other hand, when we use the generic functions from reflection, we get runtime branches since values are not known at compile time. But even then, the function is written to be as efficient as possible when used from reflection. For example, we use memcpy() calls with constant length so that the compiler can optimize these into inline loads/stores without having to make an out-of-line call to memcpy().
In this way, this CL should be a benefit to both correctness and performance. It will also make it easier to change the message representation, for example to optimize the encoder by giving hasbits to all fields.
Note that we have not completely consolidated all access in this CL:
1. Some functions outside of get/set such as clear and hazzers are not yet unified.
2. The encoder and decoder still touch the message without going through the common functions. The encoder and decoder require a bit more specialized code to get good performance when reading/writing fields en masse.
PiperOrigin-RevId: 490016095
2 years ago
// Returns a brace-enclosed initializer for one field entry, with the values in
// this order: number, offset, presence, submsg index, descriptortype, mode.
//
// number, submsg_index and descriptortype come from field64. offset and
// presence are passed through FileLayout::UpbSize() with both the field32 and
// field64 values. The mode text comes from GetModeInit(). A submsg_index equal
// to kUpb_NoSub is emitted symbolically rather than as a number.
//
// Note the parameter order: field64 first, then field32.
std::string FieldInitializer(const upb_MiniTableField* field64,
const upb_MiniTableField* field32) {
return absl::Substitute(
"{$0, $1, $2, $3, $4, $5}", field64->number,
FileLayout::UpbSize(field32->offset, field64->offset),
FileLayout::UpbSize(field32->presence, field64->presence),
// Both ternary arms are C strings. The StrCat temporary lives until the
// end of the enclosing full-expression (the whole Substitute call), so the
// pointer returned by c_str() is still valid when it is consumed.
field64->submsg_index == kUpb_NoSub
? "kUpb_NoSub"
: absl::StrCat(field64->submsg_index).c_str(),
field64->descriptortype, GetModeInit(field32, field64));
// Convenience overload: fetches both MiniTable entries for `field` from
// `layout` and forwards them (64-bit entry first) to the overload above.
std::string FieldInitializer(const FileLayout& layout,
const protobuf::FieldDescriptor* field) {
return FieldInitializer(layout.GetField64(field), layout.GetField32(field));
// Writes a single field into a .upb.c source file.
//
// field64 / field32 are the two MiniTable entries for the same field; they are
// handed to FieldInitializer() in that order. `output` receives the text.
void WriteMessageField(const upb_MiniTableField* field64,
const upb_MiniTableField* field32, Output& output) {
Refactored message accessors to share a common set of functions instead of duplicating logic.
Prior to this CL, there were several different code paths for reading/writing message data. Generated code, MiniTable accessors, and reflection all performed direct manipulation of the bits and bytes in a message, but they all had distinct implementations that did not share much of any code. This divergence meant that they could easily have different behavior, bugs could creep into one but not another, and we would need three different sets of tests to get full test coverage. This also made it very difficult to change the internal representation in any way, since it would require updating many places in the code.
With this CL, the three different APIs for accessing message data now all share a common set of functions. The common functions all take a `upb_MiniTableField` as the canonical description of a field's type and layout. The lowest-level functions are very branchy, as they must test for every possible variation in the field type (field vs oneof, hasbit vs no-hasbit, different field sizes, whether a nonzero default value exists, extension vs. regular field), however these functions are declared inline and designed to be very optimizable when values are known at compile time.
In generated accessors, for example, we can declare constant `upb_MiniTableField` instances so that all values can constant-propagate, and we can get fully specialized code even though we are calling a generic function. On the other hand, when we use the generic functions from reflection, we get runtime branches since values are not known at compile time. But even then, the function is written to be as efficient as possible when used from reflection. For example, we use memcpy() calls with constant length so that the compiler can optimize these into inline loads/stores without having to make an out-of-line call to memcpy().
In this way, this CL should be a benefit to both correctness and performance. It will also make it easier to change the message representation, for example to optimize the encoder by giving hasbits to all fields.
Note that we have not completely consolidated all access in this CL:
1. Some functions outside of get/set such as clear and hazzers are not yet unified.
2. The encoder and decoder still touch the message without going through the common functions. The encoder and decoder require a bit more specialized code to get good performance when reading/writing fields en masse.
PiperOrigin-RevId: 490016095
2 years ago
// Emit the field's initializer followed by a comma and newline, i.e. as one
// element of the array the caller is in the middle of writing.
output(" $0,\n", FieldInitializer(field64, field32));
// Writes a single message into a .upb.c source file.
//
// Emits, in order: the upb_MiniTableSub array (only when `subs` is non-empty),
// the upb_MiniTableField array (only when the message has fields), and the
// upb_MiniTable definition itself. The fast-decode table is appended to the
// MiniTable when `fasttable_enabled` is set and the table is non-empty.
void WriteMessage(const protobuf::Descriptor* message, const FileLayout& layout,
Output& output, bool fasttable_enabled) {
std::string msg_name = ToCIdent(message->full_name());
// Array references default to the text "NULL"; they are replaced with
// "&<array>[0]" only when the corresponding array is actually emitted.
std::string fields_array_ref = "NULL";
std::string submsgs_array_ref = "NULL";
std::string subenums_array_ref = "NULL";
const upb_MiniTable* mt_32 = layout.GetMiniTable32(message);
const upb_MiniTable* mt_64 = layout.GetMiniTable64(message);
std::vector<std::string> subs;
// Walk the fields of the 64-bit table, looking at those that reference a sub
// (submsg_index != kUpb_NoSub).
for (int i = 0; i < mt_64->field_count; i++) {
const upb_MiniTableField* f = &mt_64->fields[i];
if (f->submsg_index != kUpb_NoSub) {
if (!subs.empty()) {
std::string submsgs_array_name = msg_name + "_submsgs";
submsgs_array_ref = "&" + submsgs_array_name + "[0]";
output("static const upb_MiniTableSub $0[$1] = {\n", submsgs_array_name,
for (const auto& sub : subs) {
output(" $0,\n", sub);
Optimized decoder and paved the way for parsing extensions.
The primary motivation for this change is to avoid referring to the
`upb_msglayout` object when we are trying to fetch the `upb_msglayout`
object for a sub-message. This will help pave the way for parsing
extensions. We also implement several optimizations so that we can
make this change without regressing performance.
Normally we compute the layout for a sub-message field like so:
const upb_msglayout *get_submsg_layout(
const upb_msglayout *layout,
const upb_msglayout_field *field) {
return layout->submsgs[field->submsg_index]
The reason for this indirection is to avoid storing a pointer directly
in `upb_msglayout_field`, as this would double its size (from 12 to 24
bytes on 64-bit architectures) which is wasteful as this pointer is
only needed for message typed fields.
However `get_submsg_layout` as written above does not work for
extensions, as they will not have entries in the message's
`layout->submsgs` array by nature, and we want to avoid creating
an entire fake `upb_msglayout` for each such extension since that
would also be wasteful.
This change removes the dependency on `upb_msglayout` by passing down
the `submsgs` array instead:
const upb_msglayout *get_submsg_layout(
const upb_msglayout *const *submsgs,
const upb_msglayout_field *field) {
return submsgs[field->submsg_index]
This will pave the way for parsing extensions, as we can more easily
create an alternative `submsgs` array for extension fields without
extra overhead or waste.
Along the way several optimizations presented themselves that allow
a nice increase in performance:
1. Passing the parsed `wireval` by address instead of by value ended
up avoiding an expensive and useless stack copy (this is on Clang,
which was used for all measurements).
2. When field numbers are densely packed, we can find a field by number
with a single indexed lookup instead of linear search. At codegen
time we can compute the maximum field number that will allow such
an indexed lookup.
3. For fields that do require linear search, we can start the linear
search at the location where we found the previous field, taking
advantage of the fact that field numbers are generally increasing.
4. When the hasbit index is less than 32 (the common case) we can use
a less expensive code sequence to set it.
5. We check for the hasbit case before the oneof case, as optional
fields are more common than oneof fields.
Benchmark results indicate a 20% improvement in parse speed with a
small code size increase:
name old time/op new time/op delta
ArenaOneAlloc 21.3ns ± 0% 21.5ns ± 0% +0.96% (p=0.000 n=12+12)
ArenaInitialBlockOneAlloc 6.32ns ± 0% 6.32ns ± 0% +0.03% (p=0.000 n=12+10)
LoadDescriptor_Upb 53.5µs ± 1% 51.5µs ± 2% -3.70% (p=0.000 n=12+12)
LoadAdsDescriptor_Upb 2.78ms ± 2% 2.68ms ± 0% -3.57% (p=0.000 n=12+12)
LoadDescriptor_Proto2 240µs ± 0% 240µs ± 0% +0.12% (p=0.001 n=12+12)
LoadAdsDescriptor_Proto2 12.8ms ± 0% 12.7ms ± 0% -1.15% (p=0.000 n=12+10)
Parse_Upb_FileDesc<UseArena,Copy> 13.2µs ± 2% 10.7µs ± 0% -18.49% (p=0.000 n=10+12)
Parse_Upb_FileDesc<UseArena,Alias> 11.3µs ± 0% 9.6µs ± 0% -15.11% (p=0.000 n=12+11)
Parse_Upb_FileDesc<InitBlock,Copy> 12.7µs ± 0% 10.3µs ± 0% -19.00% (p=0.000 n=10+12)
Parse_Upb_FileDesc<InitBlock,Alias> 10.9µs ± 0% 9.2µs ± 0% -15.82% (p=0.000 n=12+12)
Parse_Proto2<FileDesc,NoArena,Copy> 29.4µs ± 0% 29.5µs ± 0% +0.61% (p=0.000 n=12+12)
Parse_Proto2<FileDesc,UseArena,Copy> 20.7µs ± 2% 20.6µs ± 2% ~ (p=0.260 n=12+11)
Parse_Proto2<FileDesc,InitBlock,Copy> 16.7µs ± 1% 16.7µs ± 0% -0.25% (p=0.036 n=12+10)
Parse_Proto2<FileDescSV,InitBlock,Alias> 16.5µs ± 0% 16.5µs ± 0% +0.20% (p=0.016 n=12+11)
SerializeDescriptor_Proto2 5.30µs ± 1% 5.36µs ± 1% +1.09% (p=0.000 n=12+11)
SerializeDescriptor_Upb 12.9µs ± 0% 13.0µs ± 0% +0.90% (p=0.000 n=12+11)
-------------- --------------
+1.5% +176 +1.6% +176 upb/decode.c
+1.8% +176 +1.9% +176 decode_msg
+0.4% +64 +0.4% +64 upb/def.c
+1.4% +64 +1.4% +64 _upb_symtab_addfile
+1.2% +48 +1.4% +48 upb/reflection.c
+15% +32 +18% +32 upb_msg_set
+2.9% +16 +3.1% +16 upb_msg_mutable
-9.3% -288 [ = ] 0 [Unmapped]
[ = ] 0 +0.2% +288 TOTAL
4 years ago
// Field array: one initializer per field, pairing entry i of the 64-bit table
// with entry i of the 32-bit table.
if (mt_64->field_count > 0) {
std::string fields_array_name = msg_name + "__fields";
fields_array_ref = "&" + fields_array_name + "[0]";
output("static const upb_MiniTableField $0[$1] = {\n", fields_array_name,
for (int i = 0; i < mt_64->field_count; i++) {
WriteMessageField(&mt_64->fields[i], &mt_32->fields[i], output);
std::vector<TableEntry> table;
// -1 converts to 0xff in a uint8_t; that value is kept unless a fast table
// with more than one entry is built below.
uint8_t table_mask = -1;
if (fasttable_enabled) {
table = FastDecodeTable(message, layout);
if (table.size() > 1) {
// The size must be a power of two so that (size - 1) is a contiguous bit
// mask; the mask is stored shifted left by three bits.
assert((table.size() & (table.size() - 1)) == 0);
table_mask = (table.size() - 1) << 3;
// Extension mode: non-extendable unless the message declares extension
// ranges, in which case it is either a MessageSet or plainly extendable.
std::string msgext = "kUpb_ExtMode_NonExtendable";
if (message->extension_range_count()) {
if (message->options().message_set_wire_format()) {
msgext = "kUpb_ExtMode_IsMessageSet";
} else {
msgext = "kUpb_ExtMode_Extendable";
output("const upb_MiniTable $0 = {\n", MessageInit(message));
output(" $0,\n", submsgs_array_ref);
output(" $0,\n", fields_array_ref);
output(" $0, $1, $2, $3, $4, $5,\n", layout.GetMessageSize(message),
mt_64->field_count, msgext, mt_64->dense_below, table_mask,
if (!table.empty()) {
output(" UPB_FASTTABLE_INIT({\n");
for (const auto& ent : table) {
// Each entry is written as {0x<data as 16 zero-padded hex digits>,
// &<function name>}: ent.second is the data, ent.first the name.
output(" {0x$1, &$0},\n", ent.first,
absl::StrCat(absl::Hex(ent.second, absl::kZeroPad16)));
output(" }),\n");
// Writes the definition of one upb_MiniTableEnum, named EnumInit(e), to
// `output`.
//
// The emitted initializer carries mt->mask_limit, mt->value_count and a
// brace-enclosed list of mt->data[] words printed in hex. The number of words
// dumped is (mt->mask_limit / 32) + mt->value_count.
void WriteEnum(const upb_MiniTableEnum* mt, const protobuf::EnumDescriptor* e,
Output& output) {
std::string values_init = "{\n";
uint32_t value_count = (mt->mask_limit / 32) + mt->value_count;
for (uint32_t i = 0; i < value_count; i++) {
absl::StrAppend(&values_init, " 0x", absl::Hex(mt->data[i]),
values_init += " }";
const upb_MiniTableEnum $0 = {
EnumInit(e), mt->mask_limit, mt->value_count, values_init);
// Writes the enum tables for the file described by `layout`, followed by an
// array of pointers to them (named kEnumsInit) when there is at least one.
//
// Returns the number of enums written. Files whose syntax is not proto2 get no
// enum tables at all, and 0 is returned for them.
int WriteEnums(const FileLayout& layout, Output& output) {
const protobuf::FileDescriptor* file = layout.descriptor();
if (file->syntax() != protobuf::FileDescriptor::SYNTAX_PROTO2) {
return 0;
std::vector<const protobuf::EnumDescriptor*> this_file_enums =
// First pass: emit each enum's table definition.
for (const auto* e : this_file_enums) {
WriteEnum(layout.GetEnumTable(e), e, output);
// Second pass: emit the pointer array that references those definitions.
if (!this_file_enums.empty()) {
output("static const upb_MiniTableEnum *$0[$1] = {\n", kEnumsInit,
for (const auto* e : this_file_enums) {
output(" &$0,\n", EnumInit(e));
return this_file_enums.size();
// Writes a MiniTable for every message returned by SortedMessages() for the
// file, followed by an array of pointers to them (named kMessagesInit).
//
// `fasttable_enabled` is forwarded unchanged to WriteMessage(). Returns the
// number of messages written; a file with no messages writes nothing and
// returns 0.
int WriteMessages(const FileLayout& layout, Output& output,
bool fasttable_enabled) {
const protobuf::FileDescriptor* file = layout.descriptor();
std::vector<const protobuf::Descriptor*> file_messages = SortedMessages(file);
if (file_messages.empty()) return 0;
// First pass: emit each message's tables.
for (auto message : file_messages) {
WriteMessage(message, layout, output, fasttable_enabled);
// Second pass: emit the pointer array, in the same order as the definitions.
output("static const upb_MiniTable *$0[$1] = {\n", kMessagesInit,
for (auto message : file_messages) {
output(" &$0,\n", MessageInit(message));
return file_messages.size();
Refactored message accessors to share a common set of functions instead of duplicating logic.
Prior to this CL, there were several different code paths for reading/writing message data. Generated code, MiniTable accessors, and reflection all performed direct manipulation of the bits and bytes in a message, but they all had distinct implementations that did not share much of any code. This divergence meant that they could easily have different behavior, bugs could creep into one but not another, and we would need three different sets of tests to get full test coverage. This also made it very difficult to change the internal representation in any way, since it would require updating many places in the code.
With this CL, the three different APIs for accessing message data now all share a common set of functions. The common functions all take a `upb_MiniTableField` as the canonical description of a field's type and layout. The lowest-level functions are very branchy, as they must test for every possible variation in the field type (field vs oneof, hasbit vs no-hasbit, different field sizes, whether a nonzero default value exists, extension vs. regular field), however these functions are declared inline and designed to be very optimizable when values are known at compile time.
In generated accessors, for example, we can declare constant `upb_MiniTableField` instances so that all values can constant-propagate, and we can get fully specialized code even though we are calling a generic function. On the other hand, when we use the generic functions from reflection, we get runtime branches since values are not known at compile time. But even then, the function is written to be as efficient as possible when used from reflection. For example, we use memcpy() calls with constant length so that the compiler can optimize these into inline loads/stores without having to make an out-of-line call to memcpy().
In this way, this CL should be a benefit to both correctness and performance. It will also make it easier to change the message representation, for example to optimize the encoder by giving hasbits to all fields.
Note that we have not completely consolidated all access in this CL:
1. Some functions outside of get/set such as clear and hazzers are not yet unified.
2. The encoder and decoder still touch the message without going through the common functions. The encoder and decoder require a bit more specialized code to get good performance when reading/writing fields en masse.
PiperOrigin-RevId: 490016095
2 years ago
// Writes the body of one extension initializer: the field initializer, then a
// reference to the extendee, then the extension's sub.
//
// The opening "const upb_MiniTableExtension ... = {" is not written here; the
// caller emits it before calling this function.
void WriteExtension(const protobuf::FieldDescriptor* ext,
const FileLayout& layout, Output& output) {
output("$0,\n", FieldInitializer(layout, ext));
// NOTE(review): this treats the field pointer returned by GetField32() as a
// pointer to the enclosing upb_MiniTableExtension - presumably the field is
// the extension's first member; confirm against the struct definition.
const upb_MiniTableExtension* mt_ext =
reinterpret_cast<const upb_MiniTableExtension*>(layout.GetField32(ext));
// NOTE(review): `extendee` is printed as a C string here, so this layout
// presumably stores the extendee's symbol name in that slot - confirm.
output(" &$0,\n", reinterpret_cast<const char*>(mt_ext->extendee));
output(" $0,\n", FilePlatformLayout::GetSub(mt_ext->sub));
// Writes the forward declarations and definitions for every extension in the
// file. Returns the number of extensions, or 0 when the file has none.
int WriteExtensions(const FileLayout& layout, Output& output) {
auto exts = SortedExtensions(layout.descriptor());
// NOTE(review): forward_decls is not referenced in the visible code.
absl::flat_hash_set<const protobuf::Descriptor*> forward_decls;
// Nothing to emit for a file without extensions.
if (exts.empty()) return 0;
// Order by full name for consistent ordering.
std::map<std::string, const protobuf::Descriptor*> forward_messages;
// Collect the messages that need an extern declaration: each extendee and,
// for message-typed extensions, the extension's message type.
for (auto ext : exts) {
forward_messages[ext->containing_type()->full_name()] =
if (ext->message_type()) {
forward_messages[ext->message_type()->full_name()] = ext->message_type();
// Declare every referenced MiniTable before the extensions that use it.
for (const auto& decl : forward_messages) {
output("extern const upb_MiniTable $0;\n", MessageInit(decl.second));
// Define one upb_MiniTableExtension per extension.
for (auto ext : exts) {
output("const upb_MiniTableExtension $0 = {\n ", ExtensionLayout(ext));
Refactored message accessors to share a common set of functions instead of duplicating logic.
Prior to this CL, there were several different code paths for reading/writing message data. Generated code, MiniTable accessors, and reflection all performed direct manipulation of the bits and bytes in a message, but they all had distinct implementations that did not share much of any code. This divergence meant that they could easily have different behavior, bugs could creep into one but not another, and we would need three different sets of tests to get full test coverage. This also made it very difficult to change the internal representation in any way, since it would require updating many places in the code.
With this CL, the three different APIs for accessing message data now all share a common set of functions. The common functions all take a `upb_MiniTableField` as the canonical description of a field's type and layout. The lowest-level functions are very branchy, as they must test for every possible variation in the field type (field vs oneof, hasbit vs no-hasbit, different field sizes, whether a nonzero default value exists, extension vs. regular field), however these functions are declared inline and designed to be very optimizable when values are known at compile time.
In generated accessors, for example, we can declare constant `upb_MiniTableField` instances so that all values can constant-propagate, and we can get fully specialized code even though we are calling a generic function. On the other hand, when we use the generic functions from reflection, we get runtime branches since values are not known at compile time. But even then, the functions are written to be as efficient as possible when used from reflection. For example, we use memcpy() calls with constant length so that the compiler can optimize these into inline loads/stores without having to make an out-of-line call to memcpy().
In this way, this CL should be a benefit to both correctness and performance. It will also make it easier to change the message representation, for example to optimize the encoder by giving hasbits to all fields.
Note that we have not completely consolidated all access in this CL:
1. Some functions outside of get/set such as clear and hazzers are not yet unified.
2. The encoder and decoder still touch the message without going through the common functions. The encoder and decoder require a bit more specialized code to get good performance when reading/writing fields en masse.
PiperOrigin-RevId: 490016095
2 years ago
// Fill in the body of the extension definition opened just above.
WriteExtension(ext, layout, output);
// Emit the file-level array of pointers to every extension, sized by the
// extension count.
"static const upb_MiniTableExtension *$0[$1] = {\n",
kExtensionsInit, exts.size());
for (auto ext : exts) {
output(" &$0,\n", ExtensionLayout(ext));
// The count is returned so the caller can record it in the file table.
return exts.size();
Added a codegen parameter for whether fasttables are generated or not.
$ CC=clang bazel build -c opt --copt=-g benchmarks:benchmark --//:fasttable_enabled=false
INFO: Build option --//:fasttable_enabled has changed, discarding analysis cache.
INFO: Analyzed target //benchmarks:benchmark (0 packages loaded, 913 targets configured).
INFO: Found 1 target...
Target //benchmarks:benchmark up-to-date:
INFO: Elapsed time: 0.760s, Critical Path: 0.58s
INFO: 7 processes: 1 internal, 6 linux-sandbox.
INFO: Build completed successfully, 7 total actions
$ bazel-bin/benchmarks/benchmark --benchmark_filter=BM_Parse_Upb
Benchmark Time CPU Iterations
BM_Parse_Upb_FileDesc_WithArena 10985 ns 10984 ns 63567 651.857MB/s
BM_Parse_Upb_FileDesc_WithInitialBlock 10556 ns 10554 ns 66138 678.458MB/s
$ CC=clang bazel build -c opt --copt=-g benchmarks:benchmark --//:fasttable_enabled=true
INFO: Build option --//:fasttable_enabled has changed, discarding analysis cache.
INFO: Analyzed target //benchmarks:benchmark (0 packages loaded, 913 targets configured).
INFO: Found 1 target...
Target //benchmarks:benchmark up-to-date:
INFO: Elapsed time: 0.744s, Critical Path: 0.58s
INFO: 7 processes: 1 internal, 6 linux-sandbox.
INFO: Build completed successfully, 7 total actions
$ bazel-bin/benchmarks/benchmark --benchmark_filter=BM_Parse_Upb
Benchmark Time CPU Iterations
BM_Parse_Upb_FileDesc_WithArena 3284 ns 3284 ns 213495 2.1293GB/s
BM_Parse_Upb_FileDesc_WithInitialBlock 2882 ns 2882 ns 243069 2.4262GB/s
Biggest unknown is whether this parameter should default to true or false.
4 years ago
// Writes a .upb.cc source file.
//
// Emits the include block, then the message, extension and enum tables, and
// finally the upb_MiniTableFile that references them. `fasttable_enabled` is
// forwarded to WriteMessages().
void WriteSource(const FileLayout& layout, Output& output,
bool fasttable_enabled) {
const protobuf::FileDescriptor* file = layout.descriptor();
EmitFileWarning(file, output);
"#include <stddef.h>\n"
"#include \"upb/collections/array_internal.h\"\n"
"#include \"upb/message/internal.h\"\n"
"#include \"upb/mini_table/enum_internal.h\"\n"
"#include \"$0\"\n",
// Include the generated header of every file this one depends on.
for (int i = 0; i < file->dependency_count(); i++) {
output("#include \"$0\"\n", HeaderFilename(file->dependency(i)));
"#include \"upb/port/def.inc\"\n"
// Each writer returns how many entries it emitted; the counts are used below.
int msg_count = WriteMessages(layout, output, fasttable_enabled);
int ext_count = WriteExtensions(layout, output);
int enum_count = WriteEnums(layout, output);
// The file table lists the three sub-tables (NULL when the corresponding
// count is zero), followed by the three counts.
output("const upb_MiniTableFile $0 = {\n", FileLayoutName(file));
output(" $0,\n", msg_count ? kMessagesInit : "NULL");
output(" $0,\n", enum_count ? kEnumsInit : "NULL");
output(" $0,\n", ext_count ? kExtensionsInit : "NULL");
output(" $0,\n", msg_count);
output(" $0,\n", enum_count);
output(" $0,\n", ext_count);
output("#include \"upb/port/undef.inc\"\n");
// Code generator implementation handed to protoc's PluginMain() by main().
class Generator : public protoc::CodeGenerator {
~Generator() override {}
// Generates the outputs for `file`. `parameter` carries the generator options
// and `error` receives a message when generation is rejected.
bool Generate(const protobuf::FileDescriptor* file,
const std::string& parameter, protoc::GeneratorContext* context,
std::string* error) const override;
uint64_t GetSupportedFeatures() const override {
// Parses the generator parameters, then writes the header and the source
// output for `file`. Returns false and sets `*error` on an unknown parameter;
// returns true otherwise.
bool Generator::Generate(const protobuf::FileDescriptor* file,
Added a codegen parameter for whether fasttables are generated or not.
$ CC=clang bazel build -c opt --copt=-g benchmarks:benchmark --//:fasttable_enabled=false
INFO: Build option --//:fasttable_enabled has changed, discarding analysis cache.
INFO: Analyzed target //benchmarks:benchmark (0 packages loaded, 913 targets configured).
INFO: Found 1 target...
Target //benchmarks:benchmark up-to-date:
INFO: Elapsed time: 0.760s, Critical Path: 0.58s
INFO: 7 processes: 1 internal, 6 linux-sandbox.
INFO: Build completed successfully, 7 total actions
$ bazel-bin/benchmarks/benchmark --benchmark_filter=BM_Parse_Upb
Benchmark Time CPU Iterations
BM_Parse_Upb_FileDesc_WithArena 10985 ns 10984 ns 63567 651.857MB/s
BM_Parse_Upb_FileDesc_WithInitialBlock 10556 ns 10554 ns 66138 678.458MB/s
$ CC=clang bazel build -c opt --copt=-g benchmarks:benchmark --//:fasttable_enabled=true
INFO: Build option --//:fasttable_enabled has changed, discarding analysis cache.
INFO: Analyzed target //benchmarks:benchmark (0 packages loaded, 913 targets configured).
INFO: Found 1 target...
Target //benchmarks:benchmark up-to-date:
INFO: Elapsed time: 0.744s, Critical Path: 0.58s
INFO: 7 processes: 1 internal, 6 linux-sandbox.
INFO: Build completed successfully, 7 total actions
$ bazel-bin/benchmarks/benchmark --benchmark_filter=BM_Parse_Upb
Benchmark Time CPU Iterations
BM_Parse_Upb_FileDesc_WithArena 3284 ns 3284 ns 213495 2.1293GB/s
BM_Parse_Upb_FileDesc_WithInitialBlock 2882 ns 2882 ns 243069 2.4262GB/s
Biggest unknown is whether this parameter should default to true or false.
4 years ago
const std::string& parameter,
protoc::GeneratorContext* context,
std::string* error) const {
// Fasttables are opt-in: they stay disabled unless "fasttable" is passed.
bool fasttable_enabled = false;
std::vector<std::pair<std::string, std::string>> params;
// The parsed key/value pairs are written into `params` through the pointer.
google::protobuf::compiler::ParseGeneratorParameter(parameter, &params);
// "fasttable" is the only recognized option; anything else is reported through
// `*error` and aborts generation.
for (const auto& pair : params) {
if (pair.first == "fasttable") {
fasttable_enabled = true;
} else {
*error = "Unknown parameter: " + pair.first;
return false;
FileLayout layout(file);
// Header output.
std::unique_ptr<protobuf::io::ZeroCopyOutputStream> h_output_stream(
Output h_output(h_output_stream.get());
WriteHeader(layout, h_output);
// Source output; only the source writer takes the fasttable setting.
std::unique_ptr<protobuf::io::ZeroCopyOutputStream> c_output_stream(
Output c_output(c_output_stream.get());
WriteSource(layout, c_output, fasttable_enabled);
return true;
} // namespace
} // namespace upbc
// Plugin entry point: creates a upbc::Generator and passes it to PluginMain(),
// whose result becomes the process exit code.
int main(int argc, char** argv) {
std::unique_ptr<google::protobuf::compiler::CodeGenerator> generator(
new upbc::Generator());
return google::protobuf::compiler::PluginMain(argc, argv, generator.get());