Optimizes `upb_MiniTable_Enum` for large but dense enums.

Optimizes `upb_MiniTable_Enum` for enums with many values (>64) but with relatively dense packing in numeric space.

This CL optimizes both the size and speed of such enums:

- size: 30x code size reduction
- speed: moved from linear search to a constant-time bit test

Negative enum values are still expensive, as they are never put into the bitfield.

PiperOrigin-RevId: 473259819
pull/13171/head
Joshua Haberman 2 years ago committed by Copybara-Service
parent 5ae398f202
commit 896e74c141
  1. 2
      bazel/build_defs.bzl
  2. 11
      upb/decode.c
  3. 2
      upb/def.c
  4. 78
      upb/mini_table.c
  5. 13
      upb/mini_table.h
  6. 42
      upb/msg_internal.h
  7. 18
      upbc/protoc-gen-upb.cc

@ -41,8 +41,6 @@ _DEFAULT_CPPOPTS.extend([
])
_DEFAULT_COPTS.extend([
"-std=c99",
"-pedantic",
"-Werror=pedantic",
"-Wall",
"-Wstrict-prototypes",
# GCC (at least) emits spurious warnings for this that cannot be fixed

@ -31,6 +31,7 @@
#include "upb/internal/array.h"
#include "upb/internal/decode.h"
#include "upb/msg_internal.h"
#include "upb/upb.h"
// Must be last.
@ -421,11 +422,7 @@ static bool _upb_Decoder_CheckEnumSlow(upb_Decoder* d, const char* ptr,
const upb_MiniTable_Enum* e,
const upb_MiniTable_Field* field,
uint32_t v) {
// OPT: binary search long lists?
int n = e->value_count;
for (int i = 0; i < n; i++) {
if ((uint32_t)e->values[i] == v) return true;
}
if (_upb_MiniTable_CheckEnumValueSlow(e, v)) return true;
// Unrecognized enum goes into unknown fields.
// For packed fields the tag could be arbitrarily far in the past, so we
@ -445,8 +442,8 @@ static bool _upb_Decoder_CheckEnum(upb_Decoder* d, const char* ptr,
wireval* val) {
uint32_t v = val->uint32_val;
if (UPB_LIKELY(v < 64) && UPB_LIKELY(((1ULL << v) & e->mask))) return true;
_kUpb_FastEnumCheck_Status status = _upb_MiniTable_CheckEnumValueFast(e, v);
if (UPB_LIKELY(status == _kUpb_FastEnumCheck_ValueIsInEnum)) return true;
return _upb_Decoder_CheckEnumSlow(d, ptr, msg, e, field, v);
}

@ -2644,8 +2644,6 @@ static void create_enumdef(upb_DefBuilder* ctx, const char* prefix,
if (ctx->layout) {
UPB_ASSERT(ctx->enum_count < ctx->layout->enum_count);
e->layout = ctx->layout->enums[ctx->enum_count++];
UPB_ASSERT(upb_inttable_count(&e->iton) ==
e->layout->value_count + count_bits_debug(e->layout->mask));
} else {
e->layout = create_enumlayout(ctx, e);
}

@ -30,6 +30,7 @@
#include <inttypes.h>
#include <setjmp.h>
#include "upb/arena.h"
#include "upb/msg_internal.h"
#include "upb/upb.h"
@ -387,6 +388,13 @@ typedef struct {
upb_LayoutItemVector vec;
upb_Arena* arena;
upb_Status* status;
// When building enums.
upb_MiniTable_Enum* enum_table;
uint32_t enum_value_count;
uint32_t enum_data_count;
uint32_t enum_data_capacity;
jmp_buf err;
} upb_MtDecoder;
@ -1034,41 +1042,67 @@ upb_MiniTable* upb_MiniTable_BuildMapEntry(upb_FieldType key_type,
return ret;
}
static bool upb_MiniTable_BuildEnumValue(upb_MtDecoder* d,
upb_MiniTable_Enum* table,
uint32_t val, upb_Arena* arena) {
if (val < 64) {
table->mask |= 1ULL << val;
return true;
static size_t upb_MiniTable_EnumSize(size_t count) {
return sizeof(upb_MiniTable_Enum) + count * sizeof(uint32_t);
}
static upb_MiniTable_Enum* _upb_MiniTable_AddEnumDataMember(upb_MtDecoder* d,
uint32_t val) {
if (d->enum_data_count == d->enum_data_capacity) {
size_t old_sz = upb_MiniTable_EnumSize(d->enum_data_capacity);
d->enum_data_capacity = UPB_MAX(2, d->enum_data_capacity * 2);
size_t new_sz = upb_MiniTable_EnumSize(d->enum_data_capacity);
d->enum_table = upb_Arena_Realloc(d->arena, d->enum_table, old_sz, new_sz);
upb_MtDecoder_CheckOutOfMemory(d, d->enum_table);
}
d->enum_table->data[d->enum_data_count++] = val;
return d->enum_table;
}
int32_t* values = (void*)table->values;
values = upb_Arena_Realloc(arena, values, table->value_count * 4,
(table->value_count + 1) * 4);
upb_MtDecoder_CheckOutOfMemory(d, values);
values[table->value_count++] = (int32_t)val;
table->values = values;
return true;
static void upb_MiniTable_BuildEnumValue(upb_MtDecoder* d, uint32_t val) {
upb_MiniTable_Enum* table = d->enum_table;
d->enum_value_count++;
if (table->value_count || (val > 512 && d->enum_value_count < val / 32)) {
if (table->value_count == 0) {
assert(d->enum_data_count == table->mask_limit / 32);
}
table = _upb_MiniTable_AddEnumDataMember(d, val);
table->value_count++;
} else {
uint32_t new_mask_limit = ((val / 32) + 1) * 32;
while (table->mask_limit < new_mask_limit) {
table = _upb_MiniTable_AddEnumDataMember(d, 0);
table->mask_limit += 32;
}
table->data[val / 32] |= 1ULL << (val % 32);
}
}
upb_MiniTable_Enum* upb_MiniTable_BuildEnum(const char* data, size_t len,
upb_Arena* arena,
upb_Status* status) {
upb_MtDecoder d = {
.enum_table = upb_Arena_Malloc(arena, upb_MiniTable_EnumSize(2)),
.enum_value_count = 0,
.enum_data_count = 0,
.enum_data_capacity = 1,
.status = status,
.end = UPB_PTRADD(data, len),
.arena = arena,
};
if (UPB_SETJMP(d.err)) {
return NULL;
}
upb_MiniTable_Enum* table = upb_Arena_Malloc(arena, sizeof(*table));
upb_MtDecoder_CheckOutOfMemory(&d, table);
upb_MtDecoder_CheckOutOfMemory(&d, d.enum_table);
// Guarantee at least 64 bits of mask without checking mask size.
d.enum_table->mask_limit = 64;
d.enum_table = _upb_MiniTable_AddEnumDataMember(&d, 0);
d.enum_table = _upb_MiniTable_AddEnumDataMember(&d, 0);
table->mask = 0;
table->value_count = 0;
table->values = NULL;
d.enum_table->value_count = 0;
const char* ptr = data;
uint32_t base = 0;
@ -1078,11 +1112,7 @@ upb_MiniTable_Enum* upb_MiniTable_BuildEnum(const char* data, size_t len,
if (ch <= kUpb_EncodedValue_MaxEnumMask) {
uint32_t mask = upb_FromBase92(ch);
for (int i = 0; i < 5; i++, base++, mask >>= 1) {
if (mask & 1) {
if (!upb_MiniTable_BuildEnumValue(&d, table, base, arena)) {
return NULL;
}
}
if (mask & 1) upb_MiniTable_BuildEnumValue(&d, base);
}
} else if (kUpb_EncodedValue_MinSkip <= ch &&
ch <= kUpb_EncodedValue_MaxSkip) {
@ -1097,7 +1127,7 @@ upb_MiniTable_Enum* upb_MiniTable_BuildEnum(const char* data, size_t len,
}
}
return table;
return d.enum_table;
}
const char* upb_MiniTable_BuildExtension(const char* data, size_t len,

@ -50,19 +50,6 @@ UPB_INLINE const upb_MiniTable_Enum* upb_MiniTable_GetSubEnumTable(
return mini_table->subs[field->submsg_index].subenum;
}
// Validates enum value against range defined by enum mini table.
UPB_INLINE bool upb_MiniTable_Enum_CheckValue(const upb_MiniTable_Enum* e,
int32_t val) {
uint32_t uval = (uint32_t)val;
if (uval < 64) return e->mask & (1ULL << uval);
// OPT: binary search long lists?
int n = e->value_count;
for (int i = 0; i < n; i++) {
if (e->values[i] == val) return true;
}
return false;
}
/** upb_MtDataEncoder *********************************************************/
// Functions to encode a string in a format that can be loaded by

@ -137,11 +137,47 @@ typedef struct {
} _upb_FastTable_Entry;
typedef struct {
const int32_t* values; // List of values <0 or >63
uint64_t mask; // Bits are set for acceptable value 0 <= x < 64
int value_count;
uint32_t mask_limit; // Limit enum value that can be tested with mask.
uint32_t value_count; // Number of values after the bitfield.
uint32_t data[]; // Bitmask + enumerated values follow.
} upb_MiniTable_Enum;
typedef enum {
_kUpb_FastEnumCheck_ValueIsInEnum = 0,
_kUpb_FastEnumCheck_ValueIsNotInEnum = 1,
_kUpb_FastEnumCheck_CannotCheckFast = 2,
} _kUpb_FastEnumCheck_Status;
UPB_INLINE _kUpb_FastEnumCheck_Status
_upb_MiniTable_CheckEnumValueFast(const upb_MiniTable_Enum* e, uint32_t val) {
if (UPB_UNLIKELY(val >= 64)) return _kUpb_FastEnumCheck_CannotCheckFast;
uint64_t mask = e->data[0] | ((uint64_t)e->data[1] << 32);
return (mask & (1ULL << val)) ? _kUpb_FastEnumCheck_ValueIsInEnum
: _kUpb_FastEnumCheck_ValueIsNotInEnum;
}
UPB_INLINE bool _upb_MiniTable_CheckEnumValueSlow(const upb_MiniTable_Enum* e,
uint32_t val) {
if (val < e->mask_limit) return e->data[val / 32] & (1ULL << (val % 32));
// OPT: binary search long lists?
const uint32_t* start = &e->data[e->mask_limit / 32];
const uint32_t* limit = &e->data[(e->mask_limit / 32) + e->value_count];
for (const uint32_t* p = start; p < limit; p++) {
if (*p == val) return true;
}
return false;
}
// Validates enum value against range defined by enum mini table.
UPB_INLINE bool upb_MiniTable_Enum_CheckValue(const upb_MiniTable_Enum* e,
uint32_t val) {
_kUpb_FastEnumCheck_Status status = _upb_MiniTable_CheckEnumValueFast(e, val);
if (UPB_UNLIKELY(status == _kUpb_FastEnumCheck_CannotCheckFast)) {
return _upb_MiniTable_CheckEnumValueSlow(e, val);
}
return status == _kUpb_FastEnumCheck_ValueIsInEnum ? true : false;
}
typedef union {
const struct upb_MiniTable* submsg;
const upb_MiniTable_Enum* subenum;

@ -1373,16 +1373,13 @@ void WriteMessage(const protobuf::Descriptor* message, const FileLayout& layout,
void WriteEnum(const upb_MiniTable_Enum* mt, const protobuf::EnumDescriptor* e,
Output& output) {
std::string values_init = "NULL";
if (mt->value_count) {
values_init = EnumInit(e) + "_values";
output("static const int32_t $0[$1] = {\n", values_init, mt->value_count);
for (int i = 0; i < mt->value_count; i++) {
output(" $0,\n", mt->values[i]);
}
output("};\n\n");
std::string values_init = "{\n";
uint32_t value_count = (mt->mask_limit / 32) + mt->value_count;
for (uint32_t i = 0; i < value_count; i++) {
absl::StrAppend(&values_init, " 0x", absl::Hex(mt->data[i]),
",\n");
}
values_init += " }";
output(
R"cc(
@ -1392,8 +1389,7 @@ void WriteEnum(const upb_MiniTable_Enum* mt, const protobuf::EnumDescriptor* e,
$3,
};
)cc",
EnumInit(e), values_init, absl::StrCat("0x", absl::Hex(mt->mask), "ULL"),
mt->value_count);
EnumInit(e), mt->mask_limit, mt->value_count, values_init);
output("\n");
}

Loading…
Cancel
Save