Merge pull request #539 from haberman/mini-table-1

First draft of new mini-table building API
pull/13171/head
Joshua Haberman 3 years ago committed by GitHub
commit 7497539732
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 22
      BUILD
  2. 4
      benchmarks/compare.py
  3. 6
      cmake/CMakeLists.txt
  4. 6
      upb/encode.c
  5. 949
      upb/mini_table.c
  6. 164
      upb/mini_table.h
  7. 99
      upb/mini_table.hpp
  8. 174
      upb/mini_table_test.cc
  9. 6
      upb/msg.c
  10. 18
      upb/msg_internal.h
  11. 26
      upb/upb.h

22
BUILD

@ -109,6 +109,28 @@ cc_library(
],
)
cc_library(
name = "mini_table",
srcs = ["upb/mini_table.c"],
hdrs = [
"upb/mini_table.h",
"upb/mini_table.hpp",
],
copts = UPB_DEFAULT_COPTS,
visibility = ["//visibility:public"],
deps = [":upb"],
)
cc_test(
name = "mini_table_test",
srcs = ["upb/mini_table_test.cc"],
deps = [
":mini_table",
"@com_google_googletest//:gtest_main",
"@com_google_absl//absl/container:flat_hash_set",
],
)
cc_library(
name = "fastdecode",
srcs = [

@ -81,8 +81,8 @@ def Benchmark(outbase, bench_cpu=True, runs=12, fasttable=False):
print("{} {} {} ns/op".format(*values), file=f)
Run("sort {} -o {} ".format(txt_filename, txt_filename))
Run("CC=clang bazel build -c opt --copt=-g --copt=-march=native tests:conformance_upb" + extra_args)
Run("cp -f bazel-bin/tests/conformance_upb {}.bin".format(outbase))
Run("CC=clang bazel build -c opt --copt=-g --copt=-march=native :conformance_upb" + extra_args)
Run("cp -f bazel-bin/conformance_upb {}.bin".format(outbase))
baseline = "main"

@ -81,6 +81,12 @@ target_link_libraries(upb
fastdecode
port
/third_party/utf8_range)
add_library(mini_table
../upb/mini_table.c
../upb/mini_table.h
../upb/mini_table.hpp)
target_link_libraries(mini_table
upb)
add_library(fastdecode
../upb/decode.h
../upb/decode_fast.c

@ -448,11 +448,17 @@ static bool encode_shouldencode(upb_encstate* e, const upb_Message* msg,
memcpy(&ch, mem, 1);
return ch != 0;
}
#if UINTPTR_MAX == 0xffffffff
case kUpb_FieldRep_Pointer:
#endif
case kUpb_FieldRep_4Byte: {
uint32_t u32;
memcpy(&u32, mem, 4);
return u32 != 0;
}
#if UINTPTR_MAX != 0xffffffff
case kUpb_FieldRep_Pointer:
#endif
case kUpb_FieldRep_8Byte: {
uint64_t u64;
memcpy(&u64, mem, 8);

@ -0,0 +1,949 @@
/*
* Copyright (c) 2009-2021, Google LLC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Google LLC nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "upb/mini_table.h"
#include <inttypes.h>
#include <setjmp.h>
#include "upb/msg_internal.h"
#include "upb/upb.h"
// Must be last.
#include "upb/port_def.inc"
typedef enum {
kUpb_EncodedType_Double = 0,
kUpb_EncodedType_Float = 1,
kUpb_EncodedType_Fixed32 = 2,
kUpb_EncodedType_Fixed64 = 3,
kUpb_EncodedType_SFixed32 = 4,
kUpb_EncodedType_SFixed64 = 5,
kUpb_EncodedType_Int32 = 6,
kUpb_EncodedType_UInt32 = 7,
kUpb_EncodedType_SInt32 = 8,
kUpb_EncodedType_Int64 = 9,
kUpb_EncodedType_UInt64 = 10,
kUpb_EncodedType_SInt64 = 11,
kUpb_EncodedType_Enum = 12,
kUpb_EncodedType_Bool = 13,
kUpb_EncodedType_Bytes = 14,
kUpb_EncodedType_String = 15,
kUpb_EncodedType_Group = 16,
kUpb_EncodedType_Message = 17,
kUpb_EncodedType_RepeatedBase = 20,
} upb_EncodedType;
typedef enum {
kUpb_EncodedFieldModifier_IsUnpacked = 1 << 0,
kUpb_EncodedFieldModifier_JspbString = 1 << 1,
// upb only.
kUpb_EncodedFieldModifier_IsProto3Singular = 1 << 2,
kUpb_EncodedFieldModifier_IsRequired = 1 << 3,
} upb_EncodedFieldModifier;
enum {
kUpb_EncodedValue_MinField = ' ',
kUpb_EncodedValue_MaxField = 'K',
kUpb_EncodedValue_MinModifier = 'L',
kUpb_EncodedValue_MaxModifier = '[',
kUpb_EncodedValue_End = '^',
kUpb_EncodedValue_MinSkip = '_',
kUpb_EncodedValue_MaxSkip = '~',
kUpb_EncodedValue_OneofSeparator = '~',
kUpb_EncodedValue_FieldSeparator = '|',
kUpb_EncodedValue_MinOneofField = ' ',
kUpb_EncodedValue_MaxOneofField = 'b',
};
char upb_ToBase92(int8_t ch) {
static const char kUpb_ToBase92[] = {
' ', '!', '#', '$', '%', '&', '(', ')', '*', '+', ',', '-', '.', '/',
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=',
'>', '?', '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K',
'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y',
'Z', '[', ']', '^', '_', '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h',
'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
'w', 'x', 'y', 'z', '{', '|', '}', '~',
};
UPB_ASSERT(0 <= ch && ch < 92);
return kUpb_ToBase92[ch];
}
char upb_FromBase92(uint8_t ch) {
static const int8_t kUpb_FromBase92[] = {
0, 1, -1, 2, 3, 4, 5, -1, 6, 7, 8, 9, 10, 11, 12, 13,
14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, -1, 58, 59, 60,
61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91,
};
if (' ' > ch || ch > '~') return -1;
return kUpb_FromBase92[ch - ' '];
}
/** upb_MtDataEncoder *********************************************************/
typedef struct {
char* buf_start; // Only for checking kUpb_MtDataEncoder_MinSize.
uint64_t msg_mod;
uint32_t last_field_num;
enum {
kUpb_OneofState_NotStarted,
kUpb_OneofState_StartedOneof,
kUpb_OneofState_EmittedOneofField,
} oneof_state;
} upb_MtDataEncoderInternal;
static upb_MtDataEncoderInternal* upb_MtDataEncoder_GetInternal(
upb_MtDataEncoder* e, char* buf_start) {
upb_MtDataEncoderInternal* ret = (upb_MtDataEncoderInternal*)e->internal;
ret->buf_start = buf_start;
return ret;
}
static char* upb_MtDataEncoder_Put(upb_MtDataEncoder* e, char* ptr, char ch) {
upb_MtDataEncoderInternal* in = (upb_MtDataEncoderInternal*)e->internal;
UPB_ASSERT(ptr - in->buf_start < kUpb_MtDataEncoder_MinSize);
if (ptr == e->end) return NULL;
*ptr++ = upb_ToBase92(ch);
return ptr;
}
static char* upb_MtDataEncoder_PutBase92Varint(upb_MtDataEncoder* e, char* ptr,
uint32_t val, int min, int max) {
int shift = _upb_Log2Ceiling(upb_FromBase92(max) - upb_FromBase92(min) + 1);
UPB_ASSERT(shift <= 6);
uint32_t mask = (1 << shift) - 1;
do {
uint32_t bits = val & mask;
ptr = upb_MtDataEncoder_Put(e, ptr, bits + upb_FromBase92(min));
if (!ptr) return NULL;
val >>= shift;
} while (val);
return ptr;
}
char* upb_MtDataEncoder_StartMessage(upb_MtDataEncoder* e, char* ptr,
uint64_t msg_mod) {
upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr);
in->msg_mod = msg_mod;
in->last_field_num = 0;
in->oneof_state = kUpb_OneofState_NotStarted;
return ptr;
}
char* upb_MtDataEncoder_PutField(upb_MtDataEncoder* e, char* ptr,
upb_FieldType type, uint32_t field_num,
uint64_t modifiers) {
static const char kUpb_TypeToEncoded[] = {
[kUpb_FieldType_Double] = kUpb_EncodedType_Double,
[kUpb_FieldType_Float] = kUpb_EncodedType_Float,
[kUpb_FieldType_Int64] = kUpb_EncodedType_Int64,
[kUpb_FieldType_UInt64] = kUpb_EncodedType_UInt64,
[kUpb_FieldType_Int32] = kUpb_EncodedType_Int32,
[kUpb_FieldType_Fixed64] = kUpb_EncodedType_Fixed64,
[kUpb_FieldType_Fixed32] = kUpb_EncodedType_Fixed32,
[kUpb_FieldType_Bool] = kUpb_EncodedType_Bool,
[kUpb_FieldType_String] = kUpb_EncodedType_String,
[kUpb_FieldType_Group] = kUpb_EncodedType_Group,
[kUpb_FieldType_Message] = kUpb_EncodedType_Message,
[kUpb_FieldType_Bytes] = kUpb_EncodedType_Bytes,
[kUpb_FieldType_UInt32] = kUpb_EncodedType_UInt32,
[kUpb_FieldType_Enum] = kUpb_EncodedType_Enum,
[kUpb_FieldType_SFixed32] = kUpb_EncodedType_SFixed32,
[kUpb_FieldType_SFixed64] = kUpb_EncodedType_SFixed64,
[kUpb_FieldType_SInt32] = kUpb_EncodedType_SInt32,
[kUpb_FieldType_SInt64] = kUpb_EncodedType_SInt64,
};
upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr);
if (field_num <= in->last_field_num) return NULL;
if (in->last_field_num + 1 != field_num) {
// Put skip.
UPB_ASSERT(field_num > in->last_field_num);
uint32_t skip = field_num - in->last_field_num;
ptr = upb_MtDataEncoder_PutBase92Varint(
e, ptr, skip, kUpb_EncodedValue_MinSkip, kUpb_EncodedValue_MaxSkip);
if (!ptr) return NULL;
}
in->last_field_num = field_num;
// Put field type.
int encoded_type = kUpb_TypeToEncoded[type];
if (modifiers & kUpb_FieldModifier_IsRepeated) {
// Repeated fields shift the type number up (unlike other modifiers which
// are bit flags).
encoded_type += kUpb_EncodedType_RepeatedBase;
}
ptr = upb_MtDataEncoder_Put(e, ptr, encoded_type);
if (!ptr) return NULL;
uint32_t encoded_modifiers = 0;
if (modifiers & kUpb_FieldModifier_IsProto3Singular) {
encoded_modifiers |= kUpb_EncodedFieldModifier_IsProto3Singular;
}
if (modifiers & kUpb_FieldModifier_IsRequired) {
encoded_modifiers |= kUpb_EncodedFieldModifier_IsRequired;
}
if ((modifiers & kUpb_FieldModifier_IsPacked) !=
(in->msg_mod & kUpb_MessageModifier_DefaultIsPacked)) {
encoded_modifiers |= kUpb_EncodedFieldModifier_IsUnpacked;
}
if (encoded_modifiers) {
ptr = upb_MtDataEncoder_PutBase92Varint(e, ptr, encoded_modifiers,
kUpb_EncodedValue_MinModifier,
kUpb_EncodedValue_MaxModifier);
}
return ptr;
}
char* upb_MtDataEncoder_StartOneof(upb_MtDataEncoder* e, char* ptr) {
upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr);
if (in->oneof_state == kUpb_OneofState_NotStarted) {
ptr = upb_MtDataEncoder_Put(e, ptr, upb_FromBase92(kUpb_EncodedValue_End));
} else {
ptr = upb_MtDataEncoder_Put(
e, ptr, upb_FromBase92(kUpb_EncodedValue_OneofSeparator));
}
in->oneof_state = kUpb_OneofState_StartedOneof;
return ptr;
}
char* upb_MtDataEncoder_PutOneofField(upb_MtDataEncoder* e, char* ptr,
uint32_t field_num) {
upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr);
if (in->oneof_state == kUpb_OneofState_EmittedOneofField) {
ptr = upb_MtDataEncoder_Put(
e, ptr, upb_FromBase92(kUpb_EncodedValue_FieldSeparator));
if (!ptr) return NULL;
}
ptr = upb_MtDataEncoder_PutBase92Varint(e, ptr, field_num, upb_ToBase92(0),
upb_ToBase92(63));
in->oneof_state = kUpb_OneofState_EmittedOneofField;
return ptr;
}
const upb_MiniTable_Field* upb_MiniTable_FindFieldByNumber(
const upb_MiniTable* table, uint32_t number) {
int n = table->field_count;
for (int i = 0; i < n; i++) {
if (table->fields[i].number == number) {
return &table->fields[i];
}
}
return NULL;
}
/** Data decoder **************************************************************/
// Note: we sort by this number when calculating layout order.
typedef enum {
kUpb_LayoutItemType_OneofCase, // Oneof case.
kUpb_LayoutItemType_OneofField, // Oneof field data.
kUpb_LayoutItemType_Field, // Non-oneof field data.
kUpb_LayoutItemType_Max = kUpb_LayoutItemType_Field,
} upb_LayoutItemType;
#define kUpb_LayoutItem_IndexSentinel ((uint16_t)-1)
typedef struct {
// Index of the corresponding field. When this is a oneof field, the field's
// offset will be the index of the next field in a linked list.
uint16_t field_index;
uint16_t offset;
upb_FieldRep rep;
upb_LayoutItemType type;
} upb_LayoutItem;
typedef struct {
upb_LayoutItem* data;
size_t size;
size_t capacity;
} upb_LayoutItemVector;
typedef struct {
const char* end;
upb_MiniTable* table;
upb_MiniTable_Field* fields;
upb_MiniTablePlatform platform;
upb_LayoutItemVector vec;
upb_Arena* arena;
upb_Status* status;
jmp_buf err;
} upb_MtDecoder;
UPB_PRINTF(2, 3)
UPB_NORETURN static void upb_MtDecoder_ErrorFormat(upb_MtDecoder* d,
const char* fmt, ...) {
va_list argp;
upb_Status_SetErrorMessage(d->status, "Error building mini table: ");
va_start(argp, fmt);
upb_Status_VAppendErrorFormat(d->status, fmt, argp);
va_end(argp);
UPB_LONGJMP(d->err, 1);
}
static void upb_MtDecoder_CheckOutOfMemory(upb_MtDecoder* d, const void* ptr) {
if (!ptr) upb_MtDecoder_ErrorFormat(d, "Out of memory");
}
// In each field's offset, we temporarily store a presence classifier:
enum PresenceClass {
kNoPresence = 0,
kHasbitPresence = 1,
kRequiredPresence = 2,
kOneofBase = 3,
// Negative values refer to a specific oneof with that number. Positive
// values >= kOneofBase indicate that this field is in a oneof, and specify
// the next field in this oneof's linked list.
};
static const char* upb_MiniTable_DecodeBase92Varint(upb_MtDecoder* d,
const char* ptr,
char first_ch, uint8_t min,
uint8_t max,
uint32_t* out_val) {
uint32_t val = 0;
uint32_t shift = 0;
const int bits_per_char =
_upb_Log2Ceiling(upb_FromBase92(max) - upb_FromBase92(min));
char ch = first_ch;
while (1) {
uint32_t bits = upb_FromBase92(ch) - upb_FromBase92(min);
UPB_ASSERT(shift < 32 - bits_per_char);
val |= bits << shift;
if (ptr == d->end || *ptr < min || max < *ptr) {
*out_val = val;
return ptr;
}
ch = *ptr++;
shift += bits_per_char;
}
}
static bool upb_MiniTable_HasSub(char type, uint64_t msg_modifiers) {
return type == kUpb_EncodedType_Message || type == kUpb_EncodedType_Group ||
(type == kUpb_EncodedType_Enum &&
(msg_modifiers & kUpb_MessageModifier_HasClosedEnums));
}
static void upb_MiniTable_SetField(upb_MtDecoder* d, uint8_t ch,
upb_MiniTable_Field* field,
uint64_t msg_modifiers,
uint32_t* sub_count) {
static const char kUpb_EncodedToFieldRep[] = {
[kUpb_EncodedType_Double] = kUpb_FieldRep_8Byte,
[kUpb_EncodedType_Float] = kUpb_FieldRep_4Byte,
[kUpb_EncodedType_Int64] = kUpb_FieldRep_8Byte,
[kUpb_EncodedType_UInt64] = kUpb_FieldRep_8Byte,
[kUpb_EncodedType_Int32] = kUpb_FieldRep_4Byte,
[kUpb_EncodedType_Fixed64] = kUpb_FieldRep_8Byte,
[kUpb_EncodedType_Fixed32] = kUpb_FieldRep_4Byte,
[kUpb_EncodedType_Bool] = kUpb_FieldRep_1Byte,
[kUpb_EncodedType_String] = kUpb_FieldRep_StringView,
[kUpb_EncodedType_Group] = kUpb_FieldRep_Pointer,
[kUpb_EncodedType_Message] = kUpb_FieldRep_Pointer,
[kUpb_EncodedType_Bytes] = kUpb_FieldRep_StringView,
[kUpb_EncodedType_UInt32] = kUpb_FieldRep_4Byte,
[kUpb_EncodedType_Enum] = kUpb_FieldRep_4Byte,
[kUpb_EncodedType_SFixed32] = kUpb_FieldRep_4Byte,
[kUpb_EncodedType_SFixed64] = kUpb_FieldRep_8Byte,
[kUpb_EncodedType_SInt32] = kUpb_FieldRep_4Byte,
[kUpb_EncodedType_SInt64] = kUpb_FieldRep_8Byte,
};
static const char kUpb_EncodedToType[] = {
[kUpb_EncodedType_Double] = kUpb_FieldType_Double,
[kUpb_EncodedType_Float] = kUpb_FieldType_Float,
[kUpb_EncodedType_Int64] = kUpb_FieldType_Int64,
[kUpb_EncodedType_UInt64] = kUpb_FieldType_UInt64,
[kUpb_EncodedType_Int32] = kUpb_FieldType_Int32,
[kUpb_EncodedType_Fixed64] = kUpb_FieldType_Fixed64,
[kUpb_EncodedType_Fixed32] = kUpb_FieldType_Fixed32,
[kUpb_EncodedType_Bool] = kUpb_FieldType_Bool,
[kUpb_EncodedType_String] = kUpb_FieldType_String,
[kUpb_EncodedType_Group] = kUpb_FieldType_Group,
[kUpb_EncodedType_Message] = kUpb_FieldType_Message,
[kUpb_EncodedType_Bytes] = kUpb_FieldType_Bytes,
[kUpb_EncodedType_UInt32] = kUpb_FieldType_UInt32,
[kUpb_EncodedType_Enum] = kUpb_FieldType_Enum,
[kUpb_EncodedType_SFixed32] = kUpb_FieldType_SFixed32,
[kUpb_EncodedType_SFixed64] = kUpb_FieldType_SFixed64,
[kUpb_EncodedType_SInt32] = kUpb_FieldType_SInt32,
[kUpb_EncodedType_SInt64] = kUpb_FieldType_SInt64,
};
int8_t type = upb_FromBase92(ch);
if (ch >= upb_ToBase92(kUpb_EncodedType_RepeatedBase)) {
type -= kUpb_EncodedType_RepeatedBase;
field->mode = kUpb_FieldMode_Array;
field->mode |= kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift;
field->offset = kNoPresence;
} else {
field->mode = kUpb_FieldMode_Scalar;
field->mode |= kUpb_EncodedToFieldRep[type] << kUpb_FieldRep_Shift;
field->offset = kHasbitPresence;
}
if (type >= 18) {
upb_MtDecoder_ErrorFormat(d, "Invalid field type: %d", (int)type);
UPB_UNREACHABLE();
}
field->descriptortype = kUpb_EncodedToType[type];
if (upb_MiniTable_HasSub(type, msg_modifiers)) {
field->submsg_index = sub_count ? (*sub_count)++ : 0;
}
}
static void upb_MtDecoder_ModifyField(upb_MtDecoder* d, uint32_t mod,
upb_MiniTable_Field* field) {
if (mod & kUpb_EncodedFieldModifier_IsUnpacked) {
field->mode &= ~kUpb_LabelFlags_IsPacked;
} else {
field->mode |= kUpb_LabelFlags_IsPacked;
}
bool singular = mod & kUpb_EncodedFieldModifier_IsProto3Singular;
bool required = mod & kUpb_EncodedFieldModifier_IsRequired;
// Validate.
if ((singular || required) && field->offset != kHasbitPresence) {
upb_MtDecoder_ErrorFormat(
d, "Invalid modifier(s) for repeated field %" PRIu32, field->number);
UPB_UNREACHABLE();
}
if (singular && required) {
upb_MtDecoder_ErrorFormat(
d, "Field %" PRIu32 " cannot be both singular and required",
field->number);
UPB_UNREACHABLE();
}
if (singular) field->offset = kNoPresence;
if (required) field->offset = kRequiredPresence;
}
static void upb_MtDecoder_PushItem(upb_MtDecoder* d, upb_LayoutItem item) {
if (d->vec.size == d->vec.capacity) {
size_t new_cap = UPB_MAX(8, d->vec.size * 2);
d->vec.data = realloc(d->vec.data, new_cap * sizeof(*d->vec.data));
upb_MtDecoder_CheckOutOfMemory(d, d->vec.data);
d->vec.capacity = new_cap;
}
d->vec.data[d->vec.size++] = item;
}
static void upb_MtDecoder_PushOneof(upb_MtDecoder* d, upb_LayoutItem item) {
if (item.field_index == kUpb_LayoutItem_IndexSentinel) {
upb_MtDecoder_ErrorFormat(d, "Empty oneof");
UPB_UNREACHABLE();
}
item.field_index -= kOneofBase;
// Push oneof data.
item.type = kUpb_LayoutItemType_OneofField;
upb_MtDecoder_PushItem(d, item);
// Push oneof case.
item.rep = kUpb_FieldRep_4Byte; // Field Number.
item.type = kUpb_LayoutItemType_OneofCase;
upb_MtDecoder_PushItem(d, item);
}
static const char* upb_MtDecoder_DecodeOneofField(upb_MtDecoder* d,
const char* ptr,
char first_ch,
upb_LayoutItem* item) {
uint32_t field_num;
ptr = upb_MiniTable_DecodeBase92Varint(
d, ptr, first_ch, kUpb_EncodedValue_MinOneofField,
kUpb_EncodedValue_MaxOneofField, &field_num);
upb_MiniTable_Field* f =
(void*)upb_MiniTable_FindFieldByNumber(d->table, field_num);
if (!f) {
upb_MtDecoder_ErrorFormat(d,
"Couldn't add field number %" PRIu32
" to oneof, no such field number.",
field_num);
UPB_UNREACHABLE();
}
if (f->offset != kHasbitPresence) {
upb_MtDecoder_ErrorFormat(
d,
"Cannot add repeated, required, or singular field %" PRIu32
" to oneof.",
field_num);
UPB_UNREACHABLE();
}
// Oneof storage must be large enough to accommodate the largest member.
item->rep = UPB_MAX(item->rep, f->mode >> kUpb_FieldRep_Shift);
// Prepend this field to the linked list.
f->offset = item->field_index;
item->field_index = (f - d->fields) + kOneofBase;
return ptr;
}
static const char* upb_MtDecoder_DecodeOneofs(upb_MtDecoder* d,
const char* ptr) {
upb_LayoutItem item = {.rep = 0,
.field_index = kUpb_LayoutItem_IndexSentinel};
while (ptr < d->end) {
char ch = *ptr++;
if (ch == kUpb_EncodedValue_FieldSeparator) {
// Field separator, no action needed.
} else if (ch == kUpb_EncodedValue_OneofSeparator) {
// End of oneof.
upb_MtDecoder_PushOneof(d, item);
item.field_index = kUpb_LayoutItem_IndexSentinel; // Move to next oneof.
} else {
ptr = upb_MtDecoder_DecodeOneofField(d, ptr, ch, &item);
}
}
// Push final oneof.
upb_MtDecoder_PushOneof(d, item);
return ptr;
}
static const char* upb_MtDecoder_ParseModifier(upb_MtDecoder* d,
const char* ptr, char first_ch,
uint16_t field_count,
uint64_t* msg_modifiers) {
uint32_t mod;
ptr = upb_MiniTable_DecodeBase92Varint(d, ptr, first_ch,
kUpb_EncodedValue_MinModifier,
kUpb_EncodedValue_MaxModifier, &mod);
if (field_count == 0) {
if (!d->table) {
upb_MtDecoder_ErrorFormat(d, "Extensions cannot have message modifiers");
UPB_UNREACHABLE();
}
*msg_modifiers = mod;
} else {
upb_MiniTable_Field* field = &d->fields[field_count - 1];
upb_MtDecoder_ModifyField(d, mod, field);
}
return ptr;
}
static void upb_MtDecoder_AllocateSubs(upb_MtDecoder* d, uint32_t sub_count) {
size_t subs_bytes = sizeof(*d->table->subs) * sub_count;
d->table->subs = upb_Arena_Malloc(d->arena, subs_bytes);
upb_MtDecoder_CheckOutOfMemory(d, d->table->subs);
}
static void upb_MtDecoder_Parse(upb_MtDecoder* d, const char* ptr, size_t len,
void* fields, size_t field_size,
uint16_t* field_count, uint32_t* sub_count) {
uint64_t msg_modifiers = 0;
uint32_t last_field_number = 0;
bool need_dense_below = d->table != NULL;
d->end = UPB_PTRADD(ptr, len);
while (ptr < d->end) {
char ch = *ptr++;
if (ch <= kUpb_EncodedValue_MaxField) {
upb_MiniTable_Field* field = fields;
*field_count += 1;
fields = (char*)fields + field_size;
field->number = ++last_field_number;
upb_MiniTable_SetField(d, ch, field, msg_modifiers, sub_count);
} else if (kUpb_EncodedValue_MinModifier <= ch &&
ch <= kUpb_EncodedValue_MaxModifier) {
ptr =
upb_MtDecoder_ParseModifier(d, ptr, ch, *field_count, &msg_modifiers);
} else if (ch == kUpb_EncodedValue_End) {
if (!d->table) {
upb_MtDecoder_ErrorFormat(d, "Extensions cannot have oneofs.");
UPB_UNREACHABLE();
}
ptr = upb_MtDecoder_DecodeOneofs(d, ptr);
} else if (kUpb_EncodedValue_MinSkip <= ch &&
ch <= kUpb_EncodedValue_MaxSkip) {
if (need_dense_below) {
d->table->dense_below = d->table->field_count;
need_dense_below = false;
}
uint32_t skip;
ptr = upb_MiniTable_DecodeBase92Varint(d, ptr, ch,
kUpb_EncodedValue_MinSkip,
kUpb_EncodedValue_MaxSkip, &skip);
last_field_number += skip;
last_field_number--; // Next field seen will increment.
}
}
if (need_dense_below) {
d->table->dense_below = d->table->field_count;
}
}
static void upb_MtDecoder_ParseMessage(upb_MtDecoder* d, const char* data,
size_t len) {
// Buffer length is an upper bound on the number of fields. We will return
// what we don't use.
d->fields = upb_Arena_Malloc(d->arena, sizeof(*d->fields) * len);
upb_MtDecoder_CheckOutOfMemory(d, d->fields);
uint32_t sub_count = 0;
d->table->field_count = 0;
d->table->fields = d->fields;
upb_MtDecoder_Parse(d, data, len, d->fields, sizeof(*d->fields),
&d->table->field_count, &sub_count);
upb_Arena_ShrinkLast(d->arena, d->fields, sizeof(*d->fields) * len,
sizeof(*d->fields) * d->table->field_count);
d->table->fields = d->fields;
upb_MtDecoder_AllocateSubs(d, sub_count);
}
int upb_MtDecoder_CompareFields(const void* _a, const void* _b) {
const upb_LayoutItem* a = _a;
const upb_LayoutItem* b = _b;
// Currently we just sort by:
// 1. rep (smallest fields first)
// 2. type (oneof cases first)
// 2. field_index (smallest numbers first)
// The main goal of this is to reduce space lost to padding.
// Later we may have more subtle reasons to prefer a different ordering.
const int rep_bits = _upb_Log2Ceiling(kUpb_FieldRep_Max);
const int type_bits = _upb_Log2Ceiling(kUpb_LayoutItemType_Max);
const int idx_bits = (sizeof(a->field_index) * 8);
UPB_ASSERT(idx_bits + rep_bits + type_bits < 32);
#define UPB_COMBINE(rep, ty, idx) (((rep << type_bits) | ty) << idx_bits) | idx
uint32_t a_packed = UPB_COMBINE(a->rep, a->type, a->field_index);
uint32_t b_packed = UPB_COMBINE(b->rep, b->type, b->field_index);
assert(a_packed != b_packed);
#undef UPB_COMBINE
return a_packed < b_packed ? -1 : 1;
}
static bool upb_MtDecoder_SortLayoutItems(upb_MtDecoder* d) {
// Add items for all non-oneof fields (oneofs were already added).
int n = d->table->field_count;
for (int i = 0; i < n; i++) {
upb_MiniTable_Field* f = &d->fields[i];
if (f->offset >= kOneofBase) continue;
upb_LayoutItem item = {.field_index = i,
.rep = f->mode >> kUpb_FieldRep_Shift,
.type = kUpb_LayoutItemType_Field};
upb_MtDecoder_PushItem(d, item);
}
if (d->vec.size) {
qsort(d->vec.data, d->vec.size, sizeof(*d->vec.data),
upb_MtDecoder_CompareFields);
}
return true;
}
static size_t upb_MiniTable_DivideRoundUp(size_t n, size_t d) {
return (n + d - 1) / d;
}
static void upb_MtDecoder_AssignHasbits(upb_MiniTable* ret) {
int n = ret->field_count;
int last_hasbit = 0; // 0 cannot be used.
// First assign required fields, which must have the lowest hasbits.
for (int i = 0; i < n; i++) {
upb_MiniTable_Field* field = (upb_MiniTable_Field*)&ret->fields[i];
if (field->offset == kRequiredPresence) {
field->presence = ++last_hasbit;
}
}
ret->required_count = last_hasbit;
// Next assign non-required hasbit fields.
for (int i = 0; i < n; i++) {
upb_MiniTable_Field* field = (upb_MiniTable_Field*)&ret->fields[i];
if (field->offset == kHasbitPresence) {
field->presence = ++last_hasbit;
}
}
ret->size = upb_MiniTable_DivideRoundUp(last_hasbit, 8);
}
size_t upb_MtDecoder_SizeOfRep(upb_FieldRep rep,
upb_MiniTablePlatform platform) {
static const uint8_t kRepToSize32[] = {
[kUpb_FieldRep_1Byte] = 1, [kUpb_FieldRep_4Byte] = 4,
[kUpb_FieldRep_Pointer] = 4, [kUpb_FieldRep_StringView] = 8,
[kUpb_FieldRep_8Byte] = 8,
};
static const uint8_t kRepToSize64[] = {
[kUpb_FieldRep_1Byte] = 1, [kUpb_FieldRep_4Byte] = 4,
[kUpb_FieldRep_Pointer] = 8, [kUpb_FieldRep_StringView] = 16,
[kUpb_FieldRep_8Byte] = 8,
};
UPB_ASSERT(sizeof(upb_StringView) ==
UPB_SIZE(kRepToSize32, kRepToSize64)[kUpb_FieldRep_StringView]);
return platform == kUpb_MiniTablePlatform_32Bit ? kRepToSize32[rep]
: kRepToSize64[rep];
}
size_t upb_MtDecoder_AlignOfRep(upb_FieldRep rep,
upb_MiniTablePlatform platform) {
static const uint8_t kRepToAlign32[] = {
[kUpb_FieldRep_1Byte] = 1, [kUpb_FieldRep_4Byte] = 4,
[kUpb_FieldRep_Pointer] = 4, [kUpb_FieldRep_StringView] = 4,
[kUpb_FieldRep_8Byte] = 8,
};
static const uint8_t kRepToAlign64[] = {
[kUpb_FieldRep_1Byte] = 1, [kUpb_FieldRep_4Byte] = 4,
[kUpb_FieldRep_Pointer] = 8, [kUpb_FieldRep_StringView] = 8,
[kUpb_FieldRep_8Byte] = 8,
};
UPB_ASSERT(UPB_ALIGN_OF(upb_StringView) ==
UPB_SIZE(kRepToAlign32, kRepToAlign64)[kUpb_FieldRep_StringView]);
return platform == kUpb_MiniTablePlatform_32Bit ? kRepToAlign32[rep]
: kRepToAlign64[rep];
}
size_t upb_MtDecoder_Place(upb_MtDecoder* d, upb_FieldRep rep) {
size_t size = upb_MtDecoder_SizeOfRep(rep, d->platform);
size_t align = upb_MtDecoder_AlignOfRep(rep, d->platform);
size_t ret = UPB_ALIGN_UP(d->table->size, align);
d->table->size = ret + size;
return ret;
}
static bool upb_MtDecoder_AssignOffsets(upb_MtDecoder* d) {
upb_LayoutItem* end = UPB_PTRADD(d->vec.data, d->vec.size);
// Compute offsets.
for (upb_LayoutItem* item = d->vec.data; item < end; item++) {
item->offset = upb_MtDecoder_Place(d, item->rep);
}
// Assign oneof case offsets. We must do these first, since assigning
// actual offsets will overwrite the links of the linked list.
for (upb_LayoutItem* item = d->vec.data; item < end; item++) {
if (item->type != kUpb_LayoutItemType_OneofCase) continue;
upb_MiniTable_Field* f = &d->fields[item->field_index];
while (true) {
f->presence = ~item->offset;
if (f->offset == kUpb_LayoutItem_IndexSentinel) break;
UPB_ASSERT(f->offset - kOneofBase < d->table->field_count);
f = &d->fields[f->offset - kOneofBase];
}
}
// Assign offsets.
for (upb_LayoutItem* item = d->vec.data; item < end; item++) {
upb_MiniTable_Field* f = &d->fields[item->field_index];
switch (item->type) {
case kUpb_LayoutItemType_OneofField:
while (true) {
uint16_t next_offset = f->offset;
f->offset = item->offset;
if (next_offset == kUpb_LayoutItem_IndexSentinel) break;
f = &d->fields[next_offset - kOneofBase];
}
break;
case kUpb_LayoutItemType_Field:
f->offset = item->offset;
break;
default:
break;
}
}
return true;
}
upb_MiniTable* upb_MiniTable_BuildWithBuf(const char* data, size_t len,
upb_MiniTablePlatform platform,
upb_Arena* arena, void** buf,
size_t* buf_size,
upb_Status* status) {
upb_MtDecoder decoder = {
.platform = platform,
.vec =
{
.data = *buf,
.capacity = *buf_size / sizeof(*decoder.vec.data),
.size = 0,
},
.arena = arena,
.status = status,
.table = upb_Arena_Malloc(arena, sizeof(*decoder.table)),
};
if (UPB_SETJMP(decoder.err)) {
decoder.table = NULL;
goto done;
}
upb_MtDecoder_CheckOutOfMemory(&decoder, decoder.table);
decoder.table->size = 0;
decoder.table->field_count = 0;
decoder.table->ext = kUpb_ExtMode_NonExtendable;
decoder.table->dense_below = 0;
decoder.table->table_mask = 0;
decoder.table->required_count = 0;
upb_MtDecoder_ParseMessage(&decoder, data, len);
upb_MtDecoder_AssignHasbits(decoder.table);
upb_MtDecoder_SortLayoutItems(&decoder);
upb_MtDecoder_AssignOffsets(&decoder);
done:
*buf = decoder.vec.data;
*buf_size = decoder.vec.capacity / sizeof(*decoder.vec.data);
return decoder.table;
}
upb_MiniTable* upb_MiniTable_BuildMessageSet(upb_MiniTablePlatform platform,
upb_Arena* arena) {
upb_MiniTable* ret = upb_Arena_Malloc(arena, sizeof(*ret));
if (!ret) return NULL;
ret->size = 0;
ret->field_count = 0;
ret->ext = kUpb_ExtMode_IsMessageSet;
ret->dense_below = 0;
ret->table_mask = 0;
ret->required_count = 0;
return ret;
}
upb_MiniTable* upb_MiniTable_BuildMapEntry(upb_FieldType key_type,
upb_FieldType value_type,
upb_MiniTablePlatform platform,
upb_Arena* arena) {
upb_MiniTable* ret = upb_Arena_Malloc(arena, sizeof(*ret));
upb_MiniTable_Field* fields = upb_Arena_Malloc(arena, sizeof(*fields) * 2);
if (!ret || !fields) return NULL;
upb_MiniTable_Sub* subs = NULL;
if (value_type == kUpb_FieldType_Message ||
value_type == kUpb_FieldType_Group) {
subs = upb_Arena_Malloc(arena, sizeof(*subs));
if (!subs) return NULL;
}
size_t field_size =
upb_MtDecoder_SizeOfRep(kUpb_FieldRep_StringView, platform);
fields[0].number = 1;
fields[1].number = 2;
fields[0].mode = kUpb_FieldMode_Scalar;
fields[1].mode = kUpb_FieldMode_Scalar;
fields[0].presence = 0;
fields[1].presence = 0;
fields[0].descriptortype = key_type;
fields[1].descriptortype = value_type;
fields[0].offset = 0;
fields[1].offset = field_size;
fields[1].submsg_index = 0;
ret->size = UPB_ALIGN_UP(2 * field_size, 8);
ret->field_count = 2;
ret->ext = kUpb_ExtMode_NonExtendable | kUpb_ExtMode_IsMapEntry;
ret->dense_below = 2;
ret->table_mask = 0;
ret->required_count = 0;
ret->subs = subs;
ret->fields = fields;
return ret;
}
upb_MiniTable_Extension* upb_MiniTable_BuildExtensions(const char* data,
size_t len,
size_t* ext_count,
upb_Arena* arena,
upb_Status* status) {
upb_MtDecoder decoder = {
.arena = arena,
.status = status,
.table = NULL,
};
upb_MiniTable_Extension* exts;
if (UPB_SETJMP(decoder.err)) {
exts = NULL;
*ext_count = 0;
goto done;
}
uint16_t count = 0;
exts = upb_Arena_Malloc(arena, len);
upb_MtDecoder_CheckOutOfMemory(&decoder, exts);
upb_MtDecoder_Parse(&decoder, data, len, exts, sizeof(*exts), &count, NULL);
upb_Arena_ShrinkLast(arena, exts, sizeof(*exts) * len, sizeof(*exts) * count);
done:
*ext_count = count;
return exts;
}
upb_MiniTable* upb_MiniTable_Build(const char* data, size_t len,
upb_MiniTablePlatform platform,
upb_Arena* arena, upb_Status* status) {
void* buf = NULL;
size_t size = 0;
upb_MiniTable* ret = upb_MiniTable_BuildWithBuf(data, len, platform, arena,
&buf, &size, status);
free(buf);
return ret;
}
void upb_MiniTable_SetSubMessage(upb_MiniTable* table,
upb_MiniTable_Field* field,
const upb_MiniTable* sub) {
UPB_ASSERT((uintptr_t)table->fields <= (uintptr_t)field &&
(uintptr_t)field <
(uintptr_t)(table->fields + table->field_count));
if (sub->ext & kUpb_ExtMode_IsMapEntry) {
field->mode =
(kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift) | kUpb_FieldMode_Map;
}
upb_MiniTable_Sub* table_sub = (void*)&table->subs[field->submsg_index];
table_sub->submsg = sub;
}
void upb_MiniTable_SetSubEnum(upb_MiniTable* table, upb_MiniTable_Field* field,
const upb_MiniTable_Enum* sub) {
UPB_ASSERT((uintptr_t)table->fields <= (uintptr_t)field &&
(uintptr_t)field <
(uintptr_t)(table->fields + table->field_count));
upb_MiniTable_Sub* table_sub = (void*)&table->subs[field->submsg_index];
table_sub->subenum = sub;
}

@ -0,0 +1,164 @@
/*
* Copyright (c) 2009-2022, Google LLC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Google LLC nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef UPB_MINI_TABLE_H_
#define UPB_MINI_TABLE_H_
#include "upb/msg_internal.h"
// Must be last.
#include "upb/port_def.inc"
#ifdef __cplusplus
extern "C" {
#endif
const upb_MiniTable_Field* upb_MiniTable_FindFieldByNumber(
const upb_MiniTable* table, uint32_t number);
typedef enum {
kUpb_MessageModifier_DefaultIsPacked = 1,
kUpb_MessageModifier_HasClosedEnums = 2,
kUpb_MessageModifier_IsExtendable = 4,
kUpb_MessageModifier_IsMapEntry = 8,
} kUpb_MessageModifier;
typedef enum {
kUpb_FieldModifier_IsRepeated = 1,
kUpb_FieldModifier_IsPacked = 2,
kUpb_FieldModifier_IsProto3Singular = 4,
kUpb_FieldModifier_IsRequired = 8,
} kUpb_FieldModifier;
/** upb_MtDataEncoder *********************************************************/
// Functions to encode a string in a format that can be loaded by
// upb_MiniTable_Build().
typedef struct {
char* end; // Limit of the buffer passed as a parameter.
// Aliased to internal-only members in .cc.
char internal[32];
} upb_MtDataEncoder;
// If the input buffer has at least this many bytes available, the encoder call
// is guaranteed to succeed (as long as field number order is maintained).
#define kUpb_MtDataEncoder_MinSize 16
// Encodes field/oneof information for a given message. The sequence of calls
// should look like:
//
// upb_MtDataEncoder e;
// char buf[256];
// char* ptr = buf;
// e.end = ptr + sizeof(buf);
// ptr = upb_MtDataEncoder_StartMessage(&e, ptr);
// // Fields *must* be in field number order.
// ptr = upb_MtDataEncoder_PutField(&e, ptr, ...);
// ptr = upb_MtDataEncoder_PutField(&e, ptr, ...);
// ptr = upb_MtDataEncoder_PutField(&e, ptr, ...);
//
// // If oneofs are present. Oneofs must be encoded after regular fields.
// ptr = upb_MiniTable_StartOneof(&e, ptr)
// ptr = upb_MiniTable_PutOneofField(&e, ptr, ...);
// ptr = upb_MiniTable_PutOneofField(&e, ptr, ...);
//
// ptr = upb_MiniTable_StartOneof(&e, ptr);
// ptr = upb_MiniTable_PutOneofField(&e, ptr, ...);
// ptr = upb_MiniTable_PutOneofField(&e, ptr, ...);
//
// Oneofs must be encoded after all regular fields.
char* upb_MtDataEncoder_StartMessage(upb_MtDataEncoder* e, char* buf,
uint64_t msg_mod);
char* upb_MtDataEncoder_PutField(upb_MtDataEncoder* e, char* buf,
upb_FieldType type, uint32_t field_num,
uint64_t field_mod);
char* upb_MtDataEncoder_StartOneof(upb_MtDataEncoder* e, char* buf);
char* upb_MtDataEncoder_PutOneofField(upb_MtDataEncoder* e, char* buf,
uint32_t field_num);
/** upb_MiniTable *************************************************************/
typedef enum {
kUpb_MiniTablePlatform_32Bit,
kUpb_MiniTablePlatform_64Bit,
kUpb_MiniTablePlatform_Native =
UPB_SIZE(kUpb_MiniTablePlatform_32Bit, kUpb_MiniTablePlatform_64Bit),
} upb_MiniTablePlatform;
// Builds a mini table from the data encoded in the buffer [data, len]. If any
// errors occur, returns NULL and sets a status message. In the success case,
// the caller must call upb_MiniTable_SetSub*() for all message or proto2 enum
// fields to link the table to the appropriate sub-tables.
upb_MiniTable* upb_MiniTable_Build(const char* data, size_t len,
upb_MiniTablePlatform platform,
upb_Arena* arena, upb_Status* status);
void upb_MiniTable_SetSubMessage(upb_MiniTable* table,
upb_MiniTable_Field* field,
const upb_MiniTable* sub);
void upb_MiniTable_SetSubEnum(upb_MiniTable* table, upb_MiniTable_Field* field,
const upb_MiniTable_Enum* sub);
upb_MiniTable_Extension* upb_MiniTable_BuildExtensions(const char* data,
size_t len,
size_t* ext_count,
upb_Arena* arena,
upb_Status* status);
void upb_MiniTable_ResolveExtension(upb_MiniTable_Extension* ext,
const upb_MiniTable* extendee,
upb_MiniTable_Sub sub);
// Special-case functions for MessageSet layout and map entries.
upb_MiniTable* upb_MiniTable_BuildMessageSet(upb_MiniTablePlatform platform,
upb_Arena* arena);
upb_MiniTable* upb_MiniTable_BuildMapEntry(upb_FieldType key_type,
upb_FieldType value_type,
upb_MiniTablePlatform platform,
upb_Arena* arena);
// Like upb_MiniTable_Build(), but the user provides a buffer of layout data so
// it can be reused from call to call, avoiding repeated realloc()/free().
//
// The caller owns `*buf` both before and after the call, and must free() it
// when it is no longer in use. The function will realloc() `*buf` as
// necessary, updating `*size` accordingly.
upb_MiniTable* upb_MiniTable_BuildWithBuf(const char* data, size_t len,
upb_MiniTablePlatform platform,
upb_Arena* arena, void** buf,
size_t* buf_size, upb_Status* status);
// For testing only.
char upb_ToBase92(int8_t ch);
char upb_FromBase92(uint8_t ch);
#ifdef __cplusplus
} /* extern "C" */
#endif
#include "upb/port_undef.inc"
#endif /* UPB_MINI_TABLE_H_ */

@ -0,0 +1,99 @@
/*
* Copyright (c) 2009-2021, Google LLC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Google LLC nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef UPB_MINI_TABLE_HPP_
#define UPB_MINI_TABLE_HPP_
#include <assert.h>
#include <string>
#include "upb/mini_table.h"
namespace upb {
class MtDataEncoder {
public:
MtDataEncoder() : appender_(&encoder_) {}
bool StartMessage(uint64_t msg_mod) {
return appender_([=](char* buf) {
return upb_MtDataEncoder_StartMessage(&encoder_, buf, msg_mod);
});
}
bool PutField(upb_FieldType type, uint32_t field_num, uint64_t field_mod) {
return appender_([=](char* buf) {
return upb_MtDataEncoder_PutField(&encoder_, buf, type, field_num,
field_mod);
});
}
bool StartOneof() {
return appender_([=](char* buf) {
return upb_MtDataEncoder_StartOneof(&encoder_, buf);
});
}
bool PutOneofField(uint32_t field_num) {
return appender_([=](char* buf) {
return upb_MtDataEncoder_PutOneofField(&encoder_, buf, field_num);
});
}
const std::string& data() const { return appender_.data(); }
private:
class StringAppender {
public:
StringAppender(upb_MtDataEncoder* e) { e->end = buf_ + sizeof(buf_); }
template <class T>
bool operator()(T&& func) {
char* end = func(buf_);
if (!end) return false;
// C++ does not guarantee that string has doubling growth behavior, but
// we need it to avoid O(n^2).
str_.reserve(_upb_Log2CeilingSize(str_.size() + (end - buf_)));
str_.append(buf_, end - buf_);
return true;
}
const std::string& data() const { return str_; }
private:
char buf_[kUpb_MtDataEncoder_MinSize];
std::string str_;
};
upb_MtDataEncoder encoder_;
StringAppender appender_;
};
} // namespace upb
#endif /* UPB_MINI_TABLE_HPP_ */

@ -0,0 +1,174 @@
/*
* Copyright (c) 2009-2021, Google LLC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Google LLC nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "upb/mini_table.hpp"
#include "absl/container/flat_hash_set.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "upb/msg_internal.h"
#include "upb/upb.hpp"
class MiniTableTest : public testing::TestWithParam<upb_MiniTablePlatform> {};
TEST_P(MiniTableTest, Empty) {
upb::Arena arena;
upb::Status status;
upb_MiniTable* table =
upb_MiniTable_Build(NULL, 0, GetParam(), arena.ptr(), status.ptr());
ASSERT_NE(nullptr, table);
EXPECT_EQ(0, table->field_count);
EXPECT_EQ(0, table->required_count);
}
TEST_P(MiniTableTest, AllScalarTypes) {
upb::Arena arena;
upb::MtDataEncoder e;
ASSERT_TRUE(e.StartMessage(0));
int count = 0;
for (int i = kUpb_FieldType_Double; i < kUpb_FieldType_SInt64; i++) {
ASSERT_TRUE(e.PutField(static_cast<upb_FieldType>(i), i, 0));
count++;
}
upb::Status status;
upb_MiniTable* table = upb_MiniTable_Build(
e.data().data(), e.data().size(), GetParam(), arena.ptr(), status.ptr());
ASSERT_NE(nullptr, table);
EXPECT_EQ(count, table->field_count);
absl::flat_hash_set<size_t> offsets;
for (int i = 0; i < 16; i++) {
const upb_MiniTable_Field* f = &table->fields[i];
EXPECT_EQ(i + 1, f->number);
EXPECT_EQ(i + kUpb_FieldType_Double, f->descriptortype);
EXPECT_EQ(kUpb_FieldMode_Scalar, f->mode & kUpb_FieldMode_Mask);
EXPECT_TRUE(offsets.insert(f->offset).second);
EXPECT_TRUE(f->offset < table->size);
}
EXPECT_EQ(0, table->required_count);
}
TEST_P(MiniTableTest, AllRepeatedTypes) {
upb::Arena arena;
upb::MtDataEncoder e;
ASSERT_TRUE(e.StartMessage(0));
int count = 0;
for (int i = kUpb_FieldType_Double; i < kUpb_FieldType_SInt64; i++) {
ASSERT_TRUE(e.PutField(static_cast<upb_FieldType>(i), i,
kUpb_FieldModifier_IsRepeated));
count++;
}
upb::Status status;
upb_MiniTable* table = upb_MiniTable_Build(
e.data().data(), e.data().size(), GetParam(), arena.ptr(), status.ptr());
ASSERT_NE(nullptr, table);
EXPECT_EQ(count, table->field_count);
absl::flat_hash_set<size_t> offsets;
for (int i = 0; i < 16; i++) {
const upb_MiniTable_Field* f = &table->fields[i];
EXPECT_EQ(i + 1, f->number);
EXPECT_EQ(i + kUpb_FieldType_Double, f->descriptortype);
EXPECT_EQ(kUpb_FieldMode_Array, f->mode & kUpb_FieldMode_Mask);
EXPECT_TRUE(offsets.insert(f->offset).second);
EXPECT_TRUE(f->offset < table->size);
}
EXPECT_EQ(0, table->required_count);
}
TEST_P(MiniTableTest, Skips) {
upb::Arena arena;
upb::MtDataEncoder e;
ASSERT_TRUE(e.StartMessage(0));
int count = 0;
std::vector<int> field_numbers;
for (int i = 0; i < 25; i++) {
int field_number = 1 << i;
field_numbers.push_back(field_number);
ASSERT_TRUE(e.PutField(kUpb_FieldType_Float, field_number, 0));
count++;
}
upb::Status status;
upb_MiniTable* table = upb_MiniTable_Build(
e.data().data(), e.data().size(), GetParam(), arena.ptr(), status.ptr());
ASSERT_NE(nullptr, table);
EXPECT_EQ(count, table->field_count);
absl::flat_hash_set<size_t> offsets;
for (size_t i = 0; i < field_numbers.size(); i++) {
const upb_MiniTable_Field* f = &table->fields[i];
EXPECT_EQ(field_numbers[i], f->number);
EXPECT_EQ(kUpb_FieldType_Float, f->descriptortype);
EXPECT_EQ(kUpb_FieldMode_Scalar, f->mode & kUpb_FieldMode_Mask);
EXPECT_TRUE(offsets.insert(f->offset).second);
EXPECT_TRUE(f->offset < table->size);
}
EXPECT_EQ(0, table->required_count);
}
TEST_P(MiniTableTest, AllScalarTypesOneof) {
upb::Arena arena;
upb::MtDataEncoder e;
ASSERT_TRUE(e.StartMessage(0));
int count = 0;
for (int i = kUpb_FieldType_Double; i < kUpb_FieldType_SInt64; i++) {
ASSERT_TRUE(e.PutField(static_cast<upb_FieldType>(i), i, 0));
count++;
}
ASSERT_TRUE(e.StartOneof());
for (int i = kUpb_FieldType_Double; i < kUpb_FieldType_SInt64; i++) {
ASSERT_TRUE(e.PutOneofField(i));
}
upb::Status status;
upb_MiniTable* table = upb_MiniTable_Build(
e.data().data(), e.data().size(), GetParam(), arena.ptr(), status.ptr());
ASSERT_NE(nullptr, table) << status.error_message();
EXPECT_EQ(count, table->field_count);
absl::flat_hash_set<size_t> offsets;
for (int i = 0; i < 16; i++) {
const upb_MiniTable_Field* f = &table->fields[i];
EXPECT_EQ(i + 1, f->number);
EXPECT_EQ(i + kUpb_FieldType_Double, f->descriptortype);
EXPECT_EQ(kUpb_FieldMode_Scalar, f->mode & kUpb_FieldMode_Mask);
// For a oneof all fields have the same offset.
EXPECT_EQ(table->fields[0].offset, f->offset);
// All presence fields should point to the same oneof case offset.
size_t case_ofs = _upb_oneofcase_ofs(f);
EXPECT_EQ(table->fields[0].presence, f->presence);
EXPECT_TRUE(f->offset < table->size);
EXPECT_TRUE(case_ofs < table->size);
EXPECT_TRUE(case_ofs != f->offset);
}
EXPECT_EQ(0, table->required_count);
}
INSTANTIATE_TEST_SUITE_P(Platforms, MiniTableTest,
testing::Values(kUpb_MiniTablePlatform_32Bit,
kUpb_MiniTablePlatform_64Bit));
TEST(MiniTablePlatformIndependentTest, Base92Roundtrip) {
for (char i = 0; i < 92; i++) {
EXPECT_EQ(i, upb_FromBase92(upb_ToBase92(i)));
}
}

@ -55,7 +55,7 @@ static bool realloc_internal(upb_Message* msg, size_t need, upb_Arena* arena) {
upb_Message_Internal* in = upb_Message_Getinternal(msg);
if (!in->internal) {
/* No internal data, allocate from scratch. */
size_t size = UPB_MAX(128, _upb_Log2Ceilingsize(need + overhead));
size_t size = UPB_MAX(128, _upb_Log2CeilingSize(need + overhead));
upb_Message_InternalData* internal = upb_Arena_Malloc(arena, size);
if (!internal) return false;
internal->size = size;
@ -64,7 +64,7 @@ static bool realloc_internal(upb_Message* msg, size_t need, upb_Arena* arena) {
in->internal = internal;
} else if (in->internal->ext_begin - in->internal->unknown_end < need) {
/* Internal data is too small, reallocate. */
size_t new_size = _upb_Log2Ceilingsize(in->internal->size + need);
size_t new_size = _upb_Log2CeilingSize(in->internal->size + need);
size_t ext_bytes = in->internal->size - in->internal->ext_begin;
size_t new_ext_begin = new_size - ext_bytes;
upb_Message_InternalData* internal =
@ -310,7 +310,7 @@ bool _upb_mapsorter_pushmap(_upb_mapsorter* s, upb_FieldType key_type,
/* Grow s->entries if necessary. */
if (sorted->end > s->cap) {
s->cap = _upb_Log2Ceilingsize(sorted->end);
s->cap = _upb_Log2CeilingSize(sorted->end);
s->entries = realloc(s->entries, s->cap * sizeof(*s->entries));
if (!s->entries) return false;
}

@ -92,22 +92,16 @@ typedef enum {
kUpb_LabelFlags_IsExtension = 8,
} upb_LabelFlags;
/* Representation in the message. Derivable from descriptortype and mode, but
* fast access helps the serializer. */
// Note: we sort by this number when calculating layout order.
typedef enum {
kUpb_FieldRep_1Byte = 0,
kUpb_FieldRep_4Byte = 1,
kUpb_FieldRep_8Byte = 2,
kUpb_FieldRep_StringView = 3,
kUpb_FieldRep_StringView = 2,
kUpb_FieldRep_Pointer = 3,
kUpb_FieldRep_8Byte = 4,
#if UINTPTR_MAX == 0xffffffff
kUpb_FieldRep_Pointer = kUpb_FieldRep_4Byte,
#else
kUpb_FieldRep_Pointer = kUpb_FieldRep_8Byte,
#endif
kUpb_FieldRep_Shift =
6, /* Bit offset of the rep in upb_MiniTable_Field.mode */
kUpb_FieldRep_Shift = 5, // Bit offset of the rep in upb_MiniTable_Field.mode
kUpb_FieldRep_Max = kUpb_FieldRep_8Byte,
} upb_FieldRep;
UPB_INLINE upb_FieldMode upb_FieldMode_Get(const upb_MiniTable_Field* field) {

@ -223,8 +223,32 @@ UPB_INLINE void* upb_Arena_Malloc(upb_Arena* a, size_t size) {
return ret;
}
// Shrinks the last alloc from arena.
// REQUIRES: (ptr, oldsize) was the last malloc/realloc from this arena.
// We could also add a upb_Arena_TryShrinkLast() which is simply a no-op if
// this was not the last alloc.
UPB_INLINE void upb_Arena_ShrinkLast(upb_Arena* a, void* ptr, size_t oldsize,
size_t size) {
_upb_ArenaHead* h = (_upb_ArenaHead*)a;
oldsize = UPB_ALIGN_MALLOC(oldsize);
size = UPB_ALIGN_MALLOC(size);
UPB_ASSERT((char*)ptr + oldsize == h->ptr); // Must be the last alloc.
UPB_ASSERT(size <= oldsize);
h->ptr = (char*)ptr + size;
}
UPB_INLINE void* upb_Arena_Realloc(upb_Arena* a, void* ptr, size_t oldsize,
size_t size) {
_upb_ArenaHead* h = (_upb_ArenaHead*)a;
oldsize = UPB_ALIGN_MALLOC(oldsize);
size = UPB_ALIGN_MALLOC(size);
if (size <= oldsize) {
if ((char*)ptr + oldsize == h->ptr) {
upb_Arena_ShrinkLast(a, ptr, oldsize, size);
}
return ptr;
}
void* ret = upb_Arena_Malloc(a, size);
if (ret && oldsize > 0) {
@ -332,7 +356,7 @@ UPB_INLINE int _upb_Log2Ceiling(int x) {
#endif
}
UPB_INLINE int _upb_Log2Ceilingsize(int x) { return 1 << _upb_Log2Ceiling(x); }
UPB_INLINE int _upb_Log2CeilingSize(int x) { return 1 << _upb_Log2Ceiling(x); }
#include "upb/port_undef.inc"

Loading…
Cancel
Save