pull/13171/head
Joshua Haberman 3 years ago
parent 85e5c76cb7
commit 5dfbc684dd
  1. 19
      BUILD
  2. 6
      rename.sed
  3. 2
      run_sed.sh
  4. 16
      upb/encode.c
  5. 458
      upb/mini_table.c
  6. 103
      upb/mini_table.h
  7. 149
      upb/mini_table_test.cc
  8. 6
      upb/msg.c
  9. 3
      upb/msg.h
  10. 37
      upb/msg_internal.h
  11. 1
      upbc/BUILD

19
BUILD

@ -107,6 +107,25 @@ cc_library(
],
)
# Library target for the mini-table builder: compiles upb/mini_table.c together
# with its internal header and exposes upb/mini_table.h as the public header.
cc_library(
name = "mini_table",
srcs = ["upb/mini_table.c", "upb/internal/mini_table.h"],
hdrs = ["upb/mini_table.h"],
copts = UPB_DEFAULT_COPTS,
visibility = ["//visibility:public"],
deps = [":upb"],
)
# GoogleTest-based unit test for the ":mini_table" library.
cc_test(
name = "mini_table_test",
srcs = ["upb/mini_table_test.cc"],
deps = [
":mini_table",
"@com_google_googletest//:gtest_main",
"@com_google_absl//absl/container:flat_hash_set",
],
)
cc_library(
name = "fastdecode",
srcs = [

@ -105,9 +105,9 @@ s/upb_enumdef_containingtype/upb_EnumDef_ContainingType/g;
s/upb_enumdef_default/upb_EnumDef_Default/g;
s/upb_enumdef_valuecount/upb_EnumDef_ValueCount/g;
s/upb_enumdef_value/upb_EnumDef_Value/g;
s/upb_enumdef_lookupnamez/upb_EnumDef_FindValueByName/g;
s/upb_enumdef_lookupname/upb_EnumDef_FindValueByNameWithSize/g;
s/upb_enumdef_lookupnum/upb_EnumDef_FindValueByNumber/g;
s/upb_enumdef_ntoiz\b/upb_EnumDef_FindValueByName/g;
s/upb_enumdef_ntoi\b/upb_EnumDef_FindValueByNameWithSize/g;
s/upb_enumdef_iton\b/upb_EnumDef_FindValueByNumber/g;
s/upb_enumdef_checknum/upb_EnumDef_CheckNumber/g;
s/upb_enumdef/upb_EnumDef/g;

@ -1,5 +1,5 @@
shopt -s globstar
sed -E -i -f rename.sed **/*.c **/*.cc **/*.h **/*.hpp **/*.py
sed -E -i -f $(dirname $0)/rename.sed **/*.c **/*.cc **/*.h **/*.hpp **/*.py
# Since sed can't handle multi-line patterns:
perl -i -pe 'BEGIN{undef $/;} s/\bupb_decode\(([^,\)]+),([^,]+),([^,]+),([^,]+),([^,\)]+)\)/upb_Decode(\1, \2, \3, \4, NULL, 0, \5)/smg' **/*.c **/*.cc **/*.h **/*.hpp

@ -442,23 +442,29 @@ static bool encode_shouldencode(upb_encstate* e, const upb_Message* msg,
if (f->presence == 0) {
/* Proto3 presence or map/array. */
const void* mem = UPB_PTR_AT(msg, f->offset, void);
switch (f->mode >> upb_FieldRep_Shift) {
case upb_FieldRep_1Byte: {
switch (f->mode >> kUpb_FieldRep_Shift) {
case kUpb_FieldRep_1Byte: {
char ch;
memcpy(&ch, mem, 1);
return ch != 0;
}
case upb_FieldRep_4Byte: {
#if UINTPTR_MAX == 0xffffffff
      // On 32-bit targets a pointer-sized field is 4 bytes wide, so it shares
      // the 4-byte zero test below. The enumerator was renamed to the
      // kUpb_ prefix along with the rest of upb_FieldRep.
      case kUpb_FieldRep_Pointer:
#endif
case kUpb_FieldRep_4Byte: {
uint32_t u32;
memcpy(&u32, mem, 4);
return u32 != 0;
}
case upb_FieldRep_8Byte: {
#if UINTPTR_MAX != 0xffffffff
case kUpb_FieldRep_Pointer:
#endif
case kUpb_FieldRep_8Byte: {
uint64_t u64;
memcpy(&u64, mem, 8);
return u64 != 0;
}
case upb_FieldRep_StringView: {
case kUpb_FieldRep_StringView: {
const upb_StringView* str = (const upb_StringView*)mem;
return str->size != 0;
}

@ -0,0 +1,458 @@
/*
* Copyright (c) 2009-2021, Google LLC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Google LLC nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "upb/mini_table.h"
#include "upb/internal/mini_table.h"
#include "upb/msg_internal.h"
#include "upb/upb.h"
// Must be last.
#include "upb/port_def.inc"
/** upb_MiniTable *************************************************************/
// Field type codes as they appear in the encoded mini-table string, expressed
// as base-92 values (i.e. after upb_FromBase92() has been applied to the
// character). Values 0..17 describe a singular field of the given type.
enum upb_EncodedType {
kUpb_EncodedType_Double = 0,
kUpb_EncodedType_Float = 1,
kUpb_EncodedType_Fixed32 = 2,
kUpb_EncodedType_Fixed64 = 3,
kUpb_EncodedType_SFixed32 = 4,
kUpb_EncodedType_SFixed64 = 5,
kUpb_EncodedType_Int32 = 6,
kUpb_EncodedType_UInt32 = 7,
kUpb_EncodedType_SInt32 = 8,
kUpb_EncodedType_Int64 = 9,
kUpb_EncodedType_UInt64 = 10,
kUpb_EncodedType_SInt64 = 11,
kUpb_EncodedType_Enum = 12,
kUpb_EncodedType_Bool = 13,
kUpb_EncodedType_Bytes = 14,
kUpb_EncodedType_String = 15,
kUpb_EncodedType_Group = 16,
kUpb_EncodedType_Message = 17,
// A value >= RepeatedBase denotes a repeated field whose element type is
// (value - kUpb_EncodedType_RepeatedBase); see upb_MiniTable_SetField().
kUpb_EncodedType_RepeatedBase = 20,
};
// Inclusive ASCII character ranges that partition the encoded string by
// meaning. _upb_MiniTable_BuildWithoutOffsets() dispatches on these: a
// character up to MaxField starts a field, Min/MaxModifier carries modifier
// bits for the preceding field, End switches to the oneof section, and
// Min/MaxSkip carries a field-number skip.
enum {
kUpb_EncodedValue_MinField = ' ',
kUpb_EncodedValue_MaxField = 'K',
kUpb_EncodedValue_MinModifier = 'L',
kUpb_EncodedValue_MaxModifier = '[',
kUpb_EncodedValue_End = '^',
kUpb_EncodedValue_MinSkip = '_',
kUpb_EncodedValue_MaxSkip = '~',
};
// Decoding table for the base-92 alphabet, indexed by (character - ' ').
// It covers every printable ASCII character from ' ' to '~'; the three
// characters that are not part of the alphabet ('"', '\'' and '\\') map to -1.
static const int8_t kUpb_FromBase92[] = {
    0,  1,  -1, 2,  3,  4,  5,  -1, 6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
    17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
    36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54,
    55, 56, 57, -1, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72,
    73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91,
};

// Encoding table for the base-92 alphabet: entry i is the character that
// represents the value i (0 <= i < 92).
static const char kUpb_ToBase92[] = {
    ' ', '!', '#', '$', '%', '&', '(', ')', '*', '+', ',', '-', '.', '/',
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=',
    '>', '?', '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K',
    'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y',
    'Z', '[', ']', '^', '_', '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h',
    'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
    'w', 'x', 'y', 'z', '{', '|', '}', '~',
};

// Returns the character that encodes the base-92 value `ch`.
// `ch` must be in [0, 92); this is only checked by assert().
char upb_ToBase92(char ch) {
  const int value = ch;
  assert(value >= 0 && value < 92);
  return kUpb_ToBase92[value];
}

// Returns the base-92 value of the character `ch`, or -1 if `ch` is outside
// ' '..'~' or is one of the characters excluded from the alphabet.
char upb_FromBase92(char ch) {
  const int in_table = (ch >= ' ') && (ch <= '~');
  return in_table ? kUpb_FromBase92[ch - ' '] : -1;
}
// Maps an upb_EncodedType (singular, 0..17) to the in-message representation
// used for a scalar field of that type. Entries are listed in enum order.
static const char kUpb_EncodedToFieldRep[] = {
    [kUpb_EncodedType_Double] = kUpb_FieldRep_8Byte,
    [kUpb_EncodedType_Float] = kUpb_FieldRep_4Byte,
    [kUpb_EncodedType_Fixed32] = kUpb_FieldRep_4Byte,
    [kUpb_EncodedType_Fixed64] = kUpb_FieldRep_8Byte,
    [kUpb_EncodedType_SFixed32] = kUpb_FieldRep_4Byte,
    [kUpb_EncodedType_SFixed64] = kUpb_FieldRep_8Byte,
    [kUpb_EncodedType_Int32] = kUpb_FieldRep_4Byte,
    [kUpb_EncodedType_UInt32] = kUpb_FieldRep_4Byte,
    [kUpb_EncodedType_SInt32] = kUpb_FieldRep_4Byte,
    [kUpb_EncodedType_Int64] = kUpb_FieldRep_8Byte,
    [kUpb_EncodedType_UInt64] = kUpb_FieldRep_8Byte,
    [kUpb_EncodedType_SInt64] = kUpb_FieldRep_8Byte,
    [kUpb_EncodedType_Enum] = kUpb_FieldRep_4Byte,
    [kUpb_EncodedType_Bool] = kUpb_FieldRep_1Byte,
    [kUpb_EncodedType_Bytes] = kUpb_FieldRep_StringView,
    [kUpb_EncodedType_String] = kUpb_FieldRep_StringView,
    [kUpb_EncodedType_Group] = kUpb_FieldRep_Pointer,
    [kUpb_EncodedType_Message] = kUpb_FieldRep_Pointer,
};
static const char kUpb_EncodedToType[] = {
[kUpb_EncodedType_Double] = kUpb_FieldType_Double,
[kUpb_EncodedType_Float] = kUpb_FieldType_Float,
[kUpb_EncodedType_Int64] = kUpb_FieldType_Int64,
[kUpb_EncodedType_UInt64] = kUpb_FieldType_UInt64,
[kUpb_EncodedType_Int32] = kUpb_FieldType_Int32,
[kUpb_EncodedType_Fixed64] = kUpb_FieldType_Fixed64,
[kUpb_EncodedType_Fixed32] = kUpb_FieldType_Fixed32,
[kUpb_EncodedType_Bool] = kUpb_FieldType_Bool,
[kUpb_EncodedType_String] = kUpb_FieldType_String,
[kUpb_EncodedType_Group] = kUpb_FieldType_Group,
[kUpb_EncodedType_Message] = kUpb_FieldType_Message,
[kUpb_EncodedType_Bytes] = kUpb_FieldType_Bytes,
[kUpb_EncodedType_UInt32] = kUpb_FieldType_UInt32,
[kUpb_EncodedType_Enum] = kUpb_FieldType_Enum,
[kUpb_EncodedType_SFixed32] = kUpb_FieldType_SFixed32,
[kUpb_EncodedType_SFixed64] = kUpb_FieldType_SFixed64,
[kUpb_EncodedType_SInt32] = kUpb_FieldType_SInt32,
[kUpb_EncodedType_SInt64] = kUpb_FieldType_SInt64,
};
const upb_MiniTable_Field* upb_MiniTable_FindFieldByNumber(
const upb_MiniTable* table, uint32_t number) {
int n = table->field_count;
for (int i = 0; i < n; i++) {
if (table->fields[i].number == number) {
return &table->fields[i];
}
}
return NULL;
}
// Decodes a little-endian base-92 "varint" whose digits are all characters in
// the inclusive ASCII range [min, max].
//
//   ptr  in/out cursor; points just past `ch` on entry and is advanced past
//        every additional digit that is consumed.
//   end  one past the last readable byte.
//   ch   the first digit, already consumed by the caller.
//   min/max  ASCII bounds of the digit alphabet; both must lie in ' '..'~'.
//
// Each digit contributes (value(ch) - value(min)) shifted left by
// _upb_Log2Ceiling(max - min) bits per preceding digit. Decoding stops at
// `end` or at the first character outside [min, max], which is left
// unconsumed.
//
// Returns the decoded value. If the range or the first digit is not usable
// (outside the decoding table, or `ch` outside [min, max]) 0 is returned
// rather than reading out of bounds; there is no separate error channel.
static uint32_t upb_MiniTable_DecodeVarInt(const char** ptr, const char* end,
                                           char ch, uint8_t min, uint8_t max) {
  // kUpb_FromBase92 is indexed by (character - ' ') and covers ' '..'~' only.
  if (min < ' ' || max > '~' || min > max || ch < min || ch > max) return 0;

  const uint32_t base = (uint32_t)kUpb_FromBase92[min - ' '];
  const uint32_t bits_per_char = (uint32_t)_upb_Log2Ceiling(max - min);
  uint32_t val = 0;
  uint32_t shift = 0;
  while (1) {
    // Unsigned arithmetic keeps the shift well defined even for characters
    // inside [min, max] that are not part of the alphabet.
    uint32_t bits = (uint32_t)kUpb_FromBase92[ch - ' '] - base;
    // Shifting a 32-bit value by >= 32 is undefined; digits beyond the
    // representable width are consumed but contribute nothing.
    if (shift < 32) val |= bits << shift;
    // Stop at end of input (checked first, before dereferencing) or at the
    // first non-digit.
    if (*ptr >= end || **ptr < min || **ptr > max) return val;
    ch = *(*ptr)++;
    shift += bits_per_char;
  }
}
// Reports whether a field of encoded type `type` needs an entry in the
// table's `subs` array: always for messages and groups, and for enums only
// when `is_proto2` is set.
static bool upb_MiniTable_HasSub(char type, bool is_proto2) {
  switch (type) {
    case kUpb_EncodedType_Message:
    case kUpb_EncodedType_Group:
      return true;
    case kUpb_EncodedType_Enum:
      return is_proto2;
    default:
      return false;
  }
}
// In each field's offset, we temporarily store a presence classifier:
// While a table is being built the `offset` member does not yet hold a byte
// offset; upb_MiniTable_AllocateHasbits() reads the classifier and
// upb_MiniTable_AssignOffsets() later overwrites it with the real offset.
enum PresenceClass {
// No presence tracking: set for repeated fields and for fields carrying the
// "proto3 singular" modifier.
kNoPresence = 0,
// Default for scalar fields: presence is tracked with a hasbit.
kHasbitPresence = 1,
// Set by the "required" modifier; these fields receive the lowest hasbits.
kRequiredPresence = 2,
// Negative values refer to a specific oneof with that number.
// Positive values >=3 indicate that this field is in a oneof, and specify
// the next field in this oneof's linked list.
};
#include <stdio.h>
// Initializes `field` from the encoded field-type character `ch`.
//
//   ch         encoded character; its base-92 value is either an
//              upb_EncodedType (singular field) or
//              kUpb_EncodedType_RepeatedBase + upb_EncodedType (repeated).
//   field      field to fill in: mode, descriptortype, the temporary presence
//              classifier in `offset`, and submsg_index when applicable.
//   is_proto2  whether enum fields need a sub-table entry.
//   sub_count  running count of sub-table entries; incremented when this
//              field takes one.
//
// Returns false, leaving `field` untouched, if `ch` does not decode to a
// known field type.
static bool upb_MiniTable_SetField(uint8_t ch, upb_MiniTable_Field* field,
                                   bool is_proto2, uint32_t* sub_count) {
  int8_t type = upb_FromBase92(ch);
  if (type < 0) return false;  // Not a base-92 character.

  const bool is_repeated = type >= kUpb_EncodedType_RepeatedBase;
  if (is_repeated) type -= kUpb_EncodedType_RepeatedBase;

  // Validate before indexing the lookup tables, whose last entry is
  // kUpb_EncodedType_Message.
  if (type > kUpb_EncodedType_Message) return false;

  if (is_repeated) {
    // Repeated fields are stored as a pointer to an array and have no
    // presence tracking.
    field->mode = kUpb_FieldMode_Array;
    field->mode |= kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift;
    field->offset = kNoPresence;
  } else {
    field->mode = kUpb_FieldMode_Scalar;
    field->mode |= kUpb_EncodedToFieldRep[type] << kUpb_FieldRep_Shift;
    field->offset = kHasbitPresence;
  }
  field->descriptortype = kUpb_EncodedToType[type];

  // The decision depends on the decoded element type, not on the raw
  // character.
  if (upb_MiniTable_HasSub(type, is_proto2)) {
    field->submsg_index = (*sub_count)++;
  }
  return true;
}
// Applies the decoded modifier bits `mod` to `field`:
//   bit 0 (0x1): when set clears upb_LabelFlags_IsPacked, otherwise sets it.
//   bit 1 (0x2): proto3 singular field; switches the presence classifier
//                from hasbit to none. Fails if the field is not currently
//                classified as hasbit presence.
//   bit 2 (0x4): marks the field as required.
// Returns false on the failure described above (the packed flag has already
// been updated at that point), true otherwise.
static bool upb_MiniTable_SetModifier(uint32_t mod, upb_MiniTable_Field* field) {
  const bool clears_packed = (mod & 0x1) != 0;
  const bool proto3_singular = (mod & 0x2) != 0;
  const bool required = (mod & 0x4) != 0;

  if (clears_packed) {
    field->mode &= ~upb_LabelFlags_IsPacked;
  } else {
    field->mode |= upb_LabelFlags_IsPacked;
  }

  if (proto3_singular) {
    if (field->offset != kHasbitPresence) return false;
    field->offset = kNoPresence;
  }

  if (required) field->offset = kRequiredPresence;
  return true;
}
// Appends `item` to `vec`, growing the heap buffer when it is full (to at
// least 8 elements, otherwise double the current size).
//
// Returns false if the buffer cannot be grown. In that case `vec` is left
// unchanged: `vec->data` still points at the old, valid allocation so the
// owner can free or reuse it.
static bool upb_MiniTable_PushItem(upb_LayoutItemVector* vec,
                                   upb_LayoutItem item) {
  if (vec->size == vec->capacity) {
    size_t new_cap = UPB_MAX(8, vec->size * 2);
    // Guard the byte-size multiplication against overflow.
    if (new_cap > SIZE_MAX / sizeof(*vec->data)) return false;
    // Keep the old pointer until realloc() succeeds; assigning the result
    // directly would leak the buffer on failure.
    void* grown = realloc(vec->data, new_cap * sizeof(*vec->data));
    if (!grown) return false;
    vec->data = grown;
    vec->capacity = new_cap;
  }
  vec->data[vec->size++] = item;
  return true;
}
// Pushes the two layout items that make up one oneof: first the data slot
// (with the representation carried in `item`), then the case slot, which is
// always a 4-byte field number. Returns false if either push fails.
static bool upb_MiniTable_PushOneof(upb_LayoutItemVector* vec,
                                    upb_LayoutItem item) {
  upb_LayoutItem data_slot = item;
  data_slot.is_case = false;
  if (!upb_MiniTable_PushItem(vec, data_slot)) return false;

  upb_LayoutItem case_slot = data_slot;
  case_slot.rep = kUpb_FieldRep_4Byte;  // Field Number.
  case_slot.is_case = true;
  return upb_MiniTable_PushItem(vec, case_slot);
}
// Parses the oneof section that follows kUpb_EncodedValue_End.
//
// The section is a sequence of base-92 varint field numbers. '|' separates
// the fields of one oneof and '~' terminates a oneof. Every referenced field
// has its temporary presence classifier (`offset`) set to the (negative)
// number of the oneof it belongs to, and one data/case item pair is pushed
// onto `vec` per oneof.
//
//   ptr  in/out cursor into the encoded string.
//   end  one past the last readable byte.
//   ret  table whose fields have already been decoded.
//   vec  layout item vector to append to.
//
// Returns false if a field number does not name a field in `ret` or if the
// vector cannot grow.
static bool upb_MiniTable_DecodeOneofs(const char** ptr, const char* end,
                                       upb_MiniTable* ret,
                                       upb_LayoutItemVector* vec) {
  upb_LayoutItem item = {.rep = 0, .field_or_oneof = -1};
  while (*ptr < end) {
    char ch = *(*ptr)++;
    if (ch == '|') {
      // Field separator, no action needed.
    } else if (ch == '~') {
      // End of oneof.
      if (!upb_MiniTable_PushOneof(vec, item)) return false;
      item.field_or_oneof--;  // Move to next oneof.
    } else {
      // `ch` is already the first digit of the field number; it must be
      // passed on rather than reading (and skipping to) the next byte, which
      // may not even exist. Digits are the characters for base-92 values
      // 0..63, i.e. ' '..'b', which excludes both separators.
      uint32_t field_num = upb_MiniTable_DecodeVarInt(ptr, end, ch, ' ', 'b');
      upb_MiniTable_Field* f =
          (upb_MiniTable_Field*)upb_MiniTable_FindFieldByNumber(ret, field_num);
      if (!f) return false;
      // Oneof storage must be large enough to accommodate the largest member.
      // NOTE(review): this compares upb_FieldRep enumerators, not byte sizes;
      // confirm the enum order matches size order on every target.
      item.rep = UPB_MAX(item.rep, f->mode >> kUpb_FieldRep_Shift);
      f->offset = item.field_or_oneof;
    }
  }
  // Push final oneof.
  return upb_MiniTable_PushOneof(vec, item);
}
// qsort() comparator for upb_LayoutItem. Items are ordered by, in turn:
//   1. rep            (ascending)
//   2. is_case        (ascending, so non-case items come first)
//   3. field_or_oneof (descending)
//
// The main goal of sorting is to reduce space lost to padding.
// NOTE(review): an earlier comment here described the opposite direction for
// every key (rep/is_case descending, field number ascending); confirm which
// order is intended.
int upb_MiniTable_CompareFields(const void* _a, const void* _b) {
  const upb_LayoutItem* lhs = _a;
  const upb_LayoutItem* rhs = _b;

  if (lhs->rep != rhs->rep) return lhs->rep < rhs->rep ? -1 : 1;
  if (lhs->is_case != rhs->is_case) return lhs->is_case < rhs->is_case ? -1 : 1;
  if (lhs->field_or_oneof == rhs->field_or_oneof) return 0;
  return rhs->field_or_oneof < lhs->field_or_oneof ? -1 : 1;
}
// Appends one layout item per field of `table` to `vec` (field index plus the
// representation taken from the field's mode), then sorts the whole vector
// with upb_MiniTable_CompareFields(). Returns false if the vector cannot grow.
// NOTE(review): every field is added, including those already assigned to a
// oneof; confirm whether oneof members were meant to be skipped.
static bool upb_MiniTable_SortLayoutItems(upb_MiniTable* table,
                                          upb_LayoutItemVector* vec) {
  const int field_count = table->field_count;
  for (int idx = 0; idx < field_count; idx++) {
    const upb_MiniTable_Field* field =
        (upb_MiniTable_Field*)&table->fields[idx];
    upb_LayoutItem entry = {.field_or_oneof = idx,
                            .rep = field->mode >> kUpb_FieldRep_Shift};
    if (!upb_MiniTable_PushItem(vec, entry)) return false;
  }
  qsort(vec->data, vec->size, sizeof(*vec->data), upb_MiniTable_CompareFields);
  return true;
}
// Assigns hasbit indexes based on the presence classifier temporarily stored
// in each field's `offset`:
//   - required fields get hasbits 1..required_count (the lowest indexes),
//   - other hasbit fields get the indexes that follow,
//   - fields without presence get presence == 0.
// Hasbit index 0 is never handed out.
//
// Also records `required_count` and starts the message size at the number of
// bytes needed to hold the hasbits, so that field storage placed afterwards
// does not overlap them.
// NOTE(review): fields classified as oneof members are not given a presence
// value here; confirm where their case offset is meant to be assigned.
void upb_MiniTable_AllocateHasbits(upb_MiniTable* ret) {
  int n = ret->field_count;
  int last_hasbit = 0;  // 0 cannot be used.

  // First assign required fields, which must have the lowest hasbits.
  for (int i = 0; i < n; i++) {
    upb_MiniTable_Field* field = (upb_MiniTable_Field*)&ret->fields[i];
    if (field->offset == kRequiredPresence) {
      field->presence = ++last_hasbit;
    } else if (field->offset == kNoPresence) {
      // The field array is not zero-initialized, so this must be explicit:
      // readers treat presence == 0 as "no hasbit".
      field->presence = 0;
    }
  }
  ret->required_count = last_hasbit;

  // Next assign non-required hasbit fields.
  for (int i = 0; i < n; i++) {
    upb_MiniTable_Field* field = (upb_MiniTable_Field*)&ret->fields[i];
    if (field->offset == kHasbitPresence) {
      field->presence = ++last_hasbit;
    }
  }

  // Bits 0..last_hasbit occupy the leading bytes of the message.
  ret->size = last_hasbit ? ((size_t)last_hasbit + 1 + 7) / 8 : 0;
}
// Decodes the encoded description [data, data + len) into a new upb_MiniTable
// allocated from `arena`, up to but not including offset assignment.
//
// On success every field has its number, mode, descriptor type, sub index and
// hasbit assigned, `subs` is allocated and zeroed, and `vec` holds the sorted
// layout items for upb_MiniTable_AssignOffsets() to consume.
//
//   data/len  encoded description.
//   arena     owns the returned table and its arrays.
//   vec       scratch vector of layout items; may be grown with realloc().
//   status    currently not written to (TODO: report error details).
//
// Returns NULL on allocation failure or malformed input.
upb_MiniTable* _upb_MiniTable_BuildWithoutOffsets(const char* data, size_t len,
                                                  upb_Arena* arena,
                                                  upb_LayoutItemVector* vec,
                                                  upb_Status* status) {
  upb_MiniTable* ret = upb_Arena_Malloc(arena, sizeof(*ret));
  if (!ret) return NULL;
  // `len` is an upper bound on the number of fields.  We will return what we
  // don't use.
  upb_MiniTable_Field* fields = upb_Arena_Malloc(arena, sizeof(*fields) * len);
  if (!fields) return NULL;
  ret->field_count = 0;
  ret->fields = fields;
  // Arena memory is not zeroed and layout reads `size` before writing it.
  ret->size = 0;

  const char* ptr = data;
  const char* end = data + len;
  uint32_t last_field_number = 0;
  uint32_t sub_count = 0;
  bool is_proto2 = false;  // TODO

  while (ptr < end) {
    char ch = *ptr++;
    if (kUpb_EncodedValue_MinField <= ch && ch <= kUpb_EncodedValue_MaxField) {
      // Field type.
      upb_MiniTable_Field* field = &fields[ret->field_count++];
      field->number = ++last_field_number;
      if (!upb_MiniTable_SetField(ch, field, is_proto2, &sub_count)) {
        return NULL;
      }
    } else if (kUpb_EncodedValue_MinModifier <= ch &&
               ch <= kUpb_EncodedValue_MaxModifier) {
      // Modifier: applies to the most recently decoded field.
      if (ret->field_count == 0) return NULL;
      uint32_t mod = upb_MiniTable_DecodeVarInt(&ptr, end, ch,
                                                kUpb_EncodedValue_MinModifier,
                                                kUpb_EncodedValue_MaxModifier);
      upb_MiniTable_Field* field = &fields[ret->field_count - 1];
      if (!upb_MiniTable_SetModifier(mod, field)) return NULL;
    } else if (ch == kUpb_EncodedValue_End) {
      // Oneof groups: they consume the rest of the input.
      if (!upb_MiniTable_DecodeOneofs(&ptr, end, ret, vec)) return NULL;
      break;
    } else if (kUpb_EncodedValue_MinSkip <= ch &&
               ch <= kUpb_EncodedValue_MaxSkip) {
      // Skip: advance the implicit field number.
      last_field_number += upb_MiniTable_DecodeVarInt(
          &ptr, end, ch, kUpb_EncodedValue_MinSkip, kUpb_EncodedValue_MaxSkip);
    } else {
      // Character outside every known range: the input is malformed.
      return NULL;
    }
  }

  // Return unused memory from fields array. Keep whatever pointer the arena
  // hands back in case the block was moved.
  void* shrunk = upb_Arena_Realloc(arena, fields, sizeof(*fields) * len,
                                   sizeof(*fields) * ret->field_count);
  if (shrunk) {
    fields = shrunk;
    ret->fields = fields;
  }

  size_t subs_bytes = sizeof(*ret->subs) * sub_count;
  ret->subs = upb_Arena_Malloc(arena, subs_bytes);
  if (!ret->subs) return NULL;
  // Initialize to zero so we can test later that the user set all subs.
  memset((void*)ret->subs, 0, subs_bytes);

  upb_MiniTable_AllocateHasbits(ret);
  if (!upb_MiniTable_SortLayoutItems(ret, vec)) return NULL;
  return ret;
}
// Reserves storage for one value of representation `rep` at the end of the
// message described by `table`. The current size is rounded up to a multiple
// of the value's byte width, the value is placed there, and `table->size`
// grows to cover it. Returns the byte offset of the reserved storage.
size_t upb_MiniTable_Place(upb_MiniTable* table, upb_FieldRep rep) {
  static const size_t kRepToSize[] = {
      [kUpb_FieldRep_1Byte] = 1,
      [kUpb_FieldRep_4Byte] = 4,
      [kUpb_FieldRep_8Byte] = 8,
      [kUpb_FieldRep_Pointer] = sizeof(void*),
      [kUpb_FieldRep_StringView] = sizeof(upb_StringView),
  };
  const size_t width = kRepToSize[rep];
  const size_t offset = UPB_ALIGN_UP(table->size, width);
  table->size = offset + width;
  return offset;
}
// Walks the (already sorted) layout items in `vec` and, for every item that
// refers to a field (field_or_oneof >= 0), replaces that field's temporary
// presence classifier with a real byte offset obtained from
// upb_MiniTable_Place(). Items with a negative index are skipped.
// Always returns true.
static bool upb_MiniTable_AssignOffsets(upb_MiniTable* ret,
                                        upb_LayoutItemVector* vec) {
  const int item_count = vec->size;
  for (int idx = 0; idx < item_count; idx++) {
    const upb_LayoutItem* entry = &vec->data[idx];
    if (entry->field_or_oneof < 0) continue;
    upb_MiniTable_Field* field =
        (upb_MiniTable_Field*)&ret->fields[entry->field_or_oneof];
    field->offset = upb_MiniTable_Place(ret, entry->rep);
  }
  return true;
}
// Builds a mini table from [data, data + len), using `*buf` as reusable
// scratch space for layout items.
//
//   data/len  encoded description.
//   arena     owns the returned table.
//   buf       in/out: malloc()-compatible scratch buffer (may be NULL). It
//             may be reallocated; the caller owns it before and after the
//             call and must eventually free() it.
//   buf_size  in/out: size of `*buf` in bytes.
//   status    forwarded to the decoder.
//
// Returns the new table, or NULL on failure. `*buf` and `*buf_size` are
// updated in both cases.
upb_MiniTable* upb_MiniTable_BuildWithBuf(const char* data, size_t len,
                                          upb_Arena* arena, void** buf,
                                          size_t* buf_size,
                                          upb_Status* status) {
  // The vector counts elements, while the caller's buffer size is in bytes.
  upb_LayoutItemVector vec = {.data = *buf,
                              .capacity = *buf_size / sizeof(*vec.data)};

  upb_MiniTable* ret =
      _upb_MiniTable_BuildWithoutOffsets(data, len, arena, &vec, status);
  if (ret && !upb_MiniTable_AssignOffsets(ret, &vec)) ret = NULL;

  // Hand the (possibly reallocated) buffer back, converting the capacity from
  // elements back to bytes.
  *buf = vec.data;
  *buf_size = vec.capacity * sizeof(*vec.data);
  return ret;
}
// Convenience wrapper around upb_MiniTable_BuildWithBuf() for callers that do
// not keep a scratch buffer: starts with no buffer and frees whatever the
// build allocated before returning the table (or NULL on failure).
upb_MiniTable* upb_MiniTable_Build(const char* data, size_t len,
                                   upb_Arena* arena, upb_Status* status) {
  void* scratch = NULL;
  size_t scratch_size = 0;
  upb_MiniTable* table = upb_MiniTable_BuildWithBuf(data, len, arena, &scratch,
                                                    &scratch_size, status);
  free(scratch);
  return table;
}

@ -0,0 +1,103 @@
/*
* Copyright (c) 2009-2021, Google LLC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Google LLC nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef UPB_MINI_TABLE_H_
#define UPB_MINI_TABLE_H_
#include "upb/msg_internal.h"
#ifdef __cplusplus
extern "C" {
#endif
const upb_MiniTable_Field* upb_MiniTable_FindFieldByNumber(
const upb_MiniTable* table, uint32_t number);
/** upb_MiniTable *************************************************************/
// Functions to encode a string in a format that can be loaded by
// upb_MiniTable_Build().
// Message-level modifier flags. Each enumerator is a distinct bit.
// Presumably these are OR-ed together and passed as `msg_mod` to
// upb_MtDataEncoder_StartMessage() -- TODO confirm.
typedef enum {
kUpb_MessageModifier_DefaultIsPacked = 1,
kUpb_MessageModifier_IsMessageSet = 2,
kUpb_MessageModifier_IsExtendable = 4,
kUpb_MessageModifier_HasClosedEnums = 8,
} kUpb_MessageModifier;
// Field-level modifier flags. Each enumerator is a distinct bit.
// Presumably these are OR-ed together and passed as `field_mod` to
// upb_MtDataEncoder_PutField() -- TODO confirm.
// NOTE(review): the decoder in mini_table.c interprets modifier bits
// 0x1/0x2/0x4 as packed-toggle/proto3-singular/required, which does not
// line up with these names; confirm the intended mapping.
typedef enum {
kUpb_FieldModifier_IsRepeated = 1,
kUpb_FieldModifier_IsPacked = 2,
} kUpb_FieldModifier;
// State for incrementally encoding a mini-table description.
// NOTE(review): `buf`/`end` look like the bounds of the caller-provided output
// window -- confirm against the encoder implementation.
typedef struct {
char* buf;
char* end;
// Aliased to internal-only members in .cc.
char internal[32];
} upb_MtDataEncoder;
// If the input buffer has at least this many bytes available, the encoder call
// is guaranteed to succeed (as long as field number order is maintained).
#define kUpb_MtDataEncoder_MinSize 16
// Note: For the main field list, fields *must* be in field number order.
// For the oneof field list, order doesn't matter.
char* upb_MtDataEncoder_StartMessage(upb_MtDataEncoder* e, uint64_t msg_mod);
char* upb_MtDataEncoder_PutField(upb_MtDataEncoder* e, upb_FieldType type,
uint32_t field_num, uint64_t field_mod);
char* upb_MiniTable_StartOneof(upb_MtDataEncoder* e);
char* upb_MiniTable_PutOneofField(upb_MtDataEncoder* e, uint32_t field_num);
// Builds a mini table from the data encoded in the buffer [data, len]. If any
// errors occur, returns NULL and sets a status message. In the success case,
// the caller must call upb_MiniTable_SetSub*() for all message or proto2 enum
// fields to link the table to the appropriate sub-tables.
upb_MiniTable* upb_MiniTable_Build(const char* data, size_t len,
upb_Arena* arena, upb_Status* status);
void upb_MiniTable_SetSubMessage(upb_MiniTable* table,
const upb_MiniTable_Field* field,
const upb_MiniTable* sub);
void upb_MiniTable_SetSubEnum(upb_MiniTable* table,
const upb_MiniTable_Field* field,
const upb_MiniTable_Enum* sub);
// Like upb_MiniTable_Build(), but the user provides a buffer of layout data so
// it can be reused from call to call, avoiding repeated realloc()/free().
//
// The caller owns `*buf` both before and after the call, and must free() it
// when it is no longer in use. The function will realloc() `*buf` as
// necessary, updating `*buf_size` accordingly.
upb_MiniTable* upb_MiniTable_BuildWithBuf(const char* data, size_t len,
upb_Arena* arena, void** buf,
size_t* buf_size, upb_Status* status);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* UPB_MINI_TABLE_H_ */

@ -0,0 +1,149 @@
/*
* Copyright (c) 2009-2021, Google LLC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Google LLC nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "upb/mini_table.h"
#include "absl/container/flat_hash_set.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "upb/msg_internal.h"
#include "upb/upb.hpp"
// We can consider putting these in a standard upb .hpp header.
// Encodes one field of `type` with `modifiers` and appends the encoded bytes
// to `*str`.
static void EncodeField(upb_FieldType type, uint64_t modifiers,
                        std::string* str) {
  char scratch[16];
  char* const limit = scratch + sizeof(scratch);
  char* tail = upb_MiniTable_EncodeField(type, modifiers, scratch, limit);
  assert(tail);
  str->append(scratch, tail - scratch);
}
// Encodes a field-number skip of `skip` and appends it to `*str`.
static void EncodeSkip(uint32_t skip, std::string* str) {
  char scratch[16];
  char* const limit = scratch + sizeof(scratch);
  char* tail = upb_MiniTable_EncodeSkip(skip, scratch, limit);
  assert(tail);
  str->append(scratch, tail - scratch);
}
// Appends the marker that starts the oneof section to `*str`.
static void StartOneofs(std::string* str) {
  char scratch[16];
  char* const limit = scratch + sizeof(scratch);
  char* tail = upb_MiniTable_StartOneofs(scratch, limit);
  assert(tail);
  str->append(scratch, tail - scratch);
}
// Encodes `field_num` as a oneof member and appends it to `*str`.
static void EncodeOneofField(uint32_t field_num, std::string* str) {
  char scratch[16];
  char* const limit = scratch + sizeof(scratch);
  char* tail = upb_MiniTable_EncodeOneofField(field_num, scratch, limit);
  assert(tail);
  str->append(scratch, tail - scratch);
}
// Appends the separator between two fields of the same oneof to `*str`.
static void EncodeOneofFieldSeparator(std::string* str) {
  char scratch[16];
  char* const limit = scratch + sizeof(scratch);
  char* tail = upb_MiniTable_EncodeOneofFieldSeparator(scratch, limit);
  assert(tail);
  str->append(scratch, tail - scratch);
}
// Appends the separator between two oneofs to `*str`.
static void EncodeOneofSeparator(std::string* str) {
  char scratch[16];
  char* const limit = scratch + sizeof(scratch);
  char* tail = upb_MiniTable_EncodeOneofSeparator(scratch, limit);
  assert(tail);
  str->append(scratch, tail - scratch);
}
// An empty description must yield a valid table with no fields.
TEST(MiniTable, Empty) {
  upb::Arena arena;
  upb::Status status;
  // upb_MiniTable_Build() takes a status out-parameter as its fourth argument.
  upb_MiniTable* table =
      upb_MiniTable_Build(NULL, 0, arena.ptr(), status.ptr());
  ASSERT_NE(nullptr, table);
  EXPECT_EQ(0, table->field_count);
  EXPECT_EQ(0, table->required_count);
}
// Encodes one singular field of every scalar type and checks that each field
// gets the expected number, scalar mode, and a unique in-bounds offset.
TEST(MiniTable, AllScalarTypes) {
  upb::Arena arena;
  std::string input;
  // Walk every field type up to and including SInt64, leaving out the two
  // non-scalar types, so that exactly the 16 scalar types are encoded.
  // NOTE(review): assumes upb_FieldType is contiguous from Double to SInt64.
  for (int i = kUpb_FieldType_Double; i <= kUpb_FieldType_SInt64; i++) {
    if (i == kUpb_FieldType_Group || i == kUpb_FieldType_Message) continue;
    EncodeField(static_cast<upb_FieldType>(i), 0, &input);
  }
  upb::Status status;
  upb_MiniTable* table = upb_MiniTable_Build(input.data(), input.size(),
                                             arena.ptr(), status.ptr());
  ASSERT_NE(nullptr, table);
  EXPECT_EQ(16, table->field_count);
  absl::flat_hash_set<size_t> offsets;
  for (int i = 0; i < 16; i++) {
    const upb_MiniTable_Field* f = &table->fields[i];
    EXPECT_EQ(i + 1, f->number);
    EXPECT_EQ(kUpb_FieldMode_Scalar, f->mode & kUpb_FieldMode_Mask);
    EXPECT_TRUE(offsets.insert(f->offset).second);
    EXPECT_TRUE(f->offset < table->size);
  }
  EXPECT_EQ(0, table->required_count);
}
// Encodes one repeated field of every non-message type and checks that each
// field gets the expected number, array mode, and a unique in-bounds offset.
TEST(MiniTable, AllRepeatedTypes) {
  upb::Arena arena;
  std::string input;
  // A repeated field is encoded as RepeatedBase + the element's encoded type.
  const size_t base = kUpb_EncodedType_RepeatedBase;
  input.push_back(upb_ToBase92(base + kUpb_EncodedType_Double));
  input.push_back(upb_ToBase92(base + kUpb_EncodedType_Float));
  input.push_back(upb_ToBase92(base + kUpb_EncodedType_Fixed32));
  input.push_back(upb_ToBase92(base + kUpb_EncodedType_Fixed64));
  input.push_back(upb_ToBase92(base + kUpb_EncodedType_SFixed32));
  input.push_back(upb_ToBase92(base + kUpb_EncodedType_SFixed64));
  input.push_back(upb_ToBase92(base + kUpb_EncodedType_Int32));
  input.push_back(upb_ToBase92(base + kUpb_EncodedType_UInt32));
  input.push_back(upb_ToBase92(base + kUpb_EncodedType_SInt32));
  input.push_back(upb_ToBase92(base + kUpb_EncodedType_Int64));
  input.push_back(upb_ToBase92(base + kUpb_EncodedType_UInt64));
  input.push_back(upb_ToBase92(base + kUpb_EncodedType_SInt64));
  input.push_back(upb_ToBase92(base + kUpb_EncodedType_Enum));
  input.push_back(upb_ToBase92(base + kUpb_EncodedType_Bool));
  input.push_back(upb_ToBase92(base + kUpb_EncodedType_Bytes));
  input.push_back(upb_ToBase92(base + kUpb_EncodedType_String));
  upb::Status status;
  // upb_MiniTable_Build() takes a status out-parameter as its fourth argument.
  upb_MiniTable* table = upb_MiniTable_Build(input.data(), input.size(),
                                             arena.ptr(), status.ptr());
  ASSERT_NE(nullptr, table);
  EXPECT_EQ(16, table->field_count);
  absl::flat_hash_set<size_t> offsets;
  for (int i = 0; i < 16; i++) {
    const upb_MiniTable_Field* f = &table->fields[i];
    EXPECT_EQ(i + 1, f->number);
    EXPECT_EQ(kUpb_FieldMode_Array, f->mode & kUpb_FieldMode_Mask);
    EXPECT_TRUE(offsets.insert(f->offset).second);
    EXPECT_TRUE(f->offset < table->size);
  }
  EXPECT_EQ(0, table->required_count);
}
// Placeholder: no assertions yet. TODO: cover field-number skips.
TEST(MiniTable, Skips) {
}

@ -31,8 +31,7 @@
#include "upb/port_def.inc"
#include "upb/table_internal.h"
/** upb_Message
* *******************************************************************/
/** upb_Message ***************************************************************/
static const size_t overhead = sizeof(upb_Message_InternalData);
@ -368,8 +367,7 @@ bool _upb_mapsorter_pushmap(_upb_mapsorter* s, upb_FieldType key_type,
return true;
}
/** upb_ExtensionRegistry
* ****************************************************************/
/** upb_ExtensionRegistry *****************************************************/
struct upb_ExtensionRegistry {
upb_Arena* arena;

@ -44,8 +44,7 @@
extern "C" {
#endif
/** upb_Message
* *******************************************************************/
/** upb_Message ***************************************************************/
typedef void upb_Message;

@ -63,7 +63,7 @@ typedef struct {
uint16_t submsg_index; // undefined if descriptortype != MESSAGE/GROUP/ENUM
uint8_t descriptortype;
uint8_t mode; /* upb_FieldMode | upb_LabelFlags |
(upb_FieldRep << upb_FieldRep_Shift) */
(upb_FieldRep << kUpb_FieldRep_Shift) */
} upb_MiniTable_Field;
typedef enum {
@ -82,21 +82,14 @@ enum upb_LabelFlags {
/* Representation in the message. Derivable from descriptortype and mode, but
* fast access helps the serializer. */
enum upb_FieldRep {
upb_FieldRep_1Byte = 0,
upb_FieldRep_4Byte = 1,
upb_FieldRep_8Byte = 2,
upb_FieldRep_StringView = 3,
#if UINTPTR_MAX == 0xffffffff
upb_FieldRep_Pointer = upb_FieldRep_4Byte,
#else
upb_FieldRep_Pointer = upb_FieldRep_8Byte,
#endif
upb_FieldRep_Shift =
6, /* Bit offset of the rep in upb_MiniTable_Field.mode */
};
typedef enum {
kUpb_FieldRep_1Byte = 0,
kUpb_FieldRep_4Byte = 1,
kUpb_FieldRep_Pointer = 2,
kUpb_FieldRep_StringView = 3,
kUpb_FieldRep_8Byte = 4,
kUpb_FieldRep_Shift = 5, // Bit offset of the rep in upb_MiniTable_Field.mode
} upb_FieldRep;
UPB_INLINE upb_FieldMode upb_FieldMode_Get(const upb_MiniTable_Field* field) {
return (upb_FieldMode)(field->mode & 3);
@ -213,8 +206,7 @@ UPB_INLINE uint64_t upb_MiniTable_requiredmask(const upb_MiniTable* l) {
return ((1ULL << n) - 1) << 1;
}
/** upb_ExtensionRegistry
* ****************************************************************/
/** upb_ExtensionRegistry *****************************************************/
/* Adds the given extension info for message type |l| and field number |num|
* into the registry. Returns false if this message type and field number were
@ -229,8 +221,7 @@ const upb_MiniTable_Extension* _upb_extreg_get(const upb_ExtensionRegistry* r,
const upb_MiniTable* l,
uint32_t num);
/** upb_Message
* *******************************************************************/
/** upb_Message ***************************************************************/
/* Internal members of a upb_Message that track unknown fields and/or
* extensions. We can change this without breaking binary compatibility. We put
@ -303,8 +294,7 @@ void _upb_Message_DiscardUnknown_shallow(upb_Message* msg);
bool _upb_Message_AddUnknown(upb_Message* msg, const char* data, size_t len,
upb_Arena* arena);
/** upb_Message_Extension
* ***************************************************************/
/** upb_Message_Extension *****************************************************/
/* The internal representation of an extension is self-describing: it contains
* enough information that we can serialize it to binary format without needing
@ -761,8 +751,7 @@ UPB_INLINE void _upb_msg_map_set_value(void* msg, const void* val,
}
}
/** _upb_mapsorter
* *************************************************************/
/** _upb_mapsorter ************************************************************/
/* _upb_mapsorter sorts maps and provides ordered iteration over the entries.
* Since maps can be recursive (map values can be messages which contain other

@ -52,6 +52,7 @@ cc_binary(
visibility = ["//visibility:public"],
deps = [
":common",
"//:mini_table",
"@com_google_absl//absl/base:core_headers",
"@com_google_absl//absl/container:flat_hash_map",
"@com_google_absl//absl/container:flat_hash_set",

Loading…
Cancel
Save