From 5dfbc684dd3873c1ba670e15c445a1e87bb0f7ba Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 24 Jan 2022 11:20:08 -0800 Subject: [PATCH] WIP. --- BUILD | 19 ++ rename.sed | 6 +- run_sed.sh | 2 +- upb/encode.c | 16 +- upb/mini_table.c | 458 +++++++++++++++++++++++++++++++++++++++++ upb/mini_table.h | 103 +++++++++ upb/mini_table_test.cc | 149 ++++++++++++++ upb/msg.c | 6 +- upb/msg.h | 3 +- upb/msg_internal.h | 37 ++-- upbc/BUILD | 1 + 11 files changed, 761 insertions(+), 39 deletions(-) create mode 100644 upb/mini_table.c create mode 100644 upb/mini_table.h create mode 100644 upb/mini_table_test.cc diff --git a/BUILD b/BUILD index 2625740310..ddfef3e351 100644 --- a/BUILD +++ b/BUILD @@ -107,6 +107,25 @@ cc_library( ], ) +cc_library( + name = "mini_table", + srcs = ["upb/mini_table.c", "upb/internal/mini_table.h"], + hdrs = ["upb/mini_table.h"], + copts = UPB_DEFAULT_COPTS, + visibility = ["//visibility:public"], + deps = [":upb"], +) + +cc_test( + name = "mini_table_test", + srcs = ["upb/mini_table_test.cc"], + deps = [ + ":mini_table", + "@com_google_googletest//:gtest_main", + "@com_google_absl//absl/container:flat_hash_set", + ], +) + cc_library( name = "fastdecode", srcs = [ diff --git a/rename.sed b/rename.sed index 771bf74ae2..815c62b497 100644 --- a/rename.sed +++ b/rename.sed @@ -105,9 +105,9 @@ s/upb_enumdef_containingtype/upb_EnumDef_ContainingType/g; s/upb_enumdef_default/upb_EnumDef_Default/g; s/upb_enumdef_valuecount/upb_EnumDef_ValueCount/g; s/upb_enumdef_value/upb_EnumDef_Value/g; -s/upb_enumdef_lookupnamez/upb_EnumDef_FindValueByName/g; -s/upb_enumdef_lookupname/upb_EnumDef_FindValueByNameWithSize/g; -s/upb_enumdef_lookupnum/upb_EnumDef_FindValueByNumber/g; +s/upb_enumdef_ntoiz\b/upb_EnumDef_FindValueByName/g; +s/upb_enumdef_ntoi\b/upb_EnumDef_FindValueByNameWithSize/g; +s/upb_enumdef_iton\b/upb_EnumDef_FindValueByNumber/g; s/upb_enumdef_checknum/upb_EnumDef_CheckNumber/g; s/upb_enumdef/upb_EnumDef/g; diff --git a/run_sed.sh b/run_sed.sh index ecf872b022..25f3ce9912 100644 --- a/run_sed.sh +++ b/run_sed.sh @@ -1,5 +1,5 @@ shopt -s globstar -sed -E -i -f rename.sed **/*.c **/*.cc **/*.h **/*.hpp **/*.py +sed -E -i -f $(dirname $0)/rename.sed **/*.c **/*.cc **/*.h **/*.hpp **/*.py # Since sed can't handle multi-line patterns: perl -i -pe 'BEGIN{undef $/;} s/\bupb_decode\(([^,\)]+),([^,]+),([^,]+),([^,]+),([^,\)]+)\)/upb_Decode(\1, \2, \3, \4, NULL, 0, \5)/smg' **/*.c **/*.cc **/*.h **/*.hpp diff --git a/upb/encode.c b/upb/encode.c index f03124d230..d800df84b5 100644 --- a/upb/encode.c +++ b/upb/encode.c @@ -442,23 +442,29 @@ static bool encode_shouldencode(upb_encstate* e, const upb_Message* msg, if (f->presence == 0) { /* Proto3 presence or map/array. */ const void* mem = UPB_PTR_AT(msg, f->offset, void); - switch (f->mode >> upb_FieldRep_Shift) { - case upb_FieldRep_1Byte: { + switch (f->mode >> kUpb_FieldRep_Shift) { + case kUpb_FieldRep_1Byte: { char ch; memcpy(&ch, mem, 1); return ch != 0; } - case upb_FieldRep_4Byte: { +#if UINTPTR_MAX == 0xffffffff + case upb_FieldRep_Pointer: +#endif + case kUpb_FieldRep_4Byte: { uint32_t u32; memcpy(&u32, mem, 4); return u32 != 0; } - case upb_FieldRep_8Byte: { +#if UINTPTR_MAX != 0xffffffff + case kUpb_FieldRep_Pointer: +#endif + case kUpb_FieldRep_8Byte: { uint64_t u64; memcpy(&u64, mem, 8); return u64 != 0; } - case upb_FieldRep_StringView: { + case kUpb_FieldRep_StringView: { const upb_StringView* str = (const upb_StringView*)mem; return str->size != 0; } diff --git a/upb/mini_table.c b/upb/mini_table.c new file mode 100644 index 0000000000..2c13b1a9bf --- /dev/null +++ b/upb/mini_table.c @@ -0,0 +1,458 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "upb/mini_table.h" + +#include "upb/internal/mini_table.h" +#include "upb/msg_internal.h" +#include "upb/upb.h" + +// Must be last. +#include "upb/port_def.inc" + +/** upb_MiniTable *************************************************************/ + +enum upb_EncodedType { + kUpb_EncodedType_Double = 0, + kUpb_EncodedType_Float = 1, + kUpb_EncodedType_Fixed32 = 2, + kUpb_EncodedType_Fixed64 = 3, + kUpb_EncodedType_SFixed32 = 4, + kUpb_EncodedType_SFixed64 = 5, + kUpb_EncodedType_Int32 = 6, + kUpb_EncodedType_UInt32 = 7, + kUpb_EncodedType_SInt32 = 8, + kUpb_EncodedType_Int64 = 9, + kUpb_EncodedType_UInt64 = 10, + kUpb_EncodedType_SInt64 = 11, + kUpb_EncodedType_Enum = 12, + kUpb_EncodedType_Bool = 13, + kUpb_EncodedType_Bytes = 14, + kUpb_EncodedType_String = 15, + kUpb_EncodedType_Group = 16, + kUpb_EncodedType_Message = 17, + + kUpb_EncodedType_RepeatedBase = 20, +}; + +enum { + kUpb_EncodedValue_MinField = ' ', + kUpb_EncodedValue_MaxField = 'K', + kUpb_EncodedValue_MinModifier = 'L', + kUpb_EncodedValue_MaxModifier = '[', + kUpb_EncodedValue_End = '^', + kUpb_EncodedValue_MinSkip = '_', + kUpb_EncodedValue_MaxSkip = '~', +}; + +static const int8_t kUpb_FromBase92[] = { + 0, 1, -1, 2, 3, 4, 5, -1, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, + 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, + 55, 56, 57, -1, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, + 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, +}; + +static const char kUpb_ToBase92[] = { + ' ', '!', '#', '$', '%', '&', '(', ')', '*', '+', ',', '-', '.', '/', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', + '>', '?', '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', + 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', + 'Z', '[', ']', '^', '_', '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', + 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', + 'w', 'x', 'y', 'z', '{', '|', '}', '~', +}; + +char upb_ToBase92(char ch) { + assert(0 <= ch && ch < 92); + return kUpb_ToBase92[ch]; +} + +char upb_FromBase92(char ch) { + if (' ' > ch || ch > '~') return -1; + return kUpb_FromBase92[ch - ' ']; +} + +static const char kUpb_EncodedToFieldRep[] = { + [kUpb_EncodedType_Double] = kUpb_FieldRep_8Byte, + [kUpb_EncodedType_Float] = kUpb_FieldRep_4Byte, + [kUpb_EncodedType_Int64] = kUpb_FieldRep_8Byte, + [kUpb_EncodedType_UInt64] = kUpb_FieldRep_8Byte, + [kUpb_EncodedType_Int32] = kUpb_FieldRep_4Byte, + [kUpb_EncodedType_Fixed64] = kUpb_FieldRep_8Byte, + [kUpb_EncodedType_Fixed32] = kUpb_FieldRep_4Byte, + [kUpb_EncodedType_Bool] = kUpb_FieldRep_1Byte, + [kUpb_EncodedType_String] = kUpb_FieldRep_StringView, + [kUpb_EncodedType_Group] = kUpb_FieldRep_Pointer, + [kUpb_EncodedType_Message] = kUpb_FieldRep_Pointer, + [kUpb_EncodedType_Bytes] = kUpb_FieldRep_StringView, + [kUpb_EncodedType_UInt32] = kUpb_FieldRep_4Byte, + [kUpb_EncodedType_Enum] = kUpb_FieldRep_4Byte, + [kUpb_EncodedType_SFixed32] = kUpb_FieldRep_4Byte, + [kUpb_EncodedType_SFixed64] = kUpb_FieldRep_8Byte, + [kUpb_EncodedType_SInt32] = kUpb_FieldRep_4Byte, + [kUpb_EncodedType_SInt64] = kUpb_FieldRep_8Byte, +}; + +static const char kUpb_EncodedToType[] = { + [kUpb_EncodedType_Double] = kUpb_FieldType_Double, + [kUpb_EncodedType_Float] = kUpb_FieldType_Float, + [kUpb_EncodedType_Int64] = kUpb_FieldType_Int64, + [kUpb_EncodedType_UInt64] = kUpb_FieldType_UInt64, + [kUpb_EncodedType_Int32] = kUpb_FieldType_Int32, + [kUpb_EncodedType_Fixed64] = kUpb_FieldType_Fixed64, + [kUpb_EncodedType_Fixed32] = kUpb_FieldType_Fixed32, + [kUpb_EncodedType_Bool] = kUpb_FieldType_Bool, + [kUpb_EncodedType_String] = kUpb_FieldType_String, + [kUpb_EncodedType_Group] = kUpb_FieldType_Group, + [kUpb_EncodedType_Message] = kUpb_FieldType_Message, + [kUpb_EncodedType_Bytes] = kUpb_FieldType_Bytes, + [kUpb_EncodedType_UInt32] = kUpb_FieldType_UInt32, + [kUpb_EncodedType_Enum] = kUpb_FieldType_Enum, + [kUpb_EncodedType_SFixed32] = kUpb_FieldType_SFixed32, + [kUpb_EncodedType_SFixed64] = kUpb_FieldType_SFixed64, + [kUpb_EncodedType_SInt32] = kUpb_FieldType_SInt32, + [kUpb_EncodedType_SInt64] = kUpb_FieldType_SInt64, +}; + +const upb_MiniTable_Field* upb_MiniTable_FindFieldByNumber( + const upb_MiniTable* table, uint32_t number) { + int n = table->field_count; + for (int i = 0; i < n; i++) { + if (table->fields[i].number == number) { + return &table->fields[i]; + } + } + return NULL; +} + +static uint32_t upb_MiniTable_DecodeVarInt(const char** ptr, const char* end, + char ch, uint8_t min, uint8_t max) { + uint32_t val = 0; + uint32_t shift = 0; + while (1) { + val |= (kUpb_FromBase92[ch] - kUpb_FromBase92[min]) << shift; + if (*ptr < end || **ptr < min || **ptr > max) return val; + ch = *(*ptr)++; + shift += _upb_Log2Ceiling(max - min); + } +} + +static bool upb_MiniTable_HasSub(char type, bool is_proto2) { + return type == kUpb_EncodedType_Message || type == kUpb_EncodedType_Group || + (type == kUpb_EncodedType_Enum && is_proto2); +} + +// In each field's offset, we temporarily store a presence classifier: +enum PresenceClass { + kNoPresence = 0, + kHasbitPresence = 1, + kRequiredPresence = 2, + // Negative values refer to a specific oneof with that number. + // Positive values >=3 indicate that this field is in a oneof, and specify + // the next field in this oneof's linked list. +}; + +#include +static bool upb_MiniTable_SetField(uint8_t ch, upb_MiniTable_Field* field, + bool is_proto2, uint32_t* sub_count) { + fprintf(stderr, "MiniTable_SetField: %d\n", (int)ch); + int8_t type = upb_FromBase92(ch); + if (ch >= kUpb_ToBase92[kUpb_EncodedType_RepeatedBase]) { + type -= kUpb_EncodedType_RepeatedBase; + fprintf(stderr, "Type1: %d\n", (int)type); + field->mode = kUpb_FieldMode_Array; + field->mode |= kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift; + field->offset = kNoPresence; + } else { + fprintf(stderr, "Type2: %d\n", (int)type); + field->mode = kUpb_FieldMode_Scalar; + field->mode |= kUpb_EncodedToFieldRep[type] << kUpb_FieldRep_Shift; + field->offset = kHasbitPresence; + } + if (type >= 18) return false; + field->descriptortype = kUpb_EncodedToType[type]; + if (upb_MiniTable_HasSub(ch, is_proto2)) { + field->submsg_index = (*sub_count)++; + } + return true; +} + +static bool upb_MiniTable_SetModifier(uint32_t mod, upb_MiniTable_Field* field) { + if (mod & 0x1) { + field->mode &= ~upb_LabelFlags_IsPacked; + } else { + field->mode |= upb_LabelFlags_IsPacked; + } + if (mod & 0x2) { + // Proto3 singular field. + if (field->offset != kHasbitPresence) return false; + field->offset = kNoPresence; + } + if (mod & 0x4) { + field->offset = kRequiredPresence; + } + return true; +} + +static bool upb_MiniTable_PushItem(upb_LayoutItemVector* vec, + upb_LayoutItem item) { + if (vec->size == vec->capacity) { + size_t new_cap = UPB_MAX(8, vec->size * 2); + vec->data = realloc(vec->data, new_cap * sizeof(*vec->data)); + if (!vec->data) return false; + vec->capacity = new_cap; + } + vec->data[vec->size++] = item; + return true; +} + +static bool upb_MiniTable_PushOneof(upb_LayoutItemVector* vec, + upb_LayoutItem item) { + // Push oneof data. + item.is_case = false; + if (!upb_MiniTable_PushItem(vec, item)) return false; + + // Push oneof case. + item.rep = kUpb_FieldRep_4Byte; // Field Number. + item.is_case = true; + return upb_MiniTable_PushItem(vec, item); +} + +static bool upb_MiniTable_DecodeOneofs(const char** ptr, const char* end, + upb_MiniTable* ret, + upb_LayoutItemVector* vec) { + upb_LayoutItem item = {.rep = 0, .field_or_oneof = -1}; + while (*ptr < end) { + char ch = *(*ptr)++; + if (ch == '|') { + // Field separator, no action needed. + } else if (ch == '~') { + // End of oneof. + if (!upb_MiniTable_PushOneof(vec, item)) return false; + item.field_or_oneof--; // Move to next oneof. + } else { + uint32_t field_num = + upb_MiniTable_DecodeVarInt(ptr, end, *(*ptr)++, 0, 63); + upb_MiniTable_Field* f = + (upb_MiniTable_Field*)upb_MiniTable_FindFieldByNumber(ret, field_num); + if (!f) return false; + // Oneof storage must be large enough to accommodate the largest member. + item.rep = UPB_MAX(item.rep, f->mode >> kUpb_FieldRep_Shift); + f->offset = item.field_or_oneof; + } + } + + // Push final oneof. + return upb_MiniTable_PushOneof(vec, item); +} + +#define UPB_COMPARE_INTEGERS(a, b) ((a) < (b) ? -1 : ((a) == (b) ? 0 : 1)) + +int upb_MiniTable_CompareFields(const void* _a, const void* _b) { + const upb_LayoutItem* a = _a; + const upb_LayoutItem* b = _b; + // Currently we just sort by: + // 1. rep (descending, so largest fields are first) + // 2. is_case (descending, so oneof cases are first) + // 2. field_number (ascending, so smallest numbers are first) + // + // The main goal of this is to reduce space lost to padding. + if (a->rep != b->rep) return UPB_COMPARE_INTEGERS(a->rep, b->rep); + if (a->is_case != b->is_case) { + return UPB_COMPARE_INTEGERS(a->is_case, b->is_case); + } + return UPB_COMPARE_INTEGERS(b->field_or_oneof, a->field_or_oneof); +} + +#undef UPB_COMPARE_INTEGERS + +static bool upb_MiniTable_SortLayoutItems(upb_MiniTable* table, + upb_LayoutItemVector* vec) { + // Add items for all fields that are not in a oneof. + int n = table->field_count; + for (int i = 0; i < n; i++) { + upb_MiniTable_Field* f = (upb_MiniTable_Field*)&table->fields[i]; + upb_LayoutItem item = {.field_or_oneof = i, + .rep = f->mode >> kUpb_FieldRep_Shift}; + if (!upb_MiniTable_PushItem(vec, item)) return false; + } + + qsort(vec->data, vec->size, sizeof(*vec->data), upb_MiniTable_CompareFields); + + return true; +} + +void upb_MiniTable_AllocateHasbits(upb_MiniTable* ret) { + int n = ret->field_count; + int last_hasbit = 0; // 0 cannot be used. + + // First assign required fields, which must have the lowest hasbits. + for (int i = 0; i < n; i++) { + upb_MiniTable_Field* field = (upb_MiniTable_Field*)&ret->fields[i]; + if (field->offset == kRequiredPresence) { + field->presence = ++last_hasbit; + } + } + ret->required_count = last_hasbit; + + // Next assign non-required hasbit fields. + for (int i = 0; i < n; i++) { + upb_MiniTable_Field* field = (upb_MiniTable_Field*)&ret->fields[i]; + if (field->offset == kHasbitPresence) { + field->presence = ++last_hasbit; + } + } +} + +upb_MiniTable* _upb_MiniTable_BuildWithoutOffsets(const char* data, size_t len, + upb_Arena* arena, + upb_LayoutItemVector* vec, + upb_Status* status) { + upb_MiniTable* ret = upb_Arena_Malloc(arena, sizeof(*ret)); + // `len` is an upper bound on the number of fields. We will return what we + // don't use. + upb_MiniTable_Field* fields = upb_Arena_Malloc(arena, sizeof(*fields) * len); + if (!fields) return NULL; + ret->field_count = 0; + ret->fields = fields; + + const char* ptr = data; + const char* end = data + len; + uint32_t last_field_number = 0; + uint32_t sub_count = 0; + bool is_proto2 = false; // TODO + + while (ptr < end) { + char ch = *ptr++; + if (ch <= kUpb_EncodedValue_MaxField) { + // Field type. + upb_MiniTable_Field* field = &fields[ret->field_count++]; + field->number = ++last_field_number; + if (!upb_MiniTable_SetField(ch, field, is_proto2, &sub_count)) { + return NULL; + } + } else if (kUpb_EncodedValue_MinModifier <= ch && + ch <= kUpb_EncodedValue_MaxModifier) { + // Modifier. + if (ret->field_count == 0) return NULL; + uint32_t mod = upb_MiniTable_DecodeVarInt(&ptr, end, ch, + kUpb_EncodedValue_MinModifier, + kUpb_EncodedValue_MaxModifier); + upb_MiniTable_Field* field = &fields[ret->field_count - 1]; + upb_MiniTable_SetModifier(mod, field); + } else if (ch == kUpb_EncodedValue_End) { + // Oneof groups. + if (!upb_MiniTable_DecodeOneofs(&ptr, end, ret, vec)) return NULL; + break; + } else if (kUpb_EncodedValue_MinSkip <= ch && + ch <= kUpb_EncodedValue_MaxSkip) { + // Skip. + last_field_number += upb_MiniTable_DecodeVarInt( + &ptr, end, ch, kUpb_EncodedValue_MinSkip, kUpb_EncodedValue_MaxSkip); + } + } + fprintf(stderr, "Done!\n"); + + // Return unused memory from fields array. + upb_Arena_Realloc(arena, fields, sizeof(*fields) * len, + sizeof(*fields) * ret->field_count); + + size_t subs_bytes = sizeof(*ret->subs) * sub_count; + ret->subs = upb_Arena_Malloc(arena, subs_bytes); + if (!ret->subs) return NULL; + // Initialize to zero we can test later that the user set all subs. + memset((void*)ret->subs, 0, subs_bytes); + + fprintf(stderr, "Allocate?\n"); + upb_MiniTable_AllocateHasbits(ret); + fprintf(stderr, "Allocate!\n"); + fprintf(stderr, "Sort?\n"); + if (!upb_MiniTable_SortLayoutItems(ret, vec)) return NULL; + fprintf(stderr, "Sort!\n"); + return ret; +} + +size_t upb_MiniTable_Place(upb_MiniTable* table, upb_FieldRep rep) { + static const size_t kRepToSize[] = { + [kUpb_FieldRep_1Byte] = 1, + [kUpb_FieldRep_4Byte] = 4, + [kUpb_FieldRep_Pointer] = sizeof(void*), + [kUpb_FieldRep_StringView] = sizeof(upb_StringView), + [kUpb_FieldRep_8Byte] = 8, + }; + size_t size = kRepToSize[rep]; + size_t ret = UPB_ALIGN_UP(table->size, size); + table->size = ret + size; + return ret; +} + +static bool upb_MiniTable_AssignOffsets(upb_MiniTable* ret, + upb_LayoutItemVector* vec) { + int n = vec->size; + for (int i = 0; i < n; i++) { + upb_LayoutItem* item = &vec->data[i]; + if (item->field_or_oneof >= 0) { + upb_MiniTable_Field* f = + (upb_MiniTable_Field*)&ret->fields[item->field_or_oneof]; + f->offset = upb_MiniTable_Place(ret, item->rep); + } + } + return true; +} + +upb_MiniTable* upb_MiniTable_BuildWithBuf(const char* data, size_t len, + upb_Arena* arena, void** buf, + size_t* buf_size, + upb_Status* status) { + upb_LayoutItemVector vec = {.data = *buf, + .capacity = *buf_size / sizeof(*vec.data)}; + + upb_MiniTable* ret = + _upb_MiniTable_BuildWithoutOffsets(data, len, arena, &vec, status); + if (!ret) goto err; + fprintf(stderr, "Assign Offsets?\n"); + if (!upb_MiniTable_AssignOffsets(ret, &vec)) goto err; + fprintf(stderr, "Assign Offsets! %p\n", (void*)ret); +done: + *buf = vec.data; + *buf_size = vec.capacity / sizeof(*vec.data); + return ret; +err: + ret = NULL; + goto done; +} + +upb_MiniTable* upb_MiniTable_Build(const char* data, size_t len, + upb_Arena* arena, upb_Status* status) { + void* buf = NULL; + size_t size = 0; + upb_MiniTable* ret = + upb_MiniTable_BuildWithBuf(data, len, arena, &buf, &size, status); + free(buf); + return ret; +} diff --git a/upb/mini_table.h b/upb/mini_table.h new file mode 100644 index 0000000000..e35f26fc68 --- /dev/null +++ b/upb/mini_table.h @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef UPB_MINI_TABLE_H_ +#define UPB_MINI_TABLE_H_ + +#include "upb/msg_internal.h" + +#ifdef __cplusplus +extern "C" { +#endif + +const upb_MiniTable_Field* upb_MiniTable_FindFieldByNumber( + const upb_MiniTable* table, uint32_t number); + +/** upb_MiniTable *************************************************************/ + +// Functions to encode a string in a format that can be loaded by +// upb_MiniTable_Build(). + +typedef enum { + kUpb_MessageModifier_DefaultIsPacked = 1, + kUpb_MessageModifier_IsMessageSet = 2, + kUpb_MessageModifier_IsExtendable = 4, + kUpb_MessageModifier_HasClosedEnums = 8, +} kUpb_MessageModifier; + +typedef enum { + kUpb_FieldModifier_IsRepeated = 1, + kUpb_FieldModifier_IsPacked = 2, +} kUpb_FieldModifier; + +typedef struct { + char* buf; + char* end; + // Aliased to internal-only members in .cc. + char internal[32]; +} upb_MtDataEncoder; + +// If the input buffer has at least this many bytes available, the encoder call +// is guaranteed to succeed (as long as field number order is maintained). +#define kUpb_MtDataEncoder_MinSize 16 + +// Note: For the main field list, fields *must* be in field number order. +// For the oneof field list, order doesn't matter. +char* upb_MtDataEncoder_StartMessage(upb_MtDataEncoder* e, uint64_t msg_mod); +char* upb_MtDataEncoder_PutField(upb_MtDataEncoder* e, upb_FieldType type, + uint32_t field_num, uint64_t field_mod); +char* upb_MiniTable_StartOneof(upb_MtDataEncoder* e); +char* upb_MiniTable_PutOneofField(upb_MtDataEncoder* e, uint32_t field_num); + +// Builds a mini table from the data encoded in the buffer [data, len]. If any +// errors occur, returns NULL and sets a status message. In the success case, +// the caller must call upb_MiniTable_SetSub*() for all message or proto2 enum +// fields to link the table to the appropriate sub-tables. +upb_MiniTable* upb_MiniTable_Build(const char* data, size_t len, + upb_Arena* arena, upb_Status* status); +void upb_MiniTable_SetSubMessage(upb_MiniTable* table, + const upb_MiniTable_Field* field, + const upb_MiniTable* sub); +void upb_MiniTable_SetSubEnum(upb_MiniTable* table, + const upb_MiniTable_Field* field, + const upb_MiniTable_Enum* sub); + +// Like upb_MiniTable_Build(), but the user provides a buffer of layout data so +// it can be reused from call to call, avoiding repeated realloc()/free(). +// +// The caller owns `*buf` both before and after the call, and must free() it +// when it is no longer in use. The function will realloc() `*buf` as +// necessary, updating `*size` accordingly. +upb_MiniTable* upb_MiniTable_BuildWithBuf(const char* data, size_t len, + upb_Arena* arena, void** buf, + size_t* buf_size, upb_Status* status); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* UPB_MINI_TABLE_H_ */ diff --git a/upb/mini_table_test.cc b/upb/mini_table_test.cc new file mode 100644 index 0000000000..d4497446c0 --- /dev/null +++ b/upb/mini_table_test.cc @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "upb/mini_table.h" + +#include "absl/container/flat_hash_set.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "upb/msg_internal.h" +#include "upb/upb.hpp" + +// We can consider putting these in a standard upb .hpp header. + +static void EncodeField(upb_FieldType type, uint64_t modifiers, + std::string* str) { + char buf[16]; + char* end = + upb_MiniTable_EncodeField(type, modifiers, buf, buf + sizeof(buf)); + assert(end); + str->append(buf, end - buf); +} + +static void EncodeSkip(uint32_t skip, std::string* str) { + char buf[16]; + char* end = upb_MiniTable_EncodeSkip(skip, buf, buf + sizeof(buf)); + assert(end); + str->append(buf, end - buf); +} + +static void StartOneofs(std::string* str) { + char buf[16]; + char* end = upb_MiniTable_StartOneofs(buf, buf + sizeof(buf)); + assert(end); + str->append(buf, end - buf); +} + +static void EncodeOneofField(uint32_t field_num, std::string* str) { + char buf[16]; + char* end = + upb_MiniTable_EncodeOneofField(field_num, buf, buf + sizeof(buf)); + assert(end); + str->append(buf, end - buf); +} + +static void EncodeOneofFieldSeparator(std::string* str) { + char buf[16]; + char* end = upb_MiniTable_EncodeOneofFieldSeparator(buf, buf + sizeof(buf)); + assert(end); + str->append(buf, end - buf); +} + +static void EncodeOneofSeparator(std::string* str) { + char buf[16]; + char* end = upb_MiniTable_EncodeOneofSeparator(buf, buf + sizeof(buf)); + assert(end); + str->append(buf, end - buf); +} + +TEST(MiniTable, Empty) { + upb::Arena arena; + upb_MiniTable* table = upb_MiniTable_Build(NULL, 0, arena.ptr()); + ASSERT_NE(nullptr, table); + EXPECT_EQ(0, table->field_count); + EXPECT_EQ(0, table->required_count); +} + +TEST(MiniTable, AllScalarTypes) { + upb::Arena arena; + std::string input; + for (int i = kUpb_FieldType_Double ; i < kUpb_FieldType_SInt64; i++) { + EncodeField(i, &input); + } + fprintf(stderr, "YO: %s\n", input.c_str()); + upb::Status status; + upb_MiniTable* table = upb_MiniTable_Build(input.data(), input.size(), + arena.ptr(), status.ptr()); + ASSERT_NE(nullptr, table); + EXPECT_EQ(16, table->field_count); + absl::flat_hash_set offsets; + for (int i = 0; i < 16; i++) { + const upb_MiniTable_Field* f = &table->fields[i]; + EXPECT_EQ(i + 1, f->number); + EXPECT_EQ(kUpb_FieldMode_Scalar, f->mode & kUpb_FieldMode_Mask); + EXPECT_TRUE(offsets.insert(f->offset).second); + EXPECT_TRUE(f->offset < table->size); + } + EXPECT_EQ(0, table->required_count); +} + +TEST(MiniTable, AllRepeatedTypes) { + upb::Arena arena; + std::string input; + const size_t base = kUpb_EncodedType_RepeatedBase; + input.push_back(upb_ToBase92(base + kUpb_EncodedType_Double)); + input.push_back(upb_ToBase92(base + kUpb_EncodedType_Float)); + input.push_back(upb_ToBase92(base + kUpb_EncodedType_Fixed32)); + input.push_back(upb_ToBase92(base + kUpb_EncodedType_Fixed64)); + input.push_back(upb_ToBase92(base + kUpb_EncodedType_SFixed32)); + input.push_back(upb_ToBase92(base + kUpb_EncodedType_SFixed64)); + input.push_back(upb_ToBase92(base + kUpb_EncodedType_Int32)); + input.push_back(upb_ToBase92(base + kUpb_EncodedType_UInt32)); + input.push_back(upb_ToBase92(base + kUpb_EncodedType_SInt32)); + input.push_back(upb_ToBase92(base + kUpb_EncodedType_Int64)); + input.push_back(upb_ToBase92(base + kUpb_EncodedType_UInt64)); + input.push_back(upb_ToBase92(base + kUpb_EncodedType_SInt64)); + input.push_back(upb_ToBase92(base + kUpb_EncodedType_Enum)); + input.push_back(upb_ToBase92(base + kUpb_EncodedType_Bool)); + input.push_back(upb_ToBase92(base + kUpb_EncodedType_Bytes)); + input.push_back(upb_ToBase92(base + kUpb_EncodedType_String)); + upb_MiniTable* table = upb_MiniTable_Build(input.data(), input.size(), arena.ptr()); + ASSERT_NE(nullptr, table); + EXPECT_EQ(16, table->field_count); + absl::flat_hash_set offsets; + for (int i = 0; i < 16; i++) { + const upb_MiniTable_Field* f = &table->fields[i]; + EXPECT_EQ(i + 1, f->number); + EXPECT_EQ(kUpb_FieldMode_Array, f->mode & kUpb_FieldMode_Mask); + EXPECT_TRUE(offsets.insert(f->offset).second); + EXPECT_TRUE(f->offset < table->size); + } + EXPECT_EQ(0, table->required_count); +} + +TEST(MiniTable, Skips) { +} diff --git a/upb/msg.c b/upb/msg.c index 3734fc5491..60e98b95b4 100644 --- a/upb/msg.c +++ b/upb/msg.c @@ -31,8 +31,7 @@ #include "upb/port_def.inc" #include "upb/table_internal.h" -/** upb_Message - * *******************************************************************/ +/** upb_Message ***************************************************************/ static const size_t overhead = sizeof(upb_Message_InternalData); @@ -368,8 +367,7 @@ bool _upb_mapsorter_pushmap(_upb_mapsorter* s, upb_FieldType key_type, return true; } -/** upb_ExtensionRegistry - * ****************************************************************/ +/** upb_ExtensionRegistry *****************************************************/ struct upb_ExtensionRegistry { upb_Arena* arena; diff --git a/upb/msg.h b/upb/msg.h index 99928393ae..c984b137b1 100644 --- a/upb/msg.h +++ b/upb/msg.h @@ -44,8 +44,7 @@ extern "C" { #endif -/** upb_Message - * *******************************************************************/ +/** upb_Message ***************************************************************/ typedef void upb_Message; diff --git a/upb/msg_internal.h b/upb/msg_internal.h index 88c17108f1..213b690a44 100644 --- a/upb/msg_internal.h +++ b/upb/msg_internal.h @@ -63,7 +63,7 @@ typedef struct { uint16_t submsg_index; // undefined if descriptortype != MESSAGE/GROUP/ENUM uint8_t descriptortype; uint8_t mode; /* upb_FieldMode | upb_LabelFlags | - (upb_FieldRep << upb_FieldRep_Shift) */ + (upb_FieldRep << kUpb_FieldRep_Shift) */ } upb_MiniTable_Field; typedef enum { @@ -82,21 +82,14 @@ enum upb_LabelFlags { /* Representation in the message. Derivable from descriptortype and mode, but * fast access helps the serializer. */ -enum upb_FieldRep { - upb_FieldRep_1Byte = 0, - upb_FieldRep_4Byte = 1, - upb_FieldRep_8Byte = 2, - upb_FieldRep_StringView = 3, - -#if UINTPTR_MAX == 0xffffffff - upb_FieldRep_Pointer = upb_FieldRep_4Byte, -#else - upb_FieldRep_Pointer = upb_FieldRep_8Byte, -#endif - - upb_FieldRep_Shift = - 6, /* Bit offset of the rep in upb_MiniTable_Field.mode */ -}; +typedef enum { + kUpb_FieldRep_1Byte = 0, + kUpb_FieldRep_4Byte = 1, + kUpb_FieldRep_Pointer = 2, + kUpb_FieldRep_StringView = 3, + kUpb_FieldRep_8Byte = 4, + kUpb_FieldRep_Shift = 5, // Bit offset of the rep in upb_MiniTable_Field.mode +} upb_FieldRep; UPB_INLINE upb_FieldMode upb_FieldMode_Get(const upb_MiniTable_Field* field) { return (upb_FieldMode)(field->mode & 3); @@ -213,8 +206,7 @@ UPB_INLINE uint64_t upb_MiniTable_requiredmask(const upb_MiniTable* l) { return ((1ULL << n) - 1) << 1; } -/** upb_ExtensionRegistry - * ****************************************************************/ +/** upb_ExtensionRegistry *****************************************************/ /* Adds the given extension info for message type |l| and field number |num| * into the registry. Returns false if this message type and field number were @@ -229,8 +221,7 @@ const upb_MiniTable_Extension* _upb_extreg_get(const upb_ExtensionRegistry* r, const upb_MiniTable* l, uint32_t num); -/** upb_Message - * *******************************************************************/ +/** upb_Message ***************************************************************/ /* Internal members of a upb_Message that track unknown fields and/or * extensions. We can change this without breaking binary compatibility. We put @@ -303,8 +294,7 @@ void _upb_Message_DiscardUnknown_shallow(upb_Message* msg); bool _upb_Message_AddUnknown(upb_Message* msg, const char* data, size_t len, upb_Arena* arena); -/** upb_Message_Extension - * ***************************************************************/ +/** upb_Message_Extension *****************************************************/ /* The internal representation of an extension is self-describing: it contains * enough information that we can serialize it to binary format without needing @@ -761,8 +751,7 @@ UPB_INLINE void _upb_msg_map_set_value(void* msg, const void* val, } } -/** _upb_mapsorter - * *************************************************************/ +/** _upb_mapsorter ************************************************************/ /* _upb_mapsorter sorts maps and provides ordered iteration over the entries. * Since maps can be recursive (map values can be messages which contain other diff --git a/upbc/BUILD b/upbc/BUILD index 8fd69901cb..1fd5443bf6 100644 --- a/upbc/BUILD +++ b/upbc/BUILD @@ -52,6 +52,7 @@ cc_binary( visibility = ["//visibility:public"], deps = [ ":common", + "//:mini_table", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_set",