From 8059da24d2096224915150f8fadc0f53e52ba5ff Mon Sep 17 00:00:00 2001 From: Adam Cozzette Date: Wed, 16 Aug 2023 07:57:14 -0700 Subject: [PATCH] Start adding upb test runs to CI for protobuf repo This change moves the upb Fastbuild, Optimized, and FastTable test runs over to the protobuf repo CI in preparation for moving the upb codebase itself. There are a bunch more test runs to move, but this initial change handles the easy ones first. I also updated our pinned upb version to the current head to pick up some recent fixes. PiperOrigin-RevId: 557486174 --- .github/workflows/test_runner.yml | 8 + .github/workflows/test_upb.yml | 41 + build_defs/upb.patch | 11 - php/ext/google/protobuf/php-upb.c | 13794 ++++++++++++------------ php/ext/google/protobuf/php-upb.h | 890 +- protobuf_deps.bzl | 5 +- ruby/ext/google/protobuf_c/ruby-upb.c | 13794 ++++++++++++------------ ruby/ext/google/protobuf_c/ruby-upb.h | 826 +- 8 files changed, 14713 insertions(+), 14656 deletions(-) create mode 100644 .github/workflows/test_upb.yml delete mode 100644 build_defs/upb.patch diff --git a/.github/workflows/test_runner.yml b/.github/workflows/test_runner.yml index 27cb54e5b7..3b328a89e3 100644 --- a/.github/workflows/test_runner.yml +++ b/.github/workflows/test_runner.yml @@ -176,6 +176,14 @@ jobs: safe-checkout: ${{ needs.check-tag.outputs.checkout-sha }} secrets: inherit + upb: + name: μpb + needs: [check-tag] + uses: ./.github/workflows/test_upb.yml + with: + safe-checkout: ${{ needs.check-tag.outputs.checkout-sha }} + secrets: inherit + staleness: name: Staleness needs: [check-tag] diff --git a/.github/workflows/test_upb.yml b/.github/workflows/test_upb.yml new file mode 100644 index 0000000000..d942e5add3 --- /dev/null +++ b/.github/workflows/test_upb.yml @@ -0,0 +1,41 @@ +name: μpb Tests + +on: + workflow_call: + inputs: + safe-checkout: + required: true + description: "The SHA key for the commit we want to run over" + type: string + +permissions: + contents: read + +jobs: + linux: 
+ strategy: + fail-fast: false # Don't cancel all jobs if one fails. + matrix: + config: + - { name: "Fastbuild" } + - { name: "Optimized", flags: "-c opt" } + - { name: "FastTable", flags: "--@upb//:fasttable_enabled=true" } + include: + # Set defaults + - image: us-docker.pkg.dev/protobuf-build/containers/test/linux/sanitize@sha256:04cd765285bc52cbbf51d66c8c66d8603579cf0f19cc42df26b09d2c270541fb + - targets: "@upb//..." + name: ${{ matrix.config.name }} + runs-on: ubuntu-latest + + steps: + - name: Checkout pending changes + uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c # v3.3.0 + with: + ref: ${{ inputs.safe-checkout }} + - name: Run tests + uses: protocolbuffers/protobuf-ci/bazel-docker@v2 + with: + image: ${{ matrix.image }} + credentials: ${{ secrets.GAR_SERVICE_ACCOUNT }} + bazel-cache: upb-bazel + bazel: test --cxxopt=-std=c++17 --host_cxxopt=-std=c++17 ${{ matrix.targets }} ${{ matrix.config.flags }} diff --git a/build_defs/upb.patch b/build_defs/upb.patch deleted file mode 100644 index c3ed912698..0000000000 --- a/build_defs/upb.patch +++ /dev/null @@ -1,11 +0,0 @@ ---- upbc/bootstrap_compiler.bzl -+++ upbc/bootstrap_compiler.bzl -@@ -20,7 +20,7 @@ _upbc_base = "//upbc:protoc-gen-upb" - - # begin:github_only - _is_google3 = False --_extra_proto_path = "-Iexternal/com_google_protobuf/src " -+_extra_proto_path = "-Isrc " - # end:github_only - - def _upbc(stage): diff --git a/php/ext/google/protobuf/php-upb.c b/php/ext/google/protobuf/php-upb.c index 9e5a82e1e6..8e4ede8362 100644 --- a/php/ext/google/protobuf/php-upb.c +++ b/php/ext/google/protobuf/php-upb.c @@ -373,8 +373,8 @@ void upb_Status_VAppendErrorFormat(upb_Status* status, const char* fmt, status->msg[_kUpb_Status_MaxMessage - 1] = '\0'; } -#include +#include // Must be last. @@ -609,6 +609,7 @@ upb_Map* _upb_Map_New(upb_Arena* a, size_t key_size, size_t value_size) { } + // Must be last. 
static void _upb_mapsorter_getkeys(const void* _a, const void* _b, void* a_key, @@ -6332,6164 +6333,5381 @@ size_t upb_Message_ExtensionCount(const upb_Message* msg) { return count; } -#include // Must be last. -/* The upb core does not generally have a concept of default instances. However - * for descriptor options we make an exception since the max size is known and - * modest (<200 bytes). All types can share a default instance since it is - * initialized to zeroes. - * - * We have to allocate an extra pointer for upb's internal metadata. */ -static const char opt_default_buf[_UPB_MAXOPT_SIZE + sizeof(void*)] = {0}; -const char* kUpbDefOptDefault = &opt_default_buf[sizeof(void*)]; - -const char* _upb_DefBuilder_FullToShort(const char* fullname) { - const char* p; - - if (fullname == NULL) { - return NULL; - } else if ((p = strrchr(fullname, '.')) == NULL) { - /* No '.' in the name, return the full string. */ - return fullname; - } else { - /* Return one past the last '.'. */ - return p + 1; - } -} - -void _upb_DefBuilder_FailJmp(upb_DefBuilder* ctx) { UPB_LONGJMP(ctx->err, 1); } - -void _upb_DefBuilder_Errf(upb_DefBuilder* ctx, const char* fmt, ...) { - va_list argp; - va_start(argp, fmt); - upb_Status_VSetErrorFormat(ctx->status, fmt, argp); - va_end(argp); - _upb_DefBuilder_FailJmp(ctx); -} +typedef struct { + upb_MdDecoder base; + upb_Arena* arena; + upb_MiniTableEnum* enum_table; + uint32_t enum_value_count; + uint32_t enum_data_count; + uint32_t enum_data_capacity; +} upb_MdEnumDecoder; -void _upb_DefBuilder_OomErr(upb_DefBuilder* ctx) { - upb_Status_SetErrorMessage(ctx->status, "out of memory"); - _upb_DefBuilder_FailJmp(ctx); +static size_t upb_MiniTableEnum_Size(size_t count) { + return sizeof(upb_MiniTableEnum) + count * sizeof(uint32_t); } -// Verify a relative identifier string. The loop is branchless for speed. 
-static void _upb_DefBuilder_CheckIdentNotFull(upb_DefBuilder* ctx, - upb_StringView name) { - bool good = name.size > 0; - - for (size_t i = 0; i < name.size; i++) { - const char c = name.data[i]; - const char d = c | 0x20; // force lowercase - const bool is_alpha = (('a' <= d) & (d <= 'z')) | (c == '_'); - const bool is_numer = ('0' <= c) & (c <= '9') & (i != 0); - - good &= is_alpha | is_numer; +static upb_MiniTableEnum* _upb_MiniTable_AddEnumDataMember(upb_MdEnumDecoder* d, + uint32_t val) { + if (d->enum_data_count == d->enum_data_capacity) { + size_t old_sz = upb_MiniTableEnum_Size(d->enum_data_capacity); + d->enum_data_capacity = UPB_MAX(2, d->enum_data_capacity * 2); + size_t new_sz = upb_MiniTableEnum_Size(d->enum_data_capacity); + d->enum_table = upb_Arena_Realloc(d->arena, d->enum_table, old_sz, new_sz); + upb_MdDecoder_CheckOutOfMemory(&d->base, d->enum_table); } - - if (!good) _upb_DefBuilder_CheckIdentSlow(ctx, name, false); + d->enum_table->data[d->enum_data_count++] = val; + return d->enum_table; } -const char* _upb_DefBuilder_MakeFullName(upb_DefBuilder* ctx, - const char* prefix, - upb_StringView name) { - _upb_DefBuilder_CheckIdentNotFull(ctx, name); - if (prefix) { - // ret = prefix + '.' 
+ name; - size_t n = strlen(prefix); - char* ret = _upb_DefBuilder_Alloc(ctx, n + name.size + 2); - strcpy(ret, prefix); - ret[n] = '.'; - memcpy(&ret[n + 1], name.data, name.size); - ret[n + 1 + name.size] = '\0'; - return ret; +static void upb_MiniTableEnum_BuildValue(upb_MdEnumDecoder* d, uint32_t val) { + upb_MiniTableEnum* table = d->enum_table; + d->enum_value_count++; + if (table->value_count || (val > 512 && d->enum_value_count < val / 32)) { + if (table->value_count == 0) { + assert(d->enum_data_count == table->mask_limit / 32); + } + table = _upb_MiniTable_AddEnumDataMember(d, val); + table->value_count++; } else { - char* ret = upb_strdup2(name.data, name.size, ctx->arena); - if (!ret) _upb_DefBuilder_OomErr(ctx); - return ret; + uint32_t new_mask_limit = ((val / 32) + 1) * 32; + while (table->mask_limit < new_mask_limit) { + table = _upb_MiniTable_AddEnumDataMember(d, 0); + table->mask_limit += 32; + } + table->data[val / 32] |= 1ULL << (val % 32); } } -static bool remove_component(char* base, size_t* len) { - if (*len == 0) return false; - - for (size_t i = *len - 1; i > 0; i--) { - if (base[i] == '.') { - *len = i; - return true; +static upb_MiniTableEnum* upb_MtDecoder_DoBuildMiniTableEnum( + upb_MdEnumDecoder* d, const char* data, size_t len) { + // If the string is non-empty then it must begin with a version tag. + if (len) { + if (*data != kUpb_EncodedVersion_EnumV1) { + upb_MdDecoder_ErrorJmp(&d->base, "Invalid enum version: %c", *data); } + data++; + len--; } - *len = 0; - return true; -} + upb_MdDecoder_CheckOutOfMemory(&d->base, d->enum_table); -const void* _upb_DefBuilder_ResolveAny(upb_DefBuilder* ctx, - const char* from_name_dbg, - const char* base, upb_StringView sym, - upb_deftype_t* type) { - if (sym.size == 0) goto notfound; - upb_value v; - if (sym.data[0] == '.') { - /* Symbols starting with '.' are absolute, so we do a single lookup. - * Slice to omit the leading '.' 
*/ - if (!_upb_DefPool_LookupSym(ctx->symtab, sym.data + 1, sym.size - 1, &v)) { - goto notfound; - } - } else { - /* Remove components from base until we find an entry or run out. */ - size_t baselen = base ? strlen(base) : 0; - char* tmp = malloc(sym.size + baselen + 1); - while (1) { - char* p = tmp; - if (baselen) { - memcpy(p, base, baselen); - p[baselen] = '.'; - p += baselen + 1; - } - memcpy(p, sym.data, sym.size); - p += sym.size; - if (_upb_DefPool_LookupSym(ctx->symtab, tmp, p - tmp, &v)) { - break; - } - if (!remove_component(tmp, &baselen)) { - free(tmp); - goto notfound; + // Guarantee at least 64 bits of mask without checking mask size. + d->enum_table->mask_limit = 64; + d->enum_table = _upb_MiniTable_AddEnumDataMember(d, 0); + d->enum_table = _upb_MiniTable_AddEnumDataMember(d, 0); + + d->enum_table->value_count = 0; + + const char* ptr = data; + uint32_t base = 0; + + while (ptr < d->base.end) { + char ch = *ptr++; + if (ch <= kUpb_EncodedValue_MaxEnumMask) { + uint32_t mask = _upb_FromBase92(ch); + for (int i = 0; i < 5; i++, base++, mask >>= 1) { + if (mask & 1) upb_MiniTableEnum_BuildValue(d, base); } + } else if (kUpb_EncodedValue_MinSkip <= ch && + ch <= kUpb_EncodedValue_MaxSkip) { + uint32_t skip; + ptr = upb_MdDecoder_DecodeBase92Varint(&d->base, ptr, ch, + kUpb_EncodedValue_MinSkip, + kUpb_EncodedValue_MaxSkip, &skip); + base += skip; + } else { + upb_MdDecoder_ErrorJmp(&d->base, "Unexpected character: %c", ch); } - free(tmp); } - *type = _upb_DefType_Type(v); - return _upb_DefType_Unpack(v, *type); - -notfound: - _upb_DefBuilder_Errf(ctx, "couldn't resolve name '" UPB_STRINGVIEW_FORMAT "'", - UPB_STRINGVIEW_ARGS(sym)); + return d->enum_table; } -const void* _upb_DefBuilder_Resolve(upb_DefBuilder* ctx, - const char* from_name_dbg, const char* base, - upb_StringView sym, upb_deftype_t type) { - upb_deftype_t found_type; - const void* ret = - _upb_DefBuilder_ResolveAny(ctx, from_name_dbg, base, sym, &found_type); - if (ret && found_type != 
type) { - _upb_DefBuilder_Errf(ctx, - "type mismatch when resolving %s: couldn't find " - "name " UPB_STRINGVIEW_FORMAT " with type=%d", - from_name_dbg, UPB_STRINGVIEW_ARGS(sym), (int)type); - } - return ret; +static upb_MiniTableEnum* upb_MtDecoder_BuildMiniTableEnum( + upb_MdEnumDecoder* const decoder, const char* const data, size_t const len) { + if (UPB_SETJMP(decoder->base.err) != 0) return NULL; + return upb_MtDecoder_DoBuildMiniTableEnum(decoder, data, len); } -// Per ASCII this will lower-case a letter. If the result is a letter, the -// input was definitely a letter. If the output is not a letter, this may -// have transformed the character unpredictably. -static char upb_ascii_lower(char ch) { return ch | 0x20; } +upb_MiniTableEnum* upb_MiniDescriptor_BuildEnum(const char* data, size_t len, + upb_Arena* arena, + upb_Status* status) { + upb_MdEnumDecoder decoder = { + .base = + { + .end = UPB_PTRADD(data, len), + .status = status, + }, + .arena = arena, + .enum_table = upb_Arena_Malloc(arena, upb_MiniTableEnum_Size(2)), + .enum_value_count = 0, + .enum_data_count = 0, + .enum_data_capacity = 1, + }; -// isalpha() etc. from are locale-dependent, which we don't want. 
-static bool upb_isbetween(uint8_t c, uint8_t low, uint8_t high) { - return low <= c && c <= high; + return upb_MtDecoder_BuildMiniTableEnum(&decoder, data, len); } -static bool upb_isletter(char c) { - char lower = upb_ascii_lower(c); - return upb_isbetween(lower, 'a', 'z') || c == '_'; -} -static bool upb_isalphanum(char c) { - return upb_isletter(c) || upb_isbetween(c, '0', '9'); -} +#include +#include -static bool TryGetChar(const char** src, const char* end, char* ch) { - if (*src == end) return false; - *ch = **src; - *src += 1; - return true; -} -static int TryGetHexDigit(const char** src, const char* end) { - char ch; - if (!TryGetChar(src, end, &ch)) return -1; - if ('0' <= ch && ch <= '9') { - return ch - '0'; - } - ch = upb_ascii_lower(ch); - if ('a' <= ch && ch <= 'f') { - return ch - 'a' + 0xa; - } - *src -= 1; // Char wasn't actually a hex digit. - return -1; -} +// Must be last. -static char upb_DefBuilder_ParseHexEscape(upb_DefBuilder* ctx, - const upb_FieldDef* f, - const char** src, const char* end) { - int hex_digit = TryGetHexDigit(src, end); - if (hex_digit < 0) { - _upb_DefBuilder_Errf( - ctx, "\\x must be followed by at least one hex digit (field='%s')", - upb_FieldDef_FullName(f)); - return 0; - } - unsigned int ret = hex_digit; - while ((hex_digit = TryGetHexDigit(src, end)) >= 0) { - ret = (ret << 4) | hex_digit; - } - if (ret > 0xff) { - _upb_DefBuilder_Errf(ctx, "Value of hex escape in field %s exceeds 8 bits", - upb_FieldDef_FullName(f)); - return 0; - } - return ret; -} - -static char TryGetOctalDigit(const char** src, const char* end) { - char ch; - if (!TryGetChar(src, end, &ch)) return -1; - if ('0' <= ch && ch <= '7') { - return ch - '0'; - } - *src -= 1; // Char wasn't actually an octal digit. 
- return -1; -} - -static char upb_DefBuilder_ParseOctalEscape(upb_DefBuilder* ctx, - const upb_FieldDef* f, - const char** src, const char* end) { - char ch = 0; - for (int i = 0; i < 3; i++) { - char digit; - if ((digit = TryGetOctalDigit(src, end)) >= 0) { - ch = (ch << 3) | digit; - } - } - return ch; -} - -char _upb_DefBuilder_ParseEscape(upb_DefBuilder* ctx, const upb_FieldDef* f, - const char** src, const char* end) { - char ch; - if (!TryGetChar(src, end, &ch)) { - _upb_DefBuilder_Errf(ctx, "unterminated escape sequence in field %s", - upb_FieldDef_FullName(f)); - return 0; - } - switch (ch) { - case 'a': - return '\a'; - case 'b': - return '\b'; - case 'f': - return '\f'; - case 'n': - return '\n'; - case 'r': - return '\r'; - case 't': - return '\t'; - case 'v': - return '\v'; - case '\\': - return '\\'; - case '\'': - return '\''; - case '\"': - return '\"'; - case '?': - return '\?'; - case 'x': - case 'X': - return upb_DefBuilder_ParseHexEscape(ctx, f, src, end); - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - *src -= 1; - return upb_DefBuilder_ParseOctalEscape(ctx, f, src, end); - } - _upb_DefBuilder_Errf(ctx, "Unknown escape sequence: \\%c", ch); -} +// Note: we sort by this number when calculating layout order. +typedef enum { + kUpb_LayoutItemType_OneofCase, // Oneof case. + kUpb_LayoutItemType_OneofField, // Oneof field data. + kUpb_LayoutItemType_Field, // Non-oneof field data. -void _upb_DefBuilder_CheckIdentSlow(upb_DefBuilder* ctx, upb_StringView name, - bool full) { - const char* str = name.data; - const size_t len = name.size; - bool start = true; - for (size_t i = 0; i < len; i++) { - const char c = str[i]; - if (c == '.') { - if (start || !full) { - _upb_DefBuilder_Errf( - ctx, "invalid name: unexpected '.' 
(" UPB_STRINGVIEW_FORMAT ")", - UPB_STRINGVIEW_ARGS(name)); - } - start = true; - } else if (start) { - if (!upb_isletter(c)) { - _upb_DefBuilder_Errf(ctx, - "invalid name: path components must start with a " - "letter (" UPB_STRINGVIEW_FORMAT ")", - UPB_STRINGVIEW_ARGS(name)); - } - start = false; - } else if (!upb_isalphanum(c)) { - _upb_DefBuilder_Errf( - ctx, - "invalid name: non-alphanumeric character (" UPB_STRINGVIEW_FORMAT - ")", - UPB_STRINGVIEW_ARGS(name)); - } - } - if (start) { - _upb_DefBuilder_Errf(ctx, - "invalid name: empty part (" UPB_STRINGVIEW_FORMAT ")", - UPB_STRINGVIEW_ARGS(name)); - } + kUpb_LayoutItemType_Max = kUpb_LayoutItemType_Field, +} upb_LayoutItemType; - // We should never reach this point. - UPB_ASSERT(false); -} +#define kUpb_LayoutItem_IndexSentinel ((uint16_t)-1) +typedef struct { + // Index of the corresponding field. When this is a oneof field, the field's + // offset will be the index of the next field in a linked list. + uint16_t field_index; + uint16_t offset; + upb_FieldRep rep; + upb_LayoutItemType type; +} upb_LayoutItem; -// Must be last. +typedef struct { + upb_LayoutItem* data; + size_t size; + size_t capacity; +} upb_LayoutItemVector; -struct upb_DefPool { - upb_Arena* arena; - upb_strtable syms; // full_name -> packed def ptr - upb_strtable files; // file_name -> (upb_FileDef*) - upb_inttable exts; // (upb_MiniTableExtension*) -> (upb_FieldDef*) - upb_ExtensionRegistry* extreg; +typedef struct { + upb_MdDecoder base; + upb_MiniTable* table; + upb_MiniTableField* fields; upb_MiniTablePlatform platform; - void* scratch_data; - size_t scratch_size; - size_t bytes_loaded; + upb_LayoutItemVector vec; + upb_Arena* arena; +} upb_MtDecoder; + +// In each field's offset, we temporarily store a presence classifier: +enum PresenceClass { + kNoPresence = 0, + kHasbitPresence = 1, + kRequiredPresence = 2, + kOneofBase = 3, + // Negative values refer to a specific oneof with that number. 
Positive + // values >= kOneofBase indicate that this field is in a oneof, and specify + // the next field in this oneof's linked list. }; -void upb_DefPool_Free(upb_DefPool* s) { - upb_Arena_Free(s->arena); - upb_gfree(s->scratch_data); - upb_gfree(s); +static bool upb_MtDecoder_FieldIsPackable(upb_MiniTableField* field) { + return (field->mode & kUpb_FieldMode_Array) && + upb_FieldType_IsPackable(field->UPB_PRIVATE(descriptortype)); } -upb_DefPool* upb_DefPool_New(void) { - upb_DefPool* s = upb_gmalloc(sizeof(*s)); - if (!s) return NULL; - - s->arena = upb_Arena_New(); - s->bytes_loaded = 0; - - s->scratch_size = 240; - s->scratch_data = upb_gmalloc(s->scratch_size); - if (!s->scratch_data) goto err; - - if (!upb_strtable_init(&s->syms, 32, s->arena)) goto err; - if (!upb_strtable_init(&s->files, 4, s->arena)) goto err; - if (!upb_inttable_init(&s->exts, s->arena)) goto err; - - s->extreg = upb_ExtensionRegistry_New(s->arena); - if (!s->extreg) goto err; +typedef struct { + uint16_t submsg_count; + uint16_t subenum_count; +} upb_SubCounts; - s->platform = kUpb_MiniTablePlatform_Native; +static void upb_MiniTable_SetTypeAndSub(upb_MiniTableField* field, + upb_FieldType type, + upb_SubCounts* sub_counts, + uint64_t msg_modifiers, + bool is_proto3_enum) { + if (is_proto3_enum) { + UPB_ASSERT(type == kUpb_FieldType_Enum); + type = kUpb_FieldType_Int32; + field->mode |= kUpb_LabelFlags_IsAlternate; + } else if (type == kUpb_FieldType_String && + !(msg_modifiers & kUpb_MessageModifier_ValidateUtf8)) { + type = kUpb_FieldType_Bytes; + field->mode |= kUpb_LabelFlags_IsAlternate; + } - return s; + field->UPB_PRIVATE(descriptortype) = type; -err: - upb_DefPool_Free(s); - return NULL; -} + if (upb_MtDecoder_FieldIsPackable(field) && + (msg_modifiers & kUpb_MessageModifier_DefaultIsPacked)) { + field->mode |= kUpb_LabelFlags_IsPacked; + } -bool _upb_DefPool_InsertExt(upb_DefPool* s, const upb_MiniTableExtension* ext, - const upb_FieldDef* f) { - return 
upb_inttable_insert(&s->exts, (uintptr_t)ext, upb_value_constptr(f), - s->arena); + if (type == kUpb_FieldType_Message || type == kUpb_FieldType_Group) { + field->UPB_PRIVATE(submsg_index) = sub_counts->submsg_count++; + } else if (type == kUpb_FieldType_Enum) { + // We will need to update this later once we know the total number of + // submsg fields. + field->UPB_PRIVATE(submsg_index) = sub_counts->subenum_count++; + } else { + field->UPB_PRIVATE(submsg_index) = kUpb_NoSub; + } } -bool _upb_DefPool_InsertSym(upb_DefPool* s, upb_StringView sym, upb_value v, - upb_Status* status) { - // TODO: table should support an operation "tryinsert" to avoid the double - // lookup. - if (upb_strtable_lookup2(&s->syms, sym.data, sym.size, NULL)) { - upb_Status_SetErrorFormat(status, "duplicate symbol '%s'", sym.data); - return false; - } - if (!upb_strtable_insert(&s->syms, sym.data, sym.size, v, s->arena)) { - upb_Status_SetErrorMessage(status, "out of memory"); - return false; - } - return true; -} +static const char kUpb_EncodedToType[] = { + [kUpb_EncodedType_Double] = kUpb_FieldType_Double, + [kUpb_EncodedType_Float] = kUpb_FieldType_Float, + [kUpb_EncodedType_Int64] = kUpb_FieldType_Int64, + [kUpb_EncodedType_UInt64] = kUpb_FieldType_UInt64, + [kUpb_EncodedType_Int32] = kUpb_FieldType_Int32, + [kUpb_EncodedType_Fixed64] = kUpb_FieldType_Fixed64, + [kUpb_EncodedType_Fixed32] = kUpb_FieldType_Fixed32, + [kUpb_EncodedType_Bool] = kUpb_FieldType_Bool, + [kUpb_EncodedType_String] = kUpb_FieldType_String, + [kUpb_EncodedType_Group] = kUpb_FieldType_Group, + [kUpb_EncodedType_Message] = kUpb_FieldType_Message, + [kUpb_EncodedType_Bytes] = kUpb_FieldType_Bytes, + [kUpb_EncodedType_UInt32] = kUpb_FieldType_UInt32, + [kUpb_EncodedType_OpenEnum] = kUpb_FieldType_Enum, + [kUpb_EncodedType_SFixed32] = kUpb_FieldType_SFixed32, + [kUpb_EncodedType_SFixed64] = kUpb_FieldType_SFixed64, + [kUpb_EncodedType_SInt32] = kUpb_FieldType_SInt32, + [kUpb_EncodedType_SInt64] = 
kUpb_FieldType_SInt64, + [kUpb_EncodedType_ClosedEnum] = kUpb_FieldType_Enum, +}; -static const void* _upb_DefPool_Unpack(const upb_DefPool* s, const char* sym, - size_t size, upb_deftype_t type) { - upb_value v; - return upb_strtable_lookup2(&s->syms, sym, size, &v) - ? _upb_DefType_Unpack(v, type) - : NULL; -} +static void upb_MiniTable_SetField(upb_MtDecoder* d, uint8_t ch, + upb_MiniTableField* field, + uint64_t msg_modifiers, + upb_SubCounts* sub_counts) { + static const char kUpb_EncodedToFieldRep[] = { + [kUpb_EncodedType_Double] = kUpb_FieldRep_8Byte, + [kUpb_EncodedType_Float] = kUpb_FieldRep_4Byte, + [kUpb_EncodedType_Int64] = kUpb_FieldRep_8Byte, + [kUpb_EncodedType_UInt64] = kUpb_FieldRep_8Byte, + [kUpb_EncodedType_Int32] = kUpb_FieldRep_4Byte, + [kUpb_EncodedType_Fixed64] = kUpb_FieldRep_8Byte, + [kUpb_EncodedType_Fixed32] = kUpb_FieldRep_4Byte, + [kUpb_EncodedType_Bool] = kUpb_FieldRep_1Byte, + [kUpb_EncodedType_String] = kUpb_FieldRep_StringView, + [kUpb_EncodedType_Bytes] = kUpb_FieldRep_StringView, + [kUpb_EncodedType_UInt32] = kUpb_FieldRep_4Byte, + [kUpb_EncodedType_OpenEnum] = kUpb_FieldRep_4Byte, + [kUpb_EncodedType_SFixed32] = kUpb_FieldRep_4Byte, + [kUpb_EncodedType_SFixed64] = kUpb_FieldRep_8Byte, + [kUpb_EncodedType_SInt32] = kUpb_FieldRep_4Byte, + [kUpb_EncodedType_SInt64] = kUpb_FieldRep_8Byte, + [kUpb_EncodedType_ClosedEnum] = kUpb_FieldRep_4Byte, + }; -bool _upb_DefPool_LookupSym(const upb_DefPool* s, const char* sym, size_t size, - upb_value* v) { - return upb_strtable_lookup2(&s->syms, sym, size, v); -} + char pointer_rep = d->platform == kUpb_MiniTablePlatform_32Bit + ? 
kUpb_FieldRep_4Byte + : kUpb_FieldRep_8Byte; -upb_ExtensionRegistry* _upb_DefPool_ExtReg(const upb_DefPool* s) { - return s->extreg; + int8_t type = _upb_FromBase92(ch); + if (ch >= _upb_ToBase92(kUpb_EncodedType_RepeatedBase)) { + type -= kUpb_EncodedType_RepeatedBase; + field->mode = kUpb_FieldMode_Array; + field->mode |= pointer_rep << kUpb_FieldRep_Shift; + field->offset = kNoPresence; + } else { + field->mode = kUpb_FieldMode_Scalar; + field->offset = kHasbitPresence; + if (type == kUpb_EncodedType_Group || type == kUpb_EncodedType_Message) { + field->mode |= pointer_rep << kUpb_FieldRep_Shift; + } else if ((unsigned long)type >= sizeof(kUpb_EncodedToFieldRep)) { + upb_MdDecoder_ErrorJmp(&d->base, "Invalid field type: %d", (int)type); + } else { + field->mode |= kUpb_EncodedToFieldRep[type] << kUpb_FieldRep_Shift; + } + } + if ((unsigned long)type >= sizeof(kUpb_EncodedToType)) { + upb_MdDecoder_ErrorJmp(&d->base, "Invalid field type: %d", (int)type); + } + upb_MiniTable_SetTypeAndSub(field, kUpb_EncodedToType[type], sub_counts, + msg_modifiers, type == kUpb_EncodedType_OpenEnum); } -void** _upb_DefPool_ScratchData(const upb_DefPool* s) { - return (void**)&s->scratch_data; -} +static void upb_MtDecoder_ModifyField(upb_MtDecoder* d, + uint32_t message_modifiers, + uint32_t field_modifiers, + upb_MiniTableField* field) { + if (field_modifiers & kUpb_EncodedFieldModifier_FlipPacked) { + if (!upb_MtDecoder_FieldIsPackable(field)) { + upb_MdDecoder_ErrorJmp(&d->base, + "Cannot flip packed on unpackable field %" PRIu32, + field->number); + } + field->mode ^= kUpb_LabelFlags_IsPacked; + } -size_t* _upb_DefPool_ScratchSize(const upb_DefPool* s) { - return (size_t*)&s->scratch_size; -} + bool singular = field_modifiers & kUpb_EncodedFieldModifier_IsProto3Singular; + bool required = field_modifiers & kUpb_EncodedFieldModifier_IsRequired; -void _upb_DefPool_SetPlatform(upb_DefPool* s, upb_MiniTablePlatform platform) { - assert(upb_strtable_count(&s->files) == 0); - 
s->platform = platform; -} + // Validate. + if ((singular || required) && field->offset != kHasbitPresence) { + upb_MdDecoder_ErrorJmp(&d->base, + "Invalid modifier(s) for repeated field %" PRIu32, + field->number); + } + if (singular && required) { + upb_MdDecoder_ErrorJmp( + &d->base, "Field %" PRIu32 " cannot be both singular and required", + field->number); + } -const upb_MessageDef* upb_DefPool_FindMessageByName(const upb_DefPool* s, - const char* sym) { - return _upb_DefPool_Unpack(s, sym, strlen(sym), UPB_DEFTYPE_MSG); + if (singular) field->offset = kNoPresence; + if (required) { + field->offset = kRequiredPresence; + } } -const upb_MessageDef* upb_DefPool_FindMessageByNameWithSize( - const upb_DefPool* s, const char* sym, size_t len) { - return _upb_DefPool_Unpack(s, sym, len, UPB_DEFTYPE_MSG); +static void upb_MtDecoder_PushItem(upb_MtDecoder* d, upb_LayoutItem item) { + if (d->vec.size == d->vec.capacity) { + size_t new_cap = UPB_MAX(8, d->vec.size * 2); + d->vec.data = realloc(d->vec.data, new_cap * sizeof(*d->vec.data)); + upb_MdDecoder_CheckOutOfMemory(&d->base, d->vec.data); + d->vec.capacity = new_cap; + } + d->vec.data[d->vec.size++] = item; } -const upb_EnumDef* upb_DefPool_FindEnumByName(const upb_DefPool* s, - const char* sym) { - return _upb_DefPool_Unpack(s, sym, strlen(sym), UPB_DEFTYPE_ENUM); -} +static void upb_MtDecoder_PushOneof(upb_MtDecoder* d, upb_LayoutItem item) { + if (item.field_index == kUpb_LayoutItem_IndexSentinel) { + upb_MdDecoder_ErrorJmp(&d->base, "Empty oneof"); + } + item.field_index -= kOneofBase; -const upb_EnumValueDef* upb_DefPool_FindEnumByNameval(const upb_DefPool* s, - const char* sym) { - return _upb_DefPool_Unpack(s, sym, strlen(sym), UPB_DEFTYPE_ENUMVAL); + // Push oneof data. + item.type = kUpb_LayoutItemType_OneofField; + upb_MtDecoder_PushItem(d, item); + + // Push oneof case. + item.rep = kUpb_FieldRep_4Byte; // Field Number. 
+ item.type = kUpb_LayoutItemType_OneofCase; + upb_MtDecoder_PushItem(d, item); } -const upb_FileDef* upb_DefPool_FindFileByName(const upb_DefPool* s, - const char* name) { - upb_value v; - return upb_strtable_lookup(&s->files, name, &v) ? upb_value_getconstptr(v) - : NULL; +size_t upb_MtDecoder_SizeOfRep(upb_FieldRep rep, + upb_MiniTablePlatform platform) { + static const uint8_t kRepToSize32[] = { + [kUpb_FieldRep_1Byte] = 1, + [kUpb_FieldRep_4Byte] = 4, + [kUpb_FieldRep_StringView] = 8, + [kUpb_FieldRep_8Byte] = 8, + }; + static const uint8_t kRepToSize64[] = { + [kUpb_FieldRep_1Byte] = 1, + [kUpb_FieldRep_4Byte] = 4, + [kUpb_FieldRep_StringView] = 16, + [kUpb_FieldRep_8Byte] = 8, + }; + UPB_ASSERT(sizeof(upb_StringView) == + UPB_SIZE(kRepToSize32, kRepToSize64)[kUpb_FieldRep_StringView]); + return platform == kUpb_MiniTablePlatform_32Bit ? kRepToSize32[rep] + : kRepToSize64[rep]; } -const upb_FileDef* upb_DefPool_FindFileByNameWithSize(const upb_DefPool* s, - const char* name, - size_t len) { - upb_value v; - return upb_strtable_lookup2(&s->files, name, len, &v) - ? upb_value_getconstptr(v) - : NULL; +size_t upb_MtDecoder_AlignOfRep(upb_FieldRep rep, + upb_MiniTablePlatform platform) { + static const uint8_t kRepToAlign32[] = { + [kUpb_FieldRep_1Byte] = 1, + [kUpb_FieldRep_4Byte] = 4, + [kUpb_FieldRep_StringView] = 4, + [kUpb_FieldRep_8Byte] = 8, + }; + static const uint8_t kRepToAlign64[] = { + [kUpb_FieldRep_1Byte] = 1, + [kUpb_FieldRep_4Byte] = 4, + [kUpb_FieldRep_StringView] = 8, + [kUpb_FieldRep_8Byte] = 8, + }; + UPB_ASSERT(UPB_ALIGN_OF(upb_StringView) == + UPB_SIZE(kRepToAlign32, kRepToAlign64)[kUpb_FieldRep_StringView]); + return platform == kUpb_MiniTablePlatform_32Bit ? 
kRepToAlign32[rep] + : kRepToAlign64[rep]; } -const upb_FieldDef* upb_DefPool_FindExtensionByNameWithSize( - const upb_DefPool* s, const char* name, size_t size) { - upb_value v; - if (!upb_strtable_lookup2(&s->syms, name, size, &v)) return NULL; +static const char* upb_MtDecoder_DecodeOneofField(upb_MtDecoder* d, + const char* ptr, + char first_ch, + upb_LayoutItem* item) { + uint32_t field_num; + ptr = upb_MdDecoder_DecodeBase92Varint( + &d->base, ptr, first_ch, kUpb_EncodedValue_MinOneofField, + kUpb_EncodedValue_MaxOneofField, &field_num); + upb_MiniTableField* f = + (void*)upb_MiniTable_FindFieldByNumber(d->table, field_num); - switch (_upb_DefType_Type(v)) { - case UPB_DEFTYPE_FIELD: - return _upb_DefType_Unpack(v, UPB_DEFTYPE_FIELD); - case UPB_DEFTYPE_MSG: { - const upb_MessageDef* m = _upb_DefType_Unpack(v, UPB_DEFTYPE_MSG); - return _upb_MessageDef_InMessageSet(m) - ? upb_MessageDef_NestedExtension(m, 0) - : NULL; - } - default: - break; + if (!f) { + upb_MdDecoder_ErrorJmp(&d->base, + "Couldn't add field number %" PRIu32 + " to oneof, no such field number.", + field_num); + } + if (f->offset != kHasbitPresence) { + upb_MdDecoder_ErrorJmp( + &d->base, + "Cannot add repeated, required, or singular field %" PRIu32 + " to oneof.", + field_num); } - return NULL; + // Oneof storage must be large enough to accommodate the largest member. + int rep = f->mode >> kUpb_FieldRep_Shift; + if (upb_MtDecoder_SizeOfRep(rep, d->platform) > + upb_MtDecoder_SizeOfRep(item->rep, d->platform)) { + item->rep = rep; + } + // Prepend this field to the linked list. 
+ f->offset = item->field_index; + item->field_index = (f - d->fields) + kOneofBase; + return ptr; } -const upb_FieldDef* upb_DefPool_FindExtensionByName(const upb_DefPool* s, - const char* sym) { - return upb_DefPool_FindExtensionByNameWithSize(s, sym, strlen(sym)); -} +static const char* upb_MtDecoder_DecodeOneofs(upb_MtDecoder* d, + const char* ptr) { + upb_LayoutItem item = {.rep = 0, + .field_index = kUpb_LayoutItem_IndexSentinel}; + while (ptr < d->base.end) { + char ch = *ptr++; + if (ch == kUpb_EncodedValue_FieldSeparator) { + // Field separator, no action needed. + } else if (ch == kUpb_EncodedValue_OneofSeparator) { + // End of oneof. + upb_MtDecoder_PushOneof(d, item); + item.field_index = kUpb_LayoutItem_IndexSentinel; // Move to next oneof. + } else { + ptr = upb_MtDecoder_DecodeOneofField(d, ptr, ch, &item); + } + } -const upb_ServiceDef* upb_DefPool_FindServiceByName(const upb_DefPool* s, - const char* name) { - return _upb_DefPool_Unpack(s, name, strlen(name), UPB_DEFTYPE_SERVICE); + // Push final oneof. 
+ upb_MtDecoder_PushOneof(d, item); + return ptr; } -const upb_ServiceDef* upb_DefPool_FindServiceByNameWithSize( - const upb_DefPool* s, const char* name, size_t size) { - return _upb_DefPool_Unpack(s, name, size, UPB_DEFTYPE_SERVICE); +static const char* upb_MtDecoder_ParseModifier(upb_MtDecoder* d, + const char* ptr, char first_ch, + upb_MiniTableField* last_field, + uint64_t* msg_modifiers) { + uint32_t mod; + ptr = upb_MdDecoder_DecodeBase92Varint(&d->base, ptr, first_ch, + kUpb_EncodedValue_MinModifier, + kUpb_EncodedValue_MaxModifier, &mod); + if (last_field) { + upb_MtDecoder_ModifyField(d, *msg_modifiers, mod, last_field); + } else { + if (!d->table) { + upb_MdDecoder_ErrorJmp(&d->base, + "Extensions cannot have message modifiers"); + } + *msg_modifiers = mod; + } + + return ptr; } -const upb_FileDef* upb_DefPool_FindFileContainingSymbol(const upb_DefPool* s, - const char* name) { - upb_value v; - // TODO(haberman): non-extension fields and oneofs. - if (upb_strtable_lookup(&s->syms, name, &v)) { - switch (_upb_DefType_Type(v)) { - case UPB_DEFTYPE_EXT: { - const upb_FieldDef* f = _upb_DefType_Unpack(v, UPB_DEFTYPE_EXT); - return upb_FieldDef_File(f); +static void upb_MtDecoder_AllocateSubs(upb_MtDecoder* d, + upb_SubCounts sub_counts) { + uint32_t total_count = sub_counts.submsg_count + sub_counts.subenum_count; + size_t subs_bytes = sizeof(*d->table->subs) * total_count; + upb_MiniTableSub* subs = upb_Arena_Malloc(d->arena, subs_bytes); + upb_MdDecoder_CheckOutOfMemory(&d->base, subs); + uint32_t i = 0; + for (; i < sub_counts.submsg_count; i++) { + subs[i].submsg = &_kUpb_MiniTable_Empty; + } + if (sub_counts.subenum_count) { + upb_MiniTableField* f = d->fields; + upb_MiniTableField* end_f = f + d->table->field_count; + for (; f < end_f; f++) { + if (f->UPB_PRIVATE(descriptortype) == kUpb_FieldType_Enum) { + f->UPB_PRIVATE(submsg_index) += sub_counts.submsg_count; } - case UPB_DEFTYPE_MSG: { - const upb_MessageDef* m = _upb_DefType_Unpack(v, 
UPB_DEFTYPE_MSG); - return upb_MessageDef_File(m); + } + for (; i < sub_counts.submsg_count + sub_counts.subenum_count; i++) { + subs[i].subenum = NULL; + } + } + d->table->subs = subs; +} + +static const char* upb_MtDecoder_Parse(upb_MtDecoder* d, const char* ptr, + size_t len, void* fields, + size_t field_size, uint16_t* field_count, + upb_SubCounts* sub_counts) { + uint64_t msg_modifiers = 0; + uint32_t last_field_number = 0; + upb_MiniTableField* last_field = NULL; + bool need_dense_below = d->table != NULL; + + d->base.end = UPB_PTRADD(ptr, len); + + while (ptr < d->base.end) { + char ch = *ptr++; + if (ch <= kUpb_EncodedValue_MaxField) { + if (!d->table && last_field) { + // For extensions, consume only a single field and then return. + return --ptr; } - case UPB_DEFTYPE_ENUM: { - const upb_EnumDef* e = _upb_DefType_Unpack(v, UPB_DEFTYPE_ENUM); - return upb_EnumDef_File(e); + upb_MiniTableField* field = fields; + *field_count += 1; + fields = (char*)fields + field_size; + field->number = ++last_field_number; + last_field = field; + upb_MiniTable_SetField(d, ch, field, msg_modifiers, sub_counts); + } else if (kUpb_EncodedValue_MinModifier <= ch && + ch <= kUpb_EncodedValue_MaxModifier) { + ptr = upb_MtDecoder_ParseModifier(d, ptr, ch, last_field, &msg_modifiers); + if (msg_modifiers & kUpb_MessageModifier_IsExtendable) { + d->table->ext |= kUpb_ExtMode_Extendable; } - case UPB_DEFTYPE_ENUMVAL: { - const upb_EnumValueDef* ev = - _upb_DefType_Unpack(v, UPB_DEFTYPE_ENUMVAL); - return upb_EnumDef_File(upb_EnumValueDef_Enum(ev)); + } else if (ch == kUpb_EncodedValue_End) { + if (!d->table) { + upb_MdDecoder_ErrorJmp(&d->base, "Extensions cannot have oneofs."); } - case UPB_DEFTYPE_SERVICE: { - const upb_ServiceDef* service = - _upb_DefType_Unpack(v, UPB_DEFTYPE_SERVICE); - return upb_ServiceDef_File(service); + ptr = upb_MtDecoder_DecodeOneofs(d, ptr); + } else if (kUpb_EncodedValue_MinSkip <= ch && + ch <= kUpb_EncodedValue_MaxSkip) { + if (need_dense_below) { + 
d->table->dense_below = d->table->field_count; + need_dense_below = false; } - default: - UPB_UNREACHABLE(); + uint32_t skip; + ptr = upb_MdDecoder_DecodeBase92Varint(&d->base, ptr, ch, + kUpb_EncodedValue_MinSkip, + kUpb_EncodedValue_MaxSkip, &skip); + last_field_number += skip; + last_field_number--; // Next field seen will increment. + } else { + upb_MdDecoder_ErrorJmp(&d->base, "Invalid char: %c", ch); } } - const char* last_dot = strrchr(name, '.'); - if (last_dot) { - const upb_MessageDef* parent = - upb_DefPool_FindMessageByNameWithSize(s, name, last_dot - name); - if (parent) { - const char* shortname = last_dot + 1; - if (upb_MessageDef_FindByNameWithSize(parent, shortname, - strlen(shortname), NULL, NULL)) { - return upb_MessageDef_File(parent); - } - } + if (need_dense_below) { + d->table->dense_below = d->table->field_count; } - return NULL; + return ptr; } -static void remove_filedef(upb_DefPool* s, upb_FileDef* file) { - intptr_t iter = UPB_INTTABLE_BEGIN; - upb_StringView key; - upb_value val; - while (upb_strtable_next2(&s->syms, &key, &val, &iter)) { - const upb_FileDef* f; - switch (_upb_DefType_Type(val)) { - case UPB_DEFTYPE_EXT: - f = upb_FieldDef_File(_upb_DefType_Unpack(val, UPB_DEFTYPE_EXT)); - break; - case UPB_DEFTYPE_MSG: - f = upb_MessageDef_File(_upb_DefType_Unpack(val, UPB_DEFTYPE_MSG)); - break; - case UPB_DEFTYPE_ENUM: - f = upb_EnumDef_File(_upb_DefType_Unpack(val, UPB_DEFTYPE_ENUM)); - break; - case UPB_DEFTYPE_ENUMVAL: - f = upb_EnumDef_File(upb_EnumValueDef_Enum( - _upb_DefType_Unpack(val, UPB_DEFTYPE_ENUMVAL))); - break; - case UPB_DEFTYPE_SERVICE: - f = upb_ServiceDef_File(_upb_DefType_Unpack(val, UPB_DEFTYPE_SERVICE)); - break; - default: - UPB_UNREACHABLE(); - } +static void upb_MtDecoder_ParseMessage(upb_MtDecoder* d, const char* data, + size_t len) { + // Buffer length is an upper bound on the number of fields. We will return + // what we don't use. 
+ d->fields = upb_Arena_Malloc(d->arena, sizeof(*d->fields) * len); + upb_MdDecoder_CheckOutOfMemory(&d->base, d->fields); - if (f == file) upb_strtable_removeiter(&s->syms, &iter); - } + upb_SubCounts sub_counts = {0, 0}; + d->table->field_count = 0; + d->table->fields = d->fields; + upb_MtDecoder_Parse(d, data, len, d->fields, sizeof(*d->fields), + &d->table->field_count, &sub_counts); + + upb_Arena_ShrinkLast(d->arena, d->fields, sizeof(*d->fields) * len, + sizeof(*d->fields) * d->table->field_count); + d->table->fields = d->fields; + upb_MtDecoder_AllocateSubs(d, sub_counts); } -static const upb_FileDef* upb_DefBuilder_AddFileToPool( - upb_DefBuilder* const builder, upb_DefPool* const s, - const UPB_DESC(FileDescriptorProto) * const file_proto, - const upb_StringView name, upb_Status* const status) { - if (UPB_SETJMP(builder->err) != 0) { - UPB_ASSERT(!upb_Status_IsOk(status)); - if (builder->file) { - remove_filedef(s, builder->file); - builder->file = NULL; - } - } else if (!builder->arena || !builder->tmp_arena) { - _upb_DefBuilder_OomErr(builder); - } else { - _upb_FileDef_Create(builder, file_proto); - upb_strtable_insert(&s->files, name.data, name.size, - upb_value_constptr(builder->file), builder->arena); - UPB_ASSERT(upb_Status_IsOk(status)); - upb_Arena_Fuse(s->arena, builder->arena); +int upb_MtDecoder_CompareFields(const void* _a, const void* _b) { + const upb_LayoutItem* a = _a; + const upb_LayoutItem* b = _b; + // Currently we just sort by: + // 1. rep (smallest fields first) + // 2. type (oneof cases first) + // 2. field_index (smallest numbers first) + // The main goal of this is to reduce space lost to padding. + // Later we may have more subtle reasons to prefer a different ordering. 
+ const int rep_bits = upb_Log2Ceiling(kUpb_FieldRep_Max); + const int type_bits = upb_Log2Ceiling(kUpb_LayoutItemType_Max); + const int idx_bits = (sizeof(a->field_index) * 8); + UPB_ASSERT(idx_bits + rep_bits + type_bits < 32); +#define UPB_COMBINE(rep, ty, idx) (((rep << type_bits) | ty) << idx_bits) | idx + uint32_t a_packed = UPB_COMBINE(a->rep, a->type, a->field_index); + uint32_t b_packed = UPB_COMBINE(b->rep, b->type, b->field_index); + assert(a_packed != b_packed); +#undef UPB_COMBINE + return a_packed < b_packed ? -1 : 1; +} + +static bool upb_MtDecoder_SortLayoutItems(upb_MtDecoder* d) { + // Add items for all non-oneof fields (oneofs were already added). + int n = d->table->field_count; + for (int i = 0; i < n; i++) { + upb_MiniTableField* f = &d->fields[i]; + if (f->offset >= kOneofBase) continue; + upb_LayoutItem item = {.field_index = i, + .rep = f->mode >> kUpb_FieldRep_Shift, + .type = kUpb_LayoutItemType_Field}; + upb_MtDecoder_PushItem(d, item); } - if (builder->arena) upb_Arena_Free(builder->arena); - if (builder->tmp_arena) upb_Arena_Free(builder->tmp_arena); - return builder->file; + if (d->vec.size) { + qsort(d->vec.data, d->vec.size, sizeof(*d->vec.data), + upb_MtDecoder_CompareFields); + } + + return true; } -static const upb_FileDef* _upb_DefPool_AddFile( - upb_DefPool* s, const UPB_DESC(FileDescriptorProto) * file_proto, - const upb_MiniTableFile* layout, upb_Status* status) { - const upb_StringView name = UPB_DESC(FileDescriptorProto_name)(file_proto); +static size_t upb_MiniTable_DivideRoundUp(size_t n, size_t d) { + return (n + d - 1) / d; +} - // Determine whether we already know about this file. 
- { - upb_value v; - if (upb_strtable_lookup2(&s->files, name.data, name.size, &v)) { - upb_Status_SetErrorFormat(status, - "duplicate file name " UPB_STRINGVIEW_FORMAT, - UPB_STRINGVIEW_ARGS(name)); - return NULL; +static void upb_MtDecoder_AssignHasbits(upb_MtDecoder* d) { + upb_MiniTable* ret = d->table; + int n = ret->field_count; + int last_hasbit = 0; // 0 cannot be used. + + // First assign required fields, which must have the lowest hasbits. + for (int i = 0; i < n; i++) { + upb_MiniTableField* field = (upb_MiniTableField*)&ret->fields[i]; + if (field->offset == kRequiredPresence) { + field->presence = ++last_hasbit; + } else if (field->offset == kNoPresence) { + field->presence = 0; } } + ret->required_count = last_hasbit; - upb_DefBuilder ctx = { - .symtab = s, - .layout = layout, - .platform = s->platform, - .msg_count = 0, - .enum_count = 0, - .ext_count = 0, - .status = status, - .file = NULL, - .arena = upb_Arena_New(), - .tmp_arena = upb_Arena_New(), - }; + if (ret->required_count > 63) { + upb_MdDecoder_ErrorJmp(&d->base, "Too many required fields"); + } - return upb_DefBuilder_AddFileToPool(&ctx, s, file_proto, name, status); -} + // Next assign non-required hasbit fields. + for (int i = 0; i < n; i++) { + upb_MiniTableField* field = (upb_MiniTableField*)&ret->fields[i]; + if (field->offset == kHasbitPresence) { + field->presence = ++last_hasbit; + } + } -const upb_FileDef* upb_DefPool_AddFile(upb_DefPool* s, - const UPB_DESC(FileDescriptorProto) * - file_proto, - upb_Status* status) { - return _upb_DefPool_AddFile(s, file_proto, NULL, status); + ret->size = last_hasbit ? upb_MiniTable_DivideRoundUp(last_hasbit + 1, 8) : 0; } -bool _upb_DefPool_LoadDefInitEx(upb_DefPool* s, const _upb_DefPool_Init* init, - bool rebuild_minitable) { - /* Since this function should never fail (it would indicate a bug in upb) we - * print errors to stderr instead of returning error status to the user. 
*/ - _upb_DefPool_Init** deps = init->deps; - UPB_DESC(FileDescriptorProto) * file; - upb_Arena* arena; - upb_Status status; - - upb_Status_Clear(&status); - - if (upb_DefPool_FindFileByName(s, init->filename)) { - return true; +size_t upb_MtDecoder_Place(upb_MtDecoder* d, upb_FieldRep rep) { + size_t size = upb_MtDecoder_SizeOfRep(rep, d->platform); + size_t align = upb_MtDecoder_AlignOfRep(rep, d->platform); + size_t ret = UPB_ALIGN_UP(d->table->size, align); + static const size_t max = UINT16_MAX; + size_t new_size = ret + size; + if (new_size > max) { + upb_MdDecoder_ErrorJmp( + &d->base, "Message size exceeded maximum size of %zu bytes", max); } + d->table->size = new_size; + return ret; +} - arena = upb_Arena_New(); +static void upb_MtDecoder_AssignOffsets(upb_MtDecoder* d) { + upb_LayoutItem* end = UPB_PTRADD(d->vec.data, d->vec.size); - for (; *deps; deps++) { - if (!_upb_DefPool_LoadDefInitEx(s, *deps, rebuild_minitable)) goto err; + // Compute offsets. + for (upb_LayoutItem* item = d->vec.data; item < end; item++) { + item->offset = upb_MtDecoder_Place(d, item->rep); } - file = UPB_DESC(FileDescriptorProto_parse_ex)( - init->descriptor.data, init->descriptor.size, NULL, - kUpb_DecodeOption_AliasString, arena); - s->bytes_loaded += init->descriptor.size; - - if (!file) { - upb_Status_SetErrorFormat( - &status, - "Failed to parse compiled-in descriptor for file '%s'. This should " - "never happen.", - init->filename); - goto err; + // Assign oneof case offsets. We must do these first, since assigning + // actual offsets will overwrite the links of the linked list. 
+ for (upb_LayoutItem* item = d->vec.data; item < end; item++) { + if (item->type != kUpb_LayoutItemType_OneofCase) continue; + upb_MiniTableField* f = &d->fields[item->field_index]; + while (true) { + f->presence = ~item->offset; + if (f->offset == kUpb_LayoutItem_IndexSentinel) break; + UPB_ASSERT(f->offset - kOneofBase < d->table->field_count); + f = &d->fields[f->offset - kOneofBase]; + } } - const upb_MiniTableFile* mt = rebuild_minitable ? NULL : init->layout; - if (!_upb_DefPool_AddFile(s, file, mt, &status)) { - goto err; + // Assign offsets. + for (upb_LayoutItem* item = d->vec.data; item < end; item++) { + upb_MiniTableField* f = &d->fields[item->field_index]; + switch (item->type) { + case kUpb_LayoutItemType_OneofField: + while (true) { + uint16_t next_offset = f->offset; + f->offset = item->offset; + if (next_offset == kUpb_LayoutItem_IndexSentinel) break; + f = &d->fields[next_offset - kOneofBase]; + } + break; + case kUpb_LayoutItemType_Field: + f->offset = item->offset; + break; + default: + break; + } } - upb_Arena_Free(arena); - return true; - -err: - fprintf(stderr, - "Error loading compiled-in descriptor for file '%s' (this should " - "never happen): %s\n", - init->filename, upb_Status_ErrorMessage(&status)); - upb_Arena_Free(arena); - return false; + // The fasttable parser (supported on 64-bit only) depends on this being a + // multiple of 8 in order to satisfy UPB_MALLOC_ALIGN, which is also 8. + // + // On 32-bit we could potentially make this smaller, but there is no + // compelling reason to optimize this right now. + d->table->size = UPB_ALIGN_UP(d->table->size, 8); } -size_t _upb_DefPool_BytesLoaded(const upb_DefPool* s) { - return s->bytes_loaded; -} +static void upb_MtDecoder_ValidateEntryField(upb_MtDecoder* d, + const upb_MiniTableField* f, + uint32_t expected_num) { + const char* name = expected_num == 1 ? 
"key" : "val"; + if (f->number != expected_num) { + upb_MdDecoder_ErrorJmp(&d->base, + "map %s did not have expected number (%d vs %d)", + name, expected_num, (int)f->number); + } -upb_Arena* _upb_DefPool_Arena(const upb_DefPool* s) { return s->arena; } + if (upb_IsRepeatedOrMap(f)) { + upb_MdDecoder_ErrorJmp( + &d->base, "map %s cannot be repeated or map, or be in oneof", name); + } -const upb_FieldDef* upb_DefPool_FindExtensionByMiniTable( - const upb_DefPool* s, const upb_MiniTableExtension* ext) { - upb_value v; - bool ok = upb_inttable_lookup(&s->exts, (uintptr_t)ext, &v); - UPB_ASSERT(ok); - return upb_value_getconstptr(v); -} + uint32_t not_ok_types; + if (expected_num == 1) { + not_ok_types = (1 << kUpb_FieldType_Float) | (1 << kUpb_FieldType_Double) | + (1 << kUpb_FieldType_Message) | (1 << kUpb_FieldType_Group) | + (1 << kUpb_FieldType_Bytes) | (1 << kUpb_FieldType_Enum); + } else { + not_ok_types = 1 << kUpb_FieldType_Group; + } -const upb_FieldDef* upb_DefPool_FindExtensionByNumber(const upb_DefPool* s, - const upb_MessageDef* m, - int32_t fieldnum) { - const upb_MiniTable* t = upb_MessageDef_MiniTable(m); - const upb_MiniTableExtension* ext = - upb_ExtensionRegistry_Lookup(s->extreg, t, fieldnum); - return ext ? 
upb_DefPool_FindExtensionByMiniTable(s, ext) : NULL; + if ((1 << upb_MiniTableField_Type(f)) & not_ok_types) { + upb_MdDecoder_ErrorJmp(&d->base, "map %s cannot have type %d", name, + (int)f->UPB_PRIVATE(descriptortype)); + } } -const upb_ExtensionRegistry* upb_DefPool_ExtensionRegistry( - const upb_DefPool* s) { - return s->extreg; -} +static void upb_MtDecoder_ParseMap(upb_MtDecoder* d, const char* data, + size_t len) { + upb_MtDecoder_ParseMessage(d, data, len); + upb_MtDecoder_AssignHasbits(d); -const upb_FieldDef** upb_DefPool_GetAllExtensions(const upb_DefPool* s, - const upb_MessageDef* m, - size_t* count) { - size_t n = 0; - intptr_t iter = UPB_INTTABLE_BEGIN; - uintptr_t key; - upb_value val; - // This is O(all exts) instead of O(exts for m). If we need this to be - // efficient we may need to make extreg into a two-level table, or have a - // second per-message index. - while (upb_inttable_next(&s->exts, &key, &val, &iter)) { - const upb_FieldDef* f = upb_value_getconstptr(val); - if (upb_FieldDef_ContainingType(f) == m) n++; - } - const upb_FieldDef** exts = malloc(n * sizeof(*exts)); - iter = UPB_INTTABLE_BEGIN; - size_t i = 0; - while (upb_inttable_next(&s->exts, &key, &val, &iter)) { - const upb_FieldDef* f = upb_value_getconstptr(val); - if (upb_FieldDef_ContainingType(f) == m) exts[i++] = f; + if (UPB_UNLIKELY(d->table->field_count != 2)) { + upb_MdDecoder_ErrorJmp(&d->base, "%hu fields in map", + d->table->field_count); + UPB_UNREACHABLE(); } - *count = n; - return exts; -} -bool _upb_DefPool_LoadDefInit(upb_DefPool* s, const _upb_DefPool_Init* init) { - return _upb_DefPool_LoadDefInitEx(s, init, false); -} + upb_LayoutItem* end = UPB_PTRADD(d->vec.data, d->vec.size); + for (upb_LayoutItem* item = d->vec.data; item < end; item++) { + if (item->type == kUpb_LayoutItemType_OneofCase) { + upb_MdDecoder_ErrorJmp(&d->base, "Map entry cannot have oneof"); + } + } + upb_MtDecoder_ValidateEntryField(d, &d->table->fields[0], 1); + 
upb_MtDecoder_ValidateEntryField(d, &d->table->fields[1], 2); -// Must be last. + // Map entries have a pre-determined layout, regardless of types. + // NOTE: sync with mini_table/message_internal.h. + const size_t kv_size = d->platform == kUpb_MiniTablePlatform_32Bit ? 8 : 16; + const size_t hasbit_size = 8; + d->fields[0].offset = hasbit_size; + d->fields[1].offset = hasbit_size + kv_size; + d->table->size = UPB_ALIGN_UP(hasbit_size + kv_size + kv_size, 8); -upb_deftype_t _upb_DefType_Type(upb_value v) { - const uintptr_t num = (uintptr_t)upb_value_getconstptr(v); - return num & UPB_DEFTYPE_MASK; + // Map entries have a special bit set to signal it's a map entry, used in + // upb_MiniTable_SetSubMessage() below. + d->table->ext |= kUpb_ExtMode_IsMapEntry; } -upb_value _upb_DefType_Pack(const void* ptr, upb_deftype_t type) { - uintptr_t num = (uintptr_t)ptr; - UPB_ASSERT((num & UPB_DEFTYPE_MASK) == 0); - num |= type; - return upb_value_constptr((const void*)num); -} +static void upb_MtDecoder_ParseMessageSet(upb_MtDecoder* d, const char* data, + size_t len) { + if (len > 0) { + upb_MdDecoder_ErrorJmp(&d->base, "Invalid message set encode length: %zu", + len); + } -const void* _upb_DefType_Unpack(upb_value v, upb_deftype_t type) { - uintptr_t num = (uintptr_t)upb_value_getconstptr(v); - return (num & UPB_DEFTYPE_MASK) == type - ? (const void*)(num & ~UPB_DEFTYPE_MASK) - : NULL; + upb_MiniTable* ret = d->table; + ret->size = 0; + ret->field_count = 0; + ret->ext = kUpb_ExtMode_IsMessageSet; + ret->dense_below = 0; + ret->table_mask = -1; + ret->required_count = 0; } +static upb_MiniTable* upb_MtDecoder_DoBuildMiniTableWithBuf( + upb_MtDecoder* decoder, const char* data, size_t len, void** buf, + size_t* buf_size) { + upb_MdDecoder_CheckOutOfMemory(&decoder->base, decoder->table); -// Must be last. 
+ decoder->table->size = 0; + decoder->table->field_count = 0; + decoder->table->ext = kUpb_ExtMode_NonExtendable; + decoder->table->dense_below = 0; + decoder->table->table_mask = -1; + decoder->table->required_count = 0; -bool _upb_DescState_Grow(upb_DescState* d, upb_Arena* a) { - const size_t oldbufsize = d->bufsize; - const int used = d->ptr - d->buf; + // Strip off and verify the version tag. + if (!len--) goto done; + const char vers = *data++; - if (!d->buf) { - d->buf = upb_Arena_Malloc(a, d->bufsize); - if (!d->buf) return false; - d->ptr = d->buf; - d->e.end = d->buf + d->bufsize; - } + switch (vers) { + case kUpb_EncodedVersion_MapV1: + upb_MtDecoder_ParseMap(decoder, data, len); + break; - if (oldbufsize - used < kUpb_MtDataEncoder_MinSize) { - d->bufsize *= 2; - d->buf = upb_Arena_Realloc(a, d->buf, oldbufsize, d->bufsize); - if (!d->buf) return false; - d->ptr = d->buf + used; - d->e.end = d->buf + d->bufsize; - } + case kUpb_EncodedVersion_MessageV1: + upb_MtDecoder_ParseMessage(decoder, data, len); + upb_MtDecoder_AssignHasbits(decoder); + upb_MtDecoder_SortLayoutItems(decoder); + upb_MtDecoder_AssignOffsets(decoder); + break; - return true; -} + case kUpb_EncodedVersion_MessageSetV1: + upb_MtDecoder_ParseMessageSet(decoder, data, len); + break; + default: + upb_MdDecoder_ErrorJmp(&decoder->base, "Invalid message version: %c", + vers); + } -// Must be last. +done: + *buf = decoder->vec.data; + *buf_size = decoder->vec.capacity * sizeof(*decoder->vec.data); + return decoder->table; +} -struct upb_EnumDef { - const UPB_DESC(EnumOptions) * opts; - const upb_MiniTableEnum* layout; // Only for proto2. - const upb_FileDef* file; - const upb_MessageDef* containing_type; // Could be merged with "file". 
- const char* full_name; - upb_strtable ntoi; - upb_inttable iton; - const upb_EnumValueDef* values; - const upb_EnumReservedRange* res_ranges; - const upb_StringView* res_names; - int value_count; - int res_range_count; - int res_name_count; - int32_t defaultval; - bool is_closed; - bool is_sorted; // Whether all of the values are defined in ascending order. -}; +static upb_MiniTable* upb_MtDecoder_BuildMiniTableWithBuf( + upb_MtDecoder* const decoder, const char* const data, const size_t len, + void** const buf, size_t* const buf_size) { + if (UPB_SETJMP(decoder->base.err) != 0) { + *buf = decoder->vec.data; + *buf_size = decoder->vec.capacity * sizeof(*decoder->vec.data); + return NULL; + } -upb_EnumDef* _upb_EnumDef_At(const upb_EnumDef* e, int i) { - return (upb_EnumDef*)&e[i]; + return upb_MtDecoder_DoBuildMiniTableWithBuf(decoder, data, len, buf, + buf_size); } -const upb_MiniTableEnum* _upb_EnumDef_MiniTable(const upb_EnumDef* e) { - return e->layout; -} +upb_MiniTable* upb_MiniTable_BuildWithBuf(const char* data, size_t len, + upb_MiniTablePlatform platform, + upb_Arena* arena, void** buf, + size_t* buf_size, + upb_Status* status) { + upb_MtDecoder decoder = { + .base = {.status = status}, + .platform = platform, + .vec = + { + .data = *buf, + .capacity = *buf_size / sizeof(*decoder.vec.data), + .size = 0, + }, + .arena = arena, + .table = upb_Arena_Malloc(arena, sizeof(*decoder.table)), + }; -bool _upb_EnumDef_Insert(upb_EnumDef* e, upb_EnumValueDef* v, upb_Arena* a) { - const char* name = upb_EnumValueDef_Name(v); - const upb_value val = upb_value_constptr(v); - bool ok = upb_strtable_insert(&e->ntoi, name, strlen(name), val, a); - if (!ok) return false; + return upb_MtDecoder_BuildMiniTableWithBuf(&decoder, data, len, buf, + buf_size); +} - // Multiple enumerators can have the same number, first one wins. 
- const int number = upb_EnumValueDef_Number(v); - if (!upb_inttable_lookup(&e->iton, number, NULL)) { - return upb_inttable_insert(&e->iton, number, val, a); +static const char* upb_MtDecoder_DoBuildMiniTableExtension( + upb_MtDecoder* decoder, const char* data, size_t len, + upb_MiniTableExtension* ext, const upb_MiniTable* extendee, + upb_MiniTableSub sub) { + // If the string is non-empty then it must begin with a version tag. + if (len) { + if (*data != kUpb_EncodedVersion_ExtensionV1) { + upb_MdDecoder_ErrorJmp(&decoder->base, "Invalid ext version: %c", *data); + } + data++; + len--; } - return true; -} -const UPB_DESC(EnumOptions) * upb_EnumDef_Options(const upb_EnumDef* e) { - return e->opts; -} + uint16_t count = 0; + upb_SubCounts sub_counts = {0, 0}; + const char* ret = upb_MtDecoder_Parse(decoder, data, len, ext, sizeof(*ext), + &count, &sub_counts); + if (!ret || count != 1) return NULL; -bool upb_EnumDef_HasOptions(const upb_EnumDef* e) { - return e->opts != (void*)kUpbDefOptDefault; -} + upb_MiniTableField* f = &ext->field; -const char* upb_EnumDef_FullName(const upb_EnumDef* e) { return e->full_name; } + f->mode |= kUpb_LabelFlags_IsExtension; + f->offset = 0; + f->presence = 0; -const char* upb_EnumDef_Name(const upb_EnumDef* e) { - return _upb_DefBuilder_FullToShort(e->full_name); -} + if (extendee->ext & kUpb_ExtMode_IsMessageSet) { + // Extensions of MessageSet must be messages. + if (!upb_IsSubMessage(f)) return NULL; -const upb_FileDef* upb_EnumDef_File(const upb_EnumDef* e) { return e->file; } + // Extensions of MessageSet must be non-repeating. 
+ if ((f->mode & kUpb_FieldMode_Mask) == kUpb_FieldMode_Array) return NULL; + } -const upb_MessageDef* upb_EnumDef_ContainingType(const upb_EnumDef* e) { - return e->containing_type; -} + ext->extendee = extendee; + ext->sub = sub; -int32_t upb_EnumDef_Default(const upb_EnumDef* e) { - UPB_ASSERT(upb_EnumDef_FindValueByNumber(e, e->defaultval)); - return e->defaultval; + return ret; } -int upb_EnumDef_ReservedRangeCount(const upb_EnumDef* e) { - return e->res_range_count; +static const char* upb_MtDecoder_BuildMiniTableExtension( + upb_MtDecoder* const decoder, const char* const data, const size_t len, + upb_MiniTableExtension* const ext, const upb_MiniTable* const extendee, + const upb_MiniTableSub sub) { + if (UPB_SETJMP(decoder->base.err) != 0) return NULL; + return upb_MtDecoder_DoBuildMiniTableExtension(decoder, data, len, ext, + extendee, sub); } -const upb_EnumReservedRange* upb_EnumDef_ReservedRange(const upb_EnumDef* e, - int i) { - UPB_ASSERT(0 <= i && i < e->res_range_count); - return _upb_EnumReservedRange_At(e->res_ranges, i); -} +const char* _upb_MiniTableExtension_Init(const char* data, size_t len, + upb_MiniTableExtension* ext, + const upb_MiniTable* extendee, + upb_MiniTableSub sub, + upb_MiniTablePlatform platform, + upb_Status* status) { + upb_MtDecoder decoder = { + .base = {.status = status}, + .arena = NULL, + .table = NULL, + .platform = platform, + }; -int upb_EnumDef_ReservedNameCount(const upb_EnumDef* e) { - return e->res_name_count; + return upb_MtDecoder_BuildMiniTableExtension(&decoder, data, len, ext, + extendee, sub); } -upb_StringView upb_EnumDef_ReservedName(const upb_EnumDef* e, int i) { - UPB_ASSERT(0 <= i && i < e->res_name_count); - return e->res_names[i]; -} +upb_MiniTableExtension* _upb_MiniTableExtension_Build( + const char* data, size_t len, const upb_MiniTable* extendee, + upb_MiniTableSub sub, upb_MiniTablePlatform platform, upb_Arena* arena, + upb_Status* status) { + upb_MiniTableExtension* ext = + 
upb_Arena_Malloc(arena, sizeof(upb_MiniTableExtension)); + if (UPB_UNLIKELY(!ext)) return NULL; -int upb_EnumDef_ValueCount(const upb_EnumDef* e) { return e->value_count; } + const char* ptr = _upb_MiniTableExtension_Init(data, len, ext, extendee, sub, + platform, status); + if (UPB_UNLIKELY(!ptr)) return NULL; -const upb_EnumValueDef* upb_EnumDef_FindValueByName(const upb_EnumDef* e, - const char* name) { - return upb_EnumDef_FindValueByNameWithSize(e, name, strlen(name)); + return ext; } -const upb_EnumValueDef* upb_EnumDef_FindValueByNameWithSize( - const upb_EnumDef* e, const char* name, size_t size) { - upb_value v; - return upb_strtable_lookup2(&e->ntoi, name, size, &v) - ? upb_value_getconstptr(v) - : NULL; +upb_MiniTable* _upb_MiniTable_Build(const char* data, size_t len, + upb_MiniTablePlatform platform, + upb_Arena* arena, upb_Status* status) { + void* buf = NULL; + size_t size = 0; + upb_MiniTable* ret = upb_MiniTable_BuildWithBuf(data, len, platform, arena, + &buf, &size, status); + free(buf); + return ret; } -const upb_EnumValueDef* upb_EnumDef_FindValueByNumber(const upb_EnumDef* e, - int32_t num) { - upb_value v; - return upb_inttable_lookup(&e->iton, num, &v) ? upb_value_getconstptr(v) - : NULL; -} - -bool upb_EnumDef_CheckNumber(const upb_EnumDef* e, int32_t num) { - // We could use upb_EnumDef_FindValueByNumber(e, num) != NULL, but we expect - // this to be faster (especially for small numbers). - return upb_MiniTableEnum_CheckValue(e->layout, num); -} - -const upb_EnumValueDef* upb_EnumDef_Value(const upb_EnumDef* e, int i) { - UPB_ASSERT(0 <= i && i < e->value_count); - return _upb_EnumValueDef_At(e->values, i); -} -bool upb_EnumDef_IsClosed(const upb_EnumDef* e) { return e->is_closed; } +// Must be last. 
-bool upb_EnumDef_MiniDescriptorEncode(const upb_EnumDef* e, upb_Arena* a, - upb_StringView* out) { - upb_DescState s; - _upb_DescState_Init(&s); +bool upb_MiniTable_SetSubMessage(upb_MiniTable* table, + upb_MiniTableField* field, + const upb_MiniTable* sub) { + UPB_ASSERT((uintptr_t)table->fields <= (uintptr_t)field && + (uintptr_t)field < + (uintptr_t)(table->fields + table->field_count)); + UPB_ASSERT(sub); - const upb_EnumValueDef** sorted = NULL; - if (!e->is_sorted) { - sorted = _upb_EnumValueDefs_Sorted(e->values, e->value_count, a); - if (!sorted) return false; - } + const bool sub_is_map = sub->ext & kUpb_ExtMode_IsMapEntry; - if (!_upb_DescState_Grow(&s, a)) return false; - s.ptr = upb_MtDataEncoder_StartEnum(&s.e, s.ptr); + switch (field->UPB_PRIVATE(descriptortype)) { + case kUpb_FieldType_Message: + if (sub_is_map) { + const bool table_is_map = table->ext & kUpb_ExtMode_IsMapEntry; + if (UPB_UNLIKELY(table_is_map)) return false; - // Duplicate values are allowed but we only encode each value once. - uint32_t previous = 0; + field->mode = (field->mode & ~kUpb_FieldMode_Mask) | kUpb_FieldMode_Map; + } + break; - for (size_t i = 0; i < e->value_count; i++) { - const uint32_t current = - upb_EnumValueDef_Number(sorted ? sorted[i] : upb_EnumDef_Value(e, i)); - if (i != 0 && previous == current) continue; + case kUpb_FieldType_Group: + if (UPB_UNLIKELY(sub_is_map)) return false; + break; - if (!_upb_DescState_Grow(&s, a)) return false; - s.ptr = upb_MtDataEncoder_PutEnumValue(&s.e, s.ptr, current); - previous = current; + default: + return false; } - if (!_upb_DescState_Grow(&s, a)) return false; - s.ptr = upb_MtDataEncoder_EndEnum(&s.e, s.ptr); + upb_MiniTableSub* table_sub = + (void*)&table->subs[field->UPB_PRIVATE(submsg_index)]; + // TODO(haberman): Add this assert back once YouTube is updated to not call + // this function repeatedly. 
+ // UPB_ASSERT(table_sub->submsg == &_kUpb_MiniTable_Empty); + table_sub->submsg = sub; + return true; +} - // There will always be room for this '\0' in the encoder buffer because - // kUpb_MtDataEncoder_MinSize is overkill for upb_MtDataEncoder_EndEnum(). - UPB_ASSERT(s.ptr < s.buf + s.bufsize); - *s.ptr = '\0'; +bool upb_MiniTable_SetSubEnum(upb_MiniTable* table, upb_MiniTableField* field, + const upb_MiniTableEnum* sub) { + UPB_ASSERT((uintptr_t)table->fields <= (uintptr_t)field && + (uintptr_t)field < + (uintptr_t)(table->fields + table->field_count)); + UPB_ASSERT(sub); - out->data = s.buf; - out->size = s.ptr - s.buf; + upb_MiniTableSub* table_sub = + (void*)&table->subs[field->UPB_PRIVATE(submsg_index)]; + table_sub->subenum = sub; return true; } -static upb_MiniTableEnum* create_enumlayout(upb_DefBuilder* ctx, - const upb_EnumDef* e) { - upb_StringView sv; - bool ok = upb_EnumDef_MiniDescriptorEncode(e, ctx->tmp_arena, &sv); - if (!ok) _upb_DefBuilder_Errf(ctx, "OOM while building enum MiniDescriptor"); +uint32_t upb_MiniTable_GetSubList(const upb_MiniTable* mt, + const upb_MiniTableField** subs) { + uint32_t msg_count = 0; + uint32_t enum_count = 0; - upb_Status status; - upb_MiniTableEnum* layout = - upb_MiniTableEnum_Build(sv.data, sv.size, ctx->arena, &status); - if (!layout) - _upb_DefBuilder_Errf(ctx, "Error building enum MiniTable: %s", status.msg); - return layout; -} + for (int i = 0; i < mt->field_count; i++) { + const upb_MiniTableField* f = &mt->fields[i]; + if (upb_MiniTableField_CType(f) == kUpb_CType_Message) { + *subs = f; + ++subs; + msg_count++; + } + } -static upb_StringView* _upb_EnumReservedNames_New( - upb_DefBuilder* ctx, int n, const upb_StringView* protos) { - upb_StringView* sv = _upb_DefBuilder_Alloc(ctx, sizeof(upb_StringView) * n); - for (size_t i = 0; i < n; i++) { - sv[i].data = - upb_strdup2(protos[i].data, protos[i].size, _upb_DefBuilder_Arena(ctx)); - sv[i].size = protos[i].size; + for (int i = 0; i < mt->field_count; 
i++) { + const upb_MiniTableField* f = &mt->fields[i]; + if (upb_MiniTableField_CType(f) == kUpb_CType_Enum) { + *subs = f; + ++subs; + enum_count++; + } } - return sv; -} -static void create_enumdef(upb_DefBuilder* ctx, const char* prefix, - const UPB_DESC(EnumDescriptorProto) * enum_proto, - upb_EnumDef* e) { - const UPB_DESC(EnumValueDescriptorProto)* const* values; - const UPB_DESC(EnumDescriptorProto_EnumReservedRange)* const* res_ranges; - const upb_StringView* res_names; - upb_StringView name; - size_t n_value, n_res_range, n_res_name; + return (msg_count << 16) | enum_count; +} - // Must happen before _upb_DefBuilder_Add() - e->file = _upb_DefBuilder_File(ctx); +// The list of sub_tables and sub_enums must exactly match the number and order +// of sub-message fields and sub-enum fields given by upb_MiniTable_GetSubList() +// above. +bool upb_MiniTable_Link(upb_MiniTable* mt, const upb_MiniTable** sub_tables, + size_t sub_table_count, + const upb_MiniTableEnum** sub_enums, + size_t sub_enum_count) { + uint32_t msg_count = 0; + uint32_t enum_count = 0; - name = UPB_DESC(EnumDescriptorProto_name)(enum_proto); + for (int i = 0; i < mt->field_count; i++) { + upb_MiniTableField* f = (upb_MiniTableField*)&mt->fields[i]; + if (upb_MiniTableField_CType(f) == kUpb_CType_Message) { + const upb_MiniTable* sub = sub_tables[msg_count++]; + if (msg_count > sub_table_count) return false; + if (sub != NULL) { + if (!upb_MiniTable_SetSubMessage(mt, f, sub)) return false; + } + } + } - e->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); - _upb_DefBuilder_Add(ctx, e->full_name, - _upb_DefType_Pack(e, UPB_DEFTYPE_ENUM)); + for (int i = 0; i < mt->field_count; i++) { + upb_MiniTableField* f = (upb_MiniTableField*)&mt->fields[i]; + if (upb_MiniTableField_IsClosedEnum(f)) { + const upb_MiniTableEnum* sub = sub_enums[enum_count++]; + if (enum_count > sub_enum_count) return false; + if (sub != NULL) { + if (!upb_MiniTable_SetSubEnum(mt, f, sub)) return false; + } + } + } 
- e->is_closed = (!UPB_TREAT_PROTO2_ENUMS_LIKE_PROTO3) && - (upb_FileDef_Syntax(e->file) == kUpb_Syntax_Proto2); + return true; +} - values = UPB_DESC(EnumDescriptorProto_value)(enum_proto, &n_value); - bool ok = upb_strtable_init(&e->ntoi, n_value, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); - ok = upb_inttable_init(&e->iton, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); +// Must be last. - e->defaultval = 0; - e->value_count = n_value; - e->values = - _upb_EnumValueDefs_New(ctx, prefix, n_value, values, e, &e->is_sorted); +typedef struct { + uint64_t present_values_mask; + uint32_t last_written_value; +} upb_MtDataEncoderInternal_EnumState; - if (n_value == 0) { - _upb_DefBuilder_Errf(ctx, "enums must contain at least one value (%s)", - e->full_name); - } +typedef struct { + uint64_t msg_modifiers; + uint32_t last_field_num; + enum { + kUpb_OneofState_NotStarted, + kUpb_OneofState_StartedOneof, + kUpb_OneofState_EmittedOneofField, + } oneof_state; +} upb_MtDataEncoderInternal_MsgState; - res_ranges = - UPB_DESC(EnumDescriptorProto_reserved_range)(enum_proto, &n_res_range); - e->res_range_count = n_res_range; - e->res_ranges = _upb_EnumReservedRanges_New(ctx, n_res_range, res_ranges, e); +typedef struct { + char* buf_start; // Only for checking kUpb_MtDataEncoder_MinSize. 
+ union { + upb_MtDataEncoderInternal_EnumState enum_state; + upb_MtDataEncoderInternal_MsgState msg_state; + } state; +} upb_MtDataEncoderInternal; - res_names = - UPB_DESC(EnumDescriptorProto_reserved_name)(enum_proto, &n_res_name); - e->res_name_count = n_res_name; - e->res_names = _upb_EnumReservedNames_New(ctx, n_res_name, res_names); +static upb_MtDataEncoderInternal* upb_MtDataEncoder_GetInternal( + upb_MtDataEncoder* e, char* buf_start) { + UPB_ASSERT(sizeof(upb_MtDataEncoderInternal) <= sizeof(e->internal)); + upb_MtDataEncoderInternal* ret = (upb_MtDataEncoderInternal*)e->internal; + ret->buf_start = buf_start; + return ret; +} - UPB_DEF_SET_OPTIONS(e->opts, EnumDescriptorProto, EnumOptions, enum_proto); +static char* upb_MtDataEncoder_PutRaw(upb_MtDataEncoder* e, char* ptr, + char ch) { + upb_MtDataEncoderInternal* in = (upb_MtDataEncoderInternal*)e->internal; + UPB_ASSERT(ptr - in->buf_start < kUpb_MtDataEncoder_MinSize); + if (ptr == e->end) return NULL; + *ptr++ = ch; + return ptr; +} - upb_inttable_compact(&e->iton, ctx->arena); +static char* upb_MtDataEncoder_Put(upb_MtDataEncoder* e, char* ptr, char ch) { + return upb_MtDataEncoder_PutRaw(e, ptr, _upb_ToBase92(ch)); +} - if (e->is_closed) { - if (ctx->layout) { - UPB_ASSERT(ctx->enum_count < ctx->layout->enum_count); - e->layout = ctx->layout->enums[ctx->enum_count++]; - } else { - e->layout = create_enumlayout(ctx, e); - } - } else { - e->layout = NULL; - } +static char* upb_MtDataEncoder_PutBase92Varint(upb_MtDataEncoder* e, char* ptr, + uint32_t val, int min, int max) { + int shift = upb_Log2Ceiling(_upb_FromBase92(max) - _upb_FromBase92(min) + 1); + UPB_ASSERT(shift <= 6); + uint32_t mask = (1 << shift) - 1; + do { + uint32_t bits = val & mask; + ptr = upb_MtDataEncoder_Put(e, ptr, bits + _upb_FromBase92(min)); + if (!ptr) return NULL; + val >>= shift; + } while (val); + return ptr; } -upb_EnumDef* _upb_EnumDefs_New( - upb_DefBuilder* ctx, int n, - const UPB_DESC(EnumDescriptorProto) * const* 
protos, - const upb_MessageDef* containing_type) { - _upb_DefType_CheckPadding(sizeof(upb_EnumDef)); +char* upb_MtDataEncoder_PutModifier(upb_MtDataEncoder* e, char* ptr, + uint64_t mod) { + if (mod) { + ptr = upb_MtDataEncoder_PutBase92Varint(e, ptr, mod, + kUpb_EncodedValue_MinModifier, + kUpb_EncodedValue_MaxModifier); + } + return ptr; +} - // If a containing type is defined then get the full name from that. - // Otherwise use the package name from the file def. - const char* name = containing_type ? upb_MessageDef_FullName(containing_type) - : _upb_FileDef_RawPackage(ctx->file); +char* upb_MtDataEncoder_EncodeExtension(upb_MtDataEncoder* e, char* ptr, + upb_FieldType type, uint32_t field_num, + uint64_t field_mod) { + upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); + in->state.msg_state.msg_modifiers = 0; + in->state.msg_state.last_field_num = 0; + in->state.msg_state.oneof_state = kUpb_OneofState_NotStarted; - upb_EnumDef* e = _upb_DefBuilder_Alloc(ctx, sizeof(upb_EnumDef) * n); - for (size_t i = 0; i < n; i++) { - create_enumdef(ctx, name, protos[i], &e[i]); - e[i].containing_type = containing_type; - } - return e; + ptr = upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_ExtensionV1); + if (!ptr) return NULL; + + return upb_MtDataEncoder_PutField(e, ptr, type, field_num, field_mod); } -// #include "upb/reflection/extension_range_internal.h" -// #include "upb/reflection/message_def.h" +char* upb_MtDataEncoder_EncodeMap(upb_MtDataEncoder* e, char* ptr, + upb_FieldType key_type, + upb_FieldType value_type, uint64_t key_mod, + uint64_t value_mod) { + upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); + in->state.msg_state.msg_modifiers = 0; + in->state.msg_state.last_field_num = 0; + in->state.msg_state.oneof_state = kUpb_OneofState_NotStarted; -// Must be last. 
+ ptr = upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_MapV1); + if (!ptr) return NULL; -struct upb_EnumReservedRange { - int32_t start; - int32_t end; -}; + ptr = upb_MtDataEncoder_PutField(e, ptr, key_type, 1, key_mod); + if (!ptr) return NULL; -upb_EnumReservedRange* _upb_EnumReservedRange_At(const upb_EnumReservedRange* r, - int i) { - return (upb_EnumReservedRange*)&r[i]; + return upb_MtDataEncoder_PutField(e, ptr, value_type, 2, value_mod); } -int32_t upb_EnumReservedRange_Start(const upb_EnumReservedRange* r) { - return r->start; -} -int32_t upb_EnumReservedRange_End(const upb_EnumReservedRange* r) { - return r->end; +char* upb_MtDataEncoder_EncodeMessageSet(upb_MtDataEncoder* e, char* ptr) { + (void)upb_MtDataEncoder_GetInternal(e, ptr); + return upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_MessageSetV1); } -upb_EnumReservedRange* _upb_EnumReservedRanges_New( - upb_DefBuilder* ctx, int n, - const UPB_DESC(EnumDescriptorProto_EnumReservedRange) * const* protos, - const upb_EnumDef* e) { - upb_EnumReservedRange* r = - _upb_DefBuilder_Alloc(ctx, sizeof(upb_EnumReservedRange) * n); - - for (int i = 0; i < n; i++) { - const int32_t start = - UPB_DESC(EnumDescriptorProto_EnumReservedRange_start)(protos[i]); - const int32_t end = - UPB_DESC(EnumDescriptorProto_EnumReservedRange_end)(protos[i]); +char* upb_MtDataEncoder_StartMessage(upb_MtDataEncoder* e, char* ptr, + uint64_t msg_mod) { + upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); + in->state.msg_state.msg_modifiers = msg_mod; + in->state.msg_state.last_field_num = 0; + in->state.msg_state.oneof_state = kUpb_OneofState_NotStarted; - // A full validation would also check that each range is disjoint, and that - // none of the fields overlap with the extension ranges, but we are just - // sanity checking here. + ptr = upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_MessageV1); + if (!ptr) return NULL; - // Note: Not a typo! 
Unlike extension ranges and message reserved ranges, - // the end value of an enum reserved range is *inclusive*! - if (end < start) { - _upb_DefBuilder_Errf(ctx, "Reserved range (%d, %d) is invalid, enum=%s\n", - (int)start, (int)end, upb_EnumDef_FullName(e)); - } + return upb_MtDataEncoder_PutModifier(e, ptr, msg_mod); +} - r[i].start = start; - r[i].end = end; +static char* _upb_MtDataEncoder_MaybePutFieldSkip(upb_MtDataEncoder* e, + char* ptr, + uint32_t field_num) { + upb_MtDataEncoderInternal* in = (upb_MtDataEncoderInternal*)e->internal; + if (field_num <= in->state.msg_state.last_field_num) return NULL; + if (in->state.msg_state.last_field_num + 1 != field_num) { + // Put skip. + UPB_ASSERT(field_num > in->state.msg_state.last_field_num); + uint32_t skip = field_num - in->state.msg_state.last_field_num; + ptr = upb_MtDataEncoder_PutBase92Varint( + e, ptr, skip, kUpb_EncodedValue_MinSkip, kUpb_EncodedValue_MaxSkip); + if (!ptr) return NULL; } - - return r; + in->state.msg_state.last_field_num = field_num; + return ptr; } +static char* _upb_MtDataEncoder_PutFieldType(upb_MtDataEncoder* e, char* ptr, + upb_FieldType type, + uint64_t field_mod) { + static const char kUpb_TypeToEncoded[] = { + [kUpb_FieldType_Double] = kUpb_EncodedType_Double, + [kUpb_FieldType_Float] = kUpb_EncodedType_Float, + [kUpb_FieldType_Int64] = kUpb_EncodedType_Int64, + [kUpb_FieldType_UInt64] = kUpb_EncodedType_UInt64, + [kUpb_FieldType_Int32] = kUpb_EncodedType_Int32, + [kUpb_FieldType_Fixed64] = kUpb_EncodedType_Fixed64, + [kUpb_FieldType_Fixed32] = kUpb_EncodedType_Fixed32, + [kUpb_FieldType_Bool] = kUpb_EncodedType_Bool, + [kUpb_FieldType_String] = kUpb_EncodedType_String, + [kUpb_FieldType_Group] = kUpb_EncodedType_Group, + [kUpb_FieldType_Message] = kUpb_EncodedType_Message, + [kUpb_FieldType_Bytes] = kUpb_EncodedType_Bytes, + [kUpb_FieldType_UInt32] = kUpb_EncodedType_UInt32, + [kUpb_FieldType_Enum] = kUpb_EncodedType_OpenEnum, + [kUpb_FieldType_SFixed32] = 
kUpb_EncodedType_SFixed32, + [kUpb_FieldType_SFixed64] = kUpb_EncodedType_SFixed64, + [kUpb_FieldType_SInt32] = kUpb_EncodedType_SInt32, + [kUpb_FieldType_SInt64] = kUpb_EncodedType_SInt64, + }; + + int encoded_type = kUpb_TypeToEncoded[type]; -// Must be last. + if (field_mod & kUpb_FieldModifier_IsClosedEnum) { + UPB_ASSERT(type == kUpb_FieldType_Enum); + encoded_type = kUpb_EncodedType_ClosedEnum; + } -struct upb_EnumValueDef { - const UPB_DESC(EnumValueOptions) * opts; - const upb_EnumDef* parent; - const char* full_name; - int32_t number; -}; + if (field_mod & kUpb_FieldModifier_IsRepeated) { + // Repeated fields shift the type number up (unlike other modifiers which + // are bit flags). + encoded_type += kUpb_EncodedType_RepeatedBase; + } -upb_EnumValueDef* _upb_EnumValueDef_At(const upb_EnumValueDef* v, int i) { - return (upb_EnumValueDef*)&v[i]; + return upb_MtDataEncoder_Put(e, ptr, encoded_type); } -static int _upb_EnumValueDef_Compare(const void* p1, const void* p2) { - const uint32_t v1 = (*(const upb_EnumValueDef**)p1)->number; - const uint32_t v2 = (*(const upb_EnumValueDef**)p2)->number; - return (v1 < v2) ? -1 : (v1 > v2); -} +static char* _upb_MtDataEncoder_MaybePutModifiers(upb_MtDataEncoder* e, + char* ptr, upb_FieldType type, + uint64_t field_mod) { + upb_MtDataEncoderInternal* in = (upb_MtDataEncoderInternal*)e->internal; + uint32_t encoded_modifiers = 0; + if ((field_mod & kUpb_FieldModifier_IsRepeated) && + upb_FieldType_IsPackable(type)) { + bool field_is_packed = field_mod & kUpb_FieldModifier_IsPacked; + bool default_is_packed = in->state.msg_state.msg_modifiers & + kUpb_MessageModifier_DefaultIsPacked; + if (field_is_packed != default_is_packed) { + encoded_modifiers |= kUpb_EncodedFieldModifier_FlipPacked; + } + } -const upb_EnumValueDef** _upb_EnumValueDefs_Sorted(const upb_EnumValueDef* v, - int n, upb_Arena* a) { - // TODO: Try to replace this arena alloc with a persistent scratch buffer. 
- upb_EnumValueDef** out = - (upb_EnumValueDef**)upb_Arena_Malloc(a, n * sizeof(void*)); - if (!out) return NULL; + if (field_mod & kUpb_FieldModifier_IsProto3Singular) { + encoded_modifiers |= kUpb_EncodedFieldModifier_IsProto3Singular; + } - for (int i = 0; i < n; i++) { - out[i] = (upb_EnumValueDef*)&v[i]; + if (field_mod & kUpb_FieldModifier_IsRequired) { + encoded_modifiers |= kUpb_EncodedFieldModifier_IsRequired; } - qsort(out, n, sizeof(void*), _upb_EnumValueDef_Compare); - return (const upb_EnumValueDef**)out; + return upb_MtDataEncoder_PutModifier(e, ptr, encoded_modifiers); } -const UPB_DESC(EnumValueOptions) * - upb_EnumValueDef_Options(const upb_EnumValueDef* v) { - return v->opts; -} +char* upb_MtDataEncoder_PutField(upb_MtDataEncoder* e, char* ptr, + upb_FieldType type, uint32_t field_num, + uint64_t field_mod) { + upb_MtDataEncoder_GetInternal(e, ptr); -bool upb_EnumValueDef_HasOptions(const upb_EnumValueDef* v) { - return v->opts != (void*)kUpbDefOptDefault; -} + ptr = _upb_MtDataEncoder_MaybePutFieldSkip(e, ptr, field_num); + if (!ptr) return NULL; -const upb_EnumDef* upb_EnumValueDef_Enum(const upb_EnumValueDef* v) { - return v->parent; + ptr = _upb_MtDataEncoder_PutFieldType(e, ptr, type, field_mod); + if (!ptr) return NULL; + + return _upb_MtDataEncoder_MaybePutModifiers(e, ptr, type, field_mod); } -const char* upb_EnumValueDef_FullName(const upb_EnumValueDef* v) { - return v->full_name; +char* upb_MtDataEncoder_StartOneof(upb_MtDataEncoder* e, char* ptr) { + upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); + if (in->state.msg_state.oneof_state == kUpb_OneofState_NotStarted) { + ptr = upb_MtDataEncoder_Put(e, ptr, _upb_FromBase92(kUpb_EncodedValue_End)); + } else { + ptr = upb_MtDataEncoder_Put( + e, ptr, _upb_FromBase92(kUpb_EncodedValue_OneofSeparator)); + } + in->state.msg_state.oneof_state = kUpb_OneofState_StartedOneof; + return ptr; } -const char* upb_EnumValueDef_Name(const upb_EnumValueDef* v) { - return 
_upb_DefBuilder_FullToShort(v->full_name); +char* upb_MtDataEncoder_PutOneofField(upb_MtDataEncoder* e, char* ptr, + uint32_t field_num) { + upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); + if (in->state.msg_state.oneof_state == kUpb_OneofState_EmittedOneofField) { + ptr = upb_MtDataEncoder_Put( + e, ptr, _upb_FromBase92(kUpb_EncodedValue_FieldSeparator)); + if (!ptr) return NULL; + } + ptr = upb_MtDataEncoder_PutBase92Varint(e, ptr, field_num, _upb_ToBase92(0), + _upb_ToBase92(63)); + in->state.msg_state.oneof_state = kUpb_OneofState_EmittedOneofField; + return ptr; } -int32_t upb_EnumValueDef_Number(const upb_EnumValueDef* v) { return v->number; } +char* upb_MtDataEncoder_StartEnum(upb_MtDataEncoder* e, char* ptr) { + upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); + in->state.enum_state.present_values_mask = 0; + in->state.enum_state.last_written_value = 0; -uint32_t upb_EnumValueDef_Index(const upb_EnumValueDef* v) { - // Compute index in our parent's array. 
- return v - upb_EnumDef_Value(v->parent, 0); + return upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_EnumV1); } -static void create_enumvaldef(upb_DefBuilder* ctx, const char* prefix, - const UPB_DESC(EnumValueDescriptorProto) * - val_proto, - upb_EnumDef* e, upb_EnumValueDef* v) { - upb_StringView name = UPB_DESC(EnumValueDescriptorProto_name)(val_proto); +static char* upb_MtDataEncoder_FlushDenseEnumMask(upb_MtDataEncoder* e, + char* ptr) { + upb_MtDataEncoderInternal* in = (upb_MtDataEncoderInternal*)e->internal; + ptr = upb_MtDataEncoder_Put(e, ptr, in->state.enum_state.present_values_mask); + in->state.enum_state.present_values_mask = 0; + in->state.enum_state.last_written_value += 5; + return ptr; +} - v->parent = e; // Must happen prior to _upb_DefBuilder_Add() - v->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); - v->number = UPB_DESC(EnumValueDescriptorProto_number)(val_proto); - _upb_DefBuilder_Add(ctx, v->full_name, - _upb_DefType_Pack(v, UPB_DEFTYPE_ENUMVAL)); +char* upb_MtDataEncoder_PutEnumValue(upb_MtDataEncoder* e, char* ptr, + uint32_t val) { + // TODO(b/229641772): optimize this encoding. 
+ upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); + UPB_ASSERT(val >= in->state.enum_state.last_written_value); + uint32_t delta = val - in->state.enum_state.last_written_value; + if (delta >= 5 && in->state.enum_state.present_values_mask) { + ptr = upb_MtDataEncoder_FlushDenseEnumMask(e, ptr); + if (!ptr) { + return NULL; + } + delta -= 5; + } - UPB_DEF_SET_OPTIONS(v->opts, EnumValueDescriptorProto, EnumValueOptions, - val_proto); + if (delta >= 5) { + ptr = upb_MtDataEncoder_PutBase92Varint( + e, ptr, delta, kUpb_EncodedValue_MinSkip, kUpb_EncodedValue_MaxSkip); + in->state.enum_state.last_written_value += delta; + delta = 0; + } - bool ok = _upb_EnumDef_Insert(e, v, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); + UPB_ASSERT((in->state.enum_state.present_values_mask >> delta) == 0); + in->state.enum_state.present_values_mask |= 1ULL << delta; + return ptr; } -// Allocate and initialize an array of |n| enum value defs owned by |e|. -upb_EnumValueDef* _upb_EnumValueDefs_New( - upb_DefBuilder* ctx, const char* prefix, int n, - const UPB_DESC(EnumValueDescriptorProto) * const* protos, upb_EnumDef* e, - bool* is_sorted) { - _upb_DefType_CheckPadding(sizeof(upb_EnumValueDef)); - - upb_EnumValueDef* v = - _upb_DefBuilder_Alloc(ctx, sizeof(upb_EnumValueDef) * n); +char* upb_MtDataEncoder_EndEnum(upb_MtDataEncoder* e, char* ptr) { + upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); + if (!in->state.enum_state.present_values_mask) return ptr; + return upb_MtDataEncoder_FlushDenseEnumMask(e, ptr); +} - *is_sorted = true; - uint32_t previous = 0; - for (size_t i = 0; i < n; i++) { - create_enumvaldef(ctx, prefix, protos[i], e, &v[i]); - const uint32_t current = v[i].number; - if (previous > current) *is_sorted = false; - previous = current; - } +const char _kUpb_ToBase92[] = { + ' ', '!', '#', '$', '%', '&', '(', ')', '*', '+', ',', '-', '.', '/', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', + '>', 
'?', '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', + 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', + 'Z', '[', ']', '^', '_', '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', + 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', + 'w', 'x', 'y', 'z', '{', '|', '}', '~', +}; - if (upb_FileDef_Syntax(ctx->file) == kUpb_Syntax_Proto3 && n > 0 && - v[0].number != 0) { - _upb_DefBuilder_Errf(ctx, - "for proto3, the first enum value must be zero (%s)", - upb_EnumDef_FullName(e)); - } +const int8_t _kUpb_FromBase92[] = { + 0, 1, -1, 2, 3, 4, 5, -1, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, + 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, + 55, 56, 57, -1, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, + 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, +}; - return v; -} // Must be last. -struct upb_ExtensionRange { - const UPB_DESC(ExtensionRangeOptions) * opts; - int32_t start; - int32_t end; +#define EXTREG_KEY_SIZE (sizeof(upb_MiniTable*) + sizeof(uint32_t)) + +struct upb_ExtensionRegistry { + upb_Arena* arena; + upb_strtable exts; // Key is upb_MiniTable* concatenated with fieldnum. 
}; -upb_ExtensionRange* _upb_ExtensionRange_At(const upb_ExtensionRange* r, int i) { - return (upb_ExtensionRange*)&r[i]; +static void extreg_key(char* buf, const upb_MiniTable* l, uint32_t fieldnum) { + memcpy(buf, &l, sizeof(l)); + memcpy(buf + sizeof(l), &fieldnum, sizeof(fieldnum)); } -const UPB_DESC(ExtensionRangeOptions) * - upb_ExtensionRange_Options(const upb_ExtensionRange* r) { - return r->opts; +upb_ExtensionRegistry* upb_ExtensionRegistry_New(upb_Arena* arena) { + upb_ExtensionRegistry* r = upb_Arena_Malloc(arena, sizeof(*r)); + if (!r) return NULL; + r->arena = arena; + if (!upb_strtable_init(&r->exts, 8, arena)) return NULL; + return r; } -bool upb_ExtensionRange_HasOptions(const upb_ExtensionRange* r) { - return r->opts != (void*)kUpbDefOptDefault; +UPB_API bool upb_ExtensionRegistry_Add(upb_ExtensionRegistry* r, + const upb_MiniTableExtension* e) { + char buf[EXTREG_KEY_SIZE]; + extreg_key(buf, e->extendee, e->field.number); + if (upb_strtable_lookup2(&r->exts, buf, EXTREG_KEY_SIZE, NULL)) return false; + return upb_strtable_insert(&r->exts, buf, EXTREG_KEY_SIZE, + upb_value_constptr(e), r->arena); } -int32_t upb_ExtensionRange_Start(const upb_ExtensionRange* r) { - return r->start; +bool upb_ExtensionRegistry_AddArray(upb_ExtensionRegistry* r, + const upb_MiniTableExtension** e, + size_t count) { + const upb_MiniTableExtension** start = e; + const upb_MiniTableExtension** end = UPB_PTRADD(e, count); + for (; e < end; e++) { + if (!upb_ExtensionRegistry_Add(r, *e)) goto failure; + } + return true; + +failure: + // Back out the entries previously added. 
+ for (end = e, e = start; e < end; e++) { + const upb_MiniTableExtension* ext = *e; + char buf[EXTREG_KEY_SIZE]; + extreg_key(buf, ext->extendee, ext->field.number); + upb_strtable_remove2(&r->exts, buf, EXTREG_KEY_SIZE, NULL); + } + return false; } -int32_t upb_ExtensionRange_End(const upb_ExtensionRange* r) { return r->end; } +const upb_MiniTableExtension* upb_ExtensionRegistry_Lookup( + const upb_ExtensionRegistry* r, const upb_MiniTable* t, uint32_t num) { + char buf[EXTREG_KEY_SIZE]; + upb_value v; + extreg_key(buf, t, num); + if (upb_strtable_lookup2(&r->exts, buf, EXTREG_KEY_SIZE, &v)) { + return upb_value_getconstptr(v); + } else { + return NULL; + } +} -upb_ExtensionRange* _upb_ExtensionRanges_New( - upb_DefBuilder* ctx, int n, - const UPB_DESC(DescriptorProto_ExtensionRange) * const* protos, - const upb_MessageDef* m) { - upb_ExtensionRange* r = - _upb_DefBuilder_Alloc(ctx, sizeof(upb_ExtensionRange) * n); - for (int i = 0; i < n; i++) { - const int32_t start = - UPB_DESC(DescriptorProto_ExtensionRange_start)(protos[i]); - const int32_t end = UPB_DESC(DescriptorProto_ExtensionRange_end)(protos[i]); - const int32_t max = UPB_DESC(MessageOptions_message_set_wire_format)( - upb_MessageDef_Options(m)) - ? INT32_MAX - : kUpb_MaxFieldNumber + 1; +#include - // A full validation would also check that each range is disjoint, and that - // none of the fields overlap with the extension ranges, but we are just - // sanity checking here. - if (start < 1 || end <= start || end > max) { - _upb_DefBuilder_Errf(ctx, - "Extension range (%d, %d) is invalid, message=%s\n", - (int)start, (int)end, upb_MessageDef_FullName(m)); + +// Must be last. 
+ +const upb_MiniTableField* upb_MiniTable_FindFieldByNumber( + const upb_MiniTable* t, uint32_t number) { + const size_t i = ((size_t)number) - 1; // 0 wraps to SIZE_MAX + + // Ideal case: index into dense fields + if (i < t->dense_below) { + UPB_ASSERT(t->fields[i].number == number); + return &t->fields[i]; + } + + // Slow case: binary search + int lo = t->dense_below; + int hi = t->field_count - 1; + while (lo <= hi) { + int mid = (lo + hi) / 2; + uint32_t num = t->fields[mid].number; + if (num < number) { + lo = mid + 1; + continue; + } + if (num > number) { + hi = mid - 1; + continue; } + return &t->fields[mid]; + } + return NULL; +} - r[i].start = start; - r[i].end = end; - UPB_DEF_SET_OPTIONS(r[i].opts, DescriptorProto_ExtensionRange, - ExtensionRangeOptions, protos[i]); +static bool upb_MiniTable_Is_Oneof(const upb_MiniTableField* f) { + return f->presence < 0; +} + +const upb_MiniTableField* upb_MiniTable_GetOneof(const upb_MiniTable* m, + const upb_MiniTableField* f) { + if (UPB_UNLIKELY(!upb_MiniTable_Is_Oneof(f))) { + return NULL; + } + const upb_MiniTableField* ptr = &m->fields[0]; + const upb_MiniTableField* end = &m->fields[m->field_count]; + while (++ptr < end) { + if (ptr->presence == (*f).presence) { + return ptr; + } } + return NULL; +} - return r; +bool upb_MiniTable_NextOneofField(const upb_MiniTable* m, + const upb_MiniTableField** f) { + const upb_MiniTableField* ptr = *f; + const upb_MiniTableField* end = &m->fields[m->field_count]; + while (++ptr < end) { + if (ptr->presence == (*f)->presence) { + *f = ptr; + return true; + } + } + return false; } -#include -#include +const struct upb_MiniTable _kUpb_MiniTable_Empty = { + .subs = NULL, + .fields = NULL, + .size = 0, + .field_count = 0, + .ext = kUpb_ExtMode_NonExtendable, + .dense_below = 0, + .table_mask = -1, + .required_count = 0, +}; -// Must be last. -#define UPB_FIELD_TYPE_UNSPECIFIED 0 +#include -typedef struct { - size_t len; - char str[1]; // Null-terminated string data follows. 
-} str_t; -struct upb_FieldDef { - const UPB_DESC(FieldOptions) * opts; - const upb_FileDef* file; - const upb_MessageDef* msgdef; - const char* full_name; - const char* json_name; - union { - int64_t sint; - uint64_t uint; - double dbl; - float flt; - bool boolean; - str_t* str; - void* msg; // Always NULL. - } defaultval; - union { - const upb_OneofDef* oneof; - const upb_MessageDef* extension_scope; - } scope; - union { - const upb_MessageDef* msgdef; - const upb_EnumDef* enumdef; - const UPB_DESC(FieldDescriptorProto) * unresolved; - } sub; - uint32_t number_; - uint16_t index_; - uint16_t layout_index; // Index into msgdef->layout->fields or file->exts - bool has_default; - bool has_json_name; - bool has_presence; - bool is_extension; - bool is_packed; - bool is_proto3_optional; - upb_FieldType type_; - upb_Label label_; -#if UINTPTR_MAX == 0xffffffff - uint32_t padding; // Increase size to a multiple of 8. -#endif -}; +// Must be last. -upb_FieldDef* _upb_FieldDef_At(const upb_FieldDef* f, int i) { - return (upb_FieldDef*)&f[i]; -} +/* The upb core does not generally have a concept of default instances. However + * for descriptor options we make an exception since the max size is known and + * modest (<200 bytes). All types can share a default instance since it is + * initialized to zeroes. + * + * We have to allocate an extra pointer for upb's internal metadata. */ +static const char opt_default_buf[_UPB_MAXOPT_SIZE + sizeof(void*)] = {0}; +const char* kUpbDefOptDefault = &opt_default_buf[sizeof(void*)]; -const UPB_DESC(FieldOptions) * upb_FieldDef_Options(const upb_FieldDef* f) { - return f->opts; -} +const char* _upb_DefBuilder_FullToShort(const char* fullname) { + const char* p; -bool upb_FieldDef_HasOptions(const upb_FieldDef* f) { - return f->opts != (void*)kUpbDefOptDefault; + if (fullname == NULL) { + return NULL; + } else if ((p = strrchr(fullname, '.')) == NULL) { + /* No '.' in the name, return the full string. 
*/ + return fullname; + } else { + /* Return one past the last '.'. */ + return p + 1; + } } -const char* upb_FieldDef_FullName(const upb_FieldDef* f) { - return f->full_name; +void _upb_DefBuilder_FailJmp(upb_DefBuilder* ctx) { UPB_LONGJMP(ctx->err, 1); } + +void _upb_DefBuilder_Errf(upb_DefBuilder* ctx, const char* fmt, ...) { + va_list argp; + va_start(argp, fmt); + upb_Status_VSetErrorFormat(ctx->status, fmt, argp); + va_end(argp); + _upb_DefBuilder_FailJmp(ctx); } -upb_CType upb_FieldDef_CType(const upb_FieldDef* f) { - switch (f->type_) { - case kUpb_FieldType_Double: - return kUpb_CType_Double; - case kUpb_FieldType_Float: - return kUpb_CType_Float; - case kUpb_FieldType_Int64: - case kUpb_FieldType_SInt64: - case kUpb_FieldType_SFixed64: - return kUpb_CType_Int64; - case kUpb_FieldType_Int32: - case kUpb_FieldType_SFixed32: - case kUpb_FieldType_SInt32: - return kUpb_CType_Int32; - case kUpb_FieldType_UInt64: - case kUpb_FieldType_Fixed64: - return kUpb_CType_UInt64; - case kUpb_FieldType_UInt32: - case kUpb_FieldType_Fixed32: - return kUpb_CType_UInt32; - case kUpb_FieldType_Enum: - return kUpb_CType_Enum; - case kUpb_FieldType_Bool: - return kUpb_CType_Bool; - case kUpb_FieldType_String: - return kUpb_CType_String; - case kUpb_FieldType_Bytes: - return kUpb_CType_Bytes; - case kUpb_FieldType_Group: - case kUpb_FieldType_Message: - return kUpb_CType_Message; - } - UPB_UNREACHABLE(); +void _upb_DefBuilder_OomErr(upb_DefBuilder* ctx) { + upb_Status_SetErrorMessage(ctx->status, "out of memory"); + _upb_DefBuilder_FailJmp(ctx); } -upb_FieldType upb_FieldDef_Type(const upb_FieldDef* f) { return f->type_; } +// Verify a relative identifier string. The loop is branchless for speed. 
+static void _upb_DefBuilder_CheckIdentNotFull(upb_DefBuilder* ctx, + upb_StringView name) { + bool good = name.size > 0; -uint32_t upb_FieldDef_Index(const upb_FieldDef* f) { return f->index_; } + for (size_t i = 0; i < name.size; i++) { + const char c = name.data[i]; + const char d = c | 0x20; // force lowercase + const bool is_alpha = (('a' <= d) & (d <= 'z')) | (c == '_'); + const bool is_numer = ('0' <= c) & (c <= '9') & (i != 0); -upb_Label upb_FieldDef_Label(const upb_FieldDef* f) { return f->label_; } + good &= is_alpha | is_numer; + } -uint32_t upb_FieldDef_Number(const upb_FieldDef* f) { return f->number_; } + if (!good) _upb_DefBuilder_CheckIdentSlow(ctx, name, false); +} -bool upb_FieldDef_IsExtension(const upb_FieldDef* f) { return f->is_extension; } +const char* _upb_DefBuilder_MakeFullName(upb_DefBuilder* ctx, + const char* prefix, + upb_StringView name) { + _upb_DefBuilder_CheckIdentNotFull(ctx, name); + if (prefix) { + // ret = prefix + '.' + name; + size_t n = strlen(prefix); + char* ret = _upb_DefBuilder_Alloc(ctx, n + name.size + 2); + strcpy(ret, prefix); + ret[n] = '.'; + memcpy(&ret[n + 1], name.data, name.size); + ret[n + 1 + name.size] = '\0'; + return ret; + } else { + char* ret = upb_strdup2(name.data, name.size, ctx->arena); + if (!ret) _upb_DefBuilder_OomErr(ctx); + return ret; + } +} -bool upb_FieldDef_IsPacked(const upb_FieldDef* f) { return f->is_packed; } +static bool remove_component(char* base, size_t* len) { + if (*len == 0) return false; -const char* upb_FieldDef_Name(const upb_FieldDef* f) { - return _upb_DefBuilder_FullToShort(f->full_name); + for (size_t i = *len - 1; i > 0; i--) { + if (base[i] == '.') { + *len = i; + return true; + } + } + + *len = 0; + return true; } -const char* upb_FieldDef_JsonName(const upb_FieldDef* f) { - return f->json_name; +const void* _upb_DefBuilder_ResolveAny(upb_DefBuilder* ctx, + const char* from_name_dbg, + const char* base, upb_StringView sym, + upb_deftype_t* type) { + if (sym.size == 0) 
goto notfound; + upb_value v; + if (sym.data[0] == '.') { + /* Symbols starting with '.' are absolute, so we do a single lookup. + * Slice to omit the leading '.' */ + if (!_upb_DefPool_LookupSym(ctx->symtab, sym.data + 1, sym.size - 1, &v)) { + goto notfound; + } + } else { + /* Remove components from base until we find an entry or run out. */ + size_t baselen = base ? strlen(base) : 0; + char* tmp = malloc(sym.size + baselen + 1); + while (1) { + char* p = tmp; + if (baselen) { + memcpy(p, base, baselen); + p[baselen] = '.'; + p += baselen + 1; + } + memcpy(p, sym.data, sym.size); + p += sym.size; + if (_upb_DefPool_LookupSym(ctx->symtab, tmp, p - tmp, &v)) { + break; + } + if (!remove_component(tmp, &baselen)) { + free(tmp); + goto notfound; + } + } + free(tmp); + } + + *type = _upb_DefType_Type(v); + return _upb_DefType_Unpack(v, *type); + +notfound: + _upb_DefBuilder_Errf(ctx, "couldn't resolve name '" UPB_STRINGVIEW_FORMAT "'", + UPB_STRINGVIEW_ARGS(sym)); } -bool upb_FieldDef_HasJsonName(const upb_FieldDef* f) { - return f->has_json_name; +const void* _upb_DefBuilder_Resolve(upb_DefBuilder* ctx, + const char* from_name_dbg, const char* base, + upb_StringView sym, upb_deftype_t type) { + upb_deftype_t found_type; + const void* ret = + _upb_DefBuilder_ResolveAny(ctx, from_name_dbg, base, sym, &found_type); + if (ret && found_type != type) { + _upb_DefBuilder_Errf(ctx, + "type mismatch when resolving %s: couldn't find " + "name " UPB_STRINGVIEW_FORMAT " with type=%d", + from_name_dbg, UPB_STRINGVIEW_ARGS(sym), (int)type); + } + return ret; } -const upb_FileDef* upb_FieldDef_File(const upb_FieldDef* f) { return f->file; } +// Per ASCII this will lower-case a letter. If the result is a letter, the +// input was definitely a letter. If the output is not a letter, this may +// have transformed the character unpredictably. 
+static char upb_ascii_lower(char ch) { return ch | 0x20; } -const upb_MessageDef* upb_FieldDef_ContainingType(const upb_FieldDef* f) { - return f->msgdef; +// isalpha() etc. from are locale-dependent, which we don't want. +static bool upb_isbetween(uint8_t c, uint8_t low, uint8_t high) { + return low <= c && c <= high; } -const upb_MessageDef* upb_FieldDef_ExtensionScope(const upb_FieldDef* f) { - return f->is_extension ? f->scope.extension_scope : NULL; +static bool upb_isletter(char c) { + char lower = upb_ascii_lower(c); + return upb_isbetween(lower, 'a', 'z') || c == '_'; } -const upb_OneofDef* upb_FieldDef_ContainingOneof(const upb_FieldDef* f) { - return f->is_extension ? NULL : f->scope.oneof; +static bool upb_isalphanum(char c) { + return upb_isletter(c) || upb_isbetween(c, '0', '9'); } -const upb_OneofDef* upb_FieldDef_RealContainingOneof(const upb_FieldDef* f) { - const upb_OneofDef* oneof = upb_FieldDef_ContainingOneof(f); - if (!oneof || upb_OneofDef_IsSynthetic(oneof)) return NULL; - return oneof; +static bool TryGetChar(const char** src, const char* end, char* ch) { + if (*src == end) return false; + *ch = **src; + *src += 1; + return true; } -upb_MessageValue upb_FieldDef_Default(const upb_FieldDef* f) { - upb_MessageValue ret; - - if (upb_FieldDef_IsRepeated(f) || upb_FieldDef_IsSubMessage(f)) { - return (upb_MessageValue){.msg_val = NULL}; +static int TryGetHexDigit(const char** src, const char* end) { + char ch; + if (!TryGetChar(src, end, &ch)) return -1; + if ('0' <= ch && ch <= '9') { + return ch - '0'; } - - switch (upb_FieldDef_CType(f)) { - case kUpb_CType_Bool: - return (upb_MessageValue){.bool_val = f->defaultval.boolean}; - case kUpb_CType_Int64: - return (upb_MessageValue){.int64_val = f->defaultval.sint}; - case kUpb_CType_UInt64: - return (upb_MessageValue){.uint64_val = f->defaultval.uint}; - case kUpb_CType_Enum: - case kUpb_CType_Int32: - return (upb_MessageValue){.int32_val = (int32_t)f->defaultval.sint}; - case kUpb_CType_UInt32: 
- return (upb_MessageValue){.uint32_val = (uint32_t)f->defaultval.uint}; - case kUpb_CType_Float: - return (upb_MessageValue){.float_val = f->defaultval.flt}; - case kUpb_CType_Double: - return (upb_MessageValue){.double_val = f->defaultval.dbl}; - case kUpb_CType_String: - case kUpb_CType_Bytes: { - str_t* str = f->defaultval.str; - if (str) { - return (upb_MessageValue){ - .str_val = (upb_StringView){.data = str->str, .size = str->len}}; - } else { - return (upb_MessageValue){ - .str_val = (upb_StringView){.data = NULL, .size = 0}}; - } - } - default: - UPB_UNREACHABLE(); + ch = upb_ascii_lower(ch); + if ('a' <= ch && ch <= 'f') { + return ch - 'a' + 0xa; } + *src -= 1; // Char wasn't actually a hex digit. + return -1; +} +static char upb_DefBuilder_ParseHexEscape(upb_DefBuilder* ctx, + const upb_FieldDef* f, + const char** src, const char* end) { + int hex_digit = TryGetHexDigit(src, end); + if (hex_digit < 0) { + _upb_DefBuilder_Errf( + ctx, "\\x must be followed by at least one hex digit (field='%s')", + upb_FieldDef_FullName(f)); + return 0; + } + unsigned int ret = hex_digit; + while ((hex_digit = TryGetHexDigit(src, end)) >= 0) { + ret = (ret << 4) | hex_digit; + } + if (ret > 0xff) { + _upb_DefBuilder_Errf(ctx, "Value of hex escape in field %s exceeds 8 bits", + upb_FieldDef_FullName(f)); + return 0; + } return ret; } -const upb_MessageDef* upb_FieldDef_MessageSubDef(const upb_FieldDef* f) { - return upb_FieldDef_CType(f) == kUpb_CType_Message ? f->sub.msgdef : NULL; +static char TryGetOctalDigit(const char** src, const char* end) { + char ch; + if (!TryGetChar(src, end, &ch)) return -1; + if ('0' <= ch && ch <= '7') { + return ch - '0'; + } + *src -= 1; // Char wasn't actually an octal digit. + return -1; } -const upb_EnumDef* upb_FieldDef_EnumSubDef(const upb_FieldDef* f) { - return upb_FieldDef_CType(f) == kUpb_CType_Enum ? 
f->sub.enumdef : NULL; +static char upb_DefBuilder_ParseOctalEscape(upb_DefBuilder* ctx, + const upb_FieldDef* f, + const char** src, const char* end) { + char ch = 0; + for (int i = 0; i < 3; i++) { + char digit; + if ((digit = TryGetOctalDigit(src, end)) >= 0) { + ch = (ch << 3) | digit; + } + } + return ch; } -const upb_MiniTableField* upb_FieldDef_MiniTable(const upb_FieldDef* f) { - if (upb_FieldDef_IsExtension(f)) { - const upb_FileDef* file = upb_FieldDef_File(f); - return (upb_MiniTableField*)_upb_FileDef_ExtensionMiniTable( - file, f->layout_index); - } else { - const upb_MiniTable* layout = upb_MessageDef_MiniTable(f->msgdef); - return &layout->fields[f->layout_index]; +char _upb_DefBuilder_ParseEscape(upb_DefBuilder* ctx, const upb_FieldDef* f, + const char** src, const char* end) { + char ch; + if (!TryGetChar(src, end, &ch)) { + _upb_DefBuilder_Errf(ctx, "unterminated escape sequence in field %s", + upb_FieldDef_FullName(f)); + return 0; + } + switch (ch) { + case 'a': + return '\a'; + case 'b': + return '\b'; + case 'f': + return '\f'; + case 'n': + return '\n'; + case 'r': + return '\r'; + case 't': + return '\t'; + case 'v': + return '\v'; + case '\\': + return '\\'; + case '\'': + return '\''; + case '\"': + return '\"'; + case '?': + return '\?'; + case 'x': + case 'X': + return upb_DefBuilder_ParseHexEscape(ctx, f, src, end); + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + *src -= 1; + return upb_DefBuilder_ParseOctalEscape(ctx, f, src, end); } + _upb_DefBuilder_Errf(ctx, "Unknown escape sequence: \\%c", ch); } -const upb_MiniTableExtension* _upb_FieldDef_ExtensionMiniTable( - const upb_FieldDef* f) { - UPB_ASSERT(upb_FieldDef_IsExtension(f)); - const upb_FileDef* file = upb_FieldDef_File(f); - return _upb_FileDef_ExtensionMiniTable(file, f->layout_index); -} +void _upb_DefBuilder_CheckIdentSlow(upb_DefBuilder* ctx, upb_StringView name, + bool full) { + const char* str = name.data; + const size_t 
len = name.size; + bool start = true; + for (size_t i = 0; i < len; i++) { + const char c = str[i]; + if (c == '.') { + if (start || !full) { + _upb_DefBuilder_Errf( + ctx, "invalid name: unexpected '.' (" UPB_STRINGVIEW_FORMAT ")", + UPB_STRINGVIEW_ARGS(name)); + } + start = true; + } else if (start) { + if (!upb_isletter(c)) { + _upb_DefBuilder_Errf(ctx, + "invalid name: path components must start with a " + "letter (" UPB_STRINGVIEW_FORMAT ")", + UPB_STRINGVIEW_ARGS(name)); + } + start = false; + } else if (!upb_isalphanum(c)) { + _upb_DefBuilder_Errf( + ctx, + "invalid name: non-alphanumeric character (" UPB_STRINGVIEW_FORMAT + ")", + UPB_STRINGVIEW_ARGS(name)); + } + } + if (start) { + _upb_DefBuilder_Errf(ctx, + "invalid name: empty part (" UPB_STRINGVIEW_FORMAT ")", + UPB_STRINGVIEW_ARGS(name)); + } -bool _upb_FieldDef_IsClosedEnum(const upb_FieldDef* f) { - if (f->type_ != kUpb_FieldType_Enum) return false; - return upb_EnumDef_IsClosed(f->sub.enumdef); + // We should never reach this point. + UPB_ASSERT(false); } -bool _upb_FieldDef_IsProto3Optional(const upb_FieldDef* f) { - return f->is_proto3_optional; -} -int _upb_FieldDef_LayoutIndex(const upb_FieldDef* f) { return f->layout_index; } -uint64_t _upb_FieldDef_Modifiers(const upb_FieldDef* f) { - uint64_t out = f->is_packed ? kUpb_FieldModifier_IsPacked : 0; +// Must be last. 
- switch (f->label_) { - case kUpb_Label_Optional: - if (!upb_FieldDef_HasPresence(f)) { - out |= kUpb_FieldModifier_IsProto3Singular; - } - break; - case kUpb_Label_Repeated: - out |= kUpb_FieldModifier_IsRepeated; - break; - case kUpb_Label_Required: - out |= kUpb_FieldModifier_IsRequired; - break; - } +struct upb_DefPool { + upb_Arena* arena; + upb_strtable syms; // full_name -> packed def ptr + upb_strtable files; // file_name -> (upb_FileDef*) + upb_inttable exts; // (upb_MiniTableExtension*) -> (upb_FieldDef*) + upb_ExtensionRegistry* extreg; + upb_MiniTablePlatform platform; + void* scratch_data; + size_t scratch_size; + size_t bytes_loaded; +}; - if (_upb_FieldDef_IsClosedEnum(f)) { - out |= kUpb_FieldModifier_IsClosedEnum; - } - return out; +void upb_DefPool_Free(upb_DefPool* s) { + upb_Arena_Free(s->arena); + upb_gfree(s->scratch_data); + upb_gfree(s); } -bool upb_FieldDef_HasDefault(const upb_FieldDef* f) { return f->has_default; } -bool upb_FieldDef_HasPresence(const upb_FieldDef* f) { return f->has_presence; } +upb_DefPool* upb_DefPool_New(void) { + upb_DefPool* s = upb_gmalloc(sizeof(*s)); + if (!s) return NULL; -bool upb_FieldDef_HasSubDef(const upb_FieldDef* f) { - return upb_FieldDef_IsSubMessage(f) || - upb_FieldDef_CType(f) == kUpb_CType_Enum; -} + s->arena = upb_Arena_New(); + s->bytes_loaded = 0; -bool upb_FieldDef_IsMap(const upb_FieldDef* f) { - return upb_FieldDef_IsRepeated(f) && upb_FieldDef_IsSubMessage(f) && - upb_MessageDef_IsMapEntry(upb_FieldDef_MessageSubDef(f)); -} + s->scratch_size = 240; + s->scratch_data = upb_gmalloc(s->scratch_size); + if (!s->scratch_data) goto err; -bool upb_FieldDef_IsOptional(const upb_FieldDef* f) { - return upb_FieldDef_Label(f) == kUpb_Label_Optional; -} + if (!upb_strtable_init(&s->syms, 32, s->arena)) goto err; + if (!upb_strtable_init(&s->files, 4, s->arena)) goto err; + if (!upb_inttable_init(&s->exts, s->arena)) goto err; -bool upb_FieldDef_IsPrimitive(const upb_FieldDef* f) { - return 
!upb_FieldDef_IsString(f) && !upb_FieldDef_IsSubMessage(f); -} + s->extreg = upb_ExtensionRegistry_New(s->arena); + if (!s->extreg) goto err; -bool upb_FieldDef_IsRepeated(const upb_FieldDef* f) { - return upb_FieldDef_Label(f) == kUpb_Label_Repeated; -} + s->platform = kUpb_MiniTablePlatform_Native; -bool upb_FieldDef_IsRequired(const upb_FieldDef* f) { - return upb_FieldDef_Label(f) == kUpb_Label_Required; -} + return s; -bool upb_FieldDef_IsString(const upb_FieldDef* f) { - return upb_FieldDef_CType(f) == kUpb_CType_String || - upb_FieldDef_CType(f) == kUpb_CType_Bytes; +err: + upb_DefPool_Free(s); + return NULL; } -bool upb_FieldDef_IsSubMessage(const upb_FieldDef* f) { - return upb_FieldDef_CType(f) == kUpb_CType_Message; -} +bool _upb_DefPool_InsertExt(upb_DefPool* s, const upb_MiniTableExtension* ext, + const upb_FieldDef* f) { + return upb_inttable_insert(&s->exts, (uintptr_t)ext, upb_value_constptr(f), + s->arena); +} -static bool between(int32_t x, int32_t low, int32_t high) { - return x >= low && x <= high; +bool _upb_DefPool_InsertSym(upb_DefPool* s, upb_StringView sym, upb_value v, + upb_Status* status) { + // TODO: table should support an operation "tryinsert" to avoid the double + // lookup. + if (upb_strtable_lookup2(&s->syms, sym.data, sym.size, NULL)) { + upb_Status_SetErrorFormat(status, "duplicate symbol '%s'", sym.data); + return false; + } + if (!upb_strtable_insert(&s->syms, sym.data, sym.size, v, s->arena)) { + upb_Status_SetErrorMessage(status, "out of memory"); + return false; + } + return true; } -bool upb_FieldDef_checklabel(int32_t label) { return between(label, 1, 3); } -bool upb_FieldDef_checktype(int32_t type) { return between(type, 1, 11); } -bool upb_FieldDef_checkintfmt(int32_t fmt) { return between(fmt, 1, 3); } +static const void* _upb_DefPool_Unpack(const upb_DefPool* s, const char* sym, + size_t size, upb_deftype_t type) { + upb_value v; + return upb_strtable_lookup2(&s->syms, sym, size, &v) + ? 
_upb_DefType_Unpack(v, type) + : NULL; +} -bool upb_FieldDef_checkdescriptortype(int32_t type) { - return between(type, 1, 18); +bool _upb_DefPool_LookupSym(const upb_DefPool* s, const char* sym, size_t size, + upb_value* v) { + return upb_strtable_lookup2(&s->syms, sym, size, v); } -static bool streql2(const char* a, size_t n, const char* b) { - return n == strlen(b) && memcmp(a, b, n) == 0; +upb_ExtensionRegistry* _upb_DefPool_ExtReg(const upb_DefPool* s) { + return s->extreg; } -// Implement the transformation as described in the spec: -// 1. upper case all letters after an underscore. -// 2. remove all underscores. -static char* make_json_name(const char* name, size_t size, upb_Arena* a) { - char* out = upb_Arena_Malloc(a, size + 1); // +1 is to add a trailing '\0' - if (out == NULL) return NULL; +void** _upb_DefPool_ScratchData(const upb_DefPool* s) { + return (void**)&s->scratch_data; +} - bool ucase_next = false; - char* des = out; - for (size_t i = 0; i < size; i++) { - if (name[i] == '_') { - ucase_next = true; - } else { - *des++ = ucase_next ? toupper(name[i]) : name[i]; - ucase_next = false; - } - } - *des++ = '\0'; - return out; +size_t* _upb_DefPool_ScratchSize(const upb_DefPool* s) { + return (size_t*)&s->scratch_size; } -static str_t* newstr(upb_DefBuilder* ctx, const char* data, size_t len) { - str_t* ret = _upb_DefBuilder_Alloc(ctx, sizeof(*ret) + len); - if (!ret) _upb_DefBuilder_OomErr(ctx); - ret->len = len; - if (len) memcpy(ret->str, data, len); - ret->str[len] = '\0'; - return ret; +void _upb_DefPool_SetPlatform(upb_DefPool* s, upb_MiniTablePlatform platform) { + assert(upb_strtable_count(&s->files) == 0); + s->platform = platform; } -static str_t* unescape(upb_DefBuilder* ctx, const upb_FieldDef* f, - const char* data, size_t len) { - // Size here is an upper bound; escape sequences could ultimately shrink it. 
- str_t* ret = _upb_DefBuilder_Alloc(ctx, sizeof(*ret) + len); - char* dst = &ret->str[0]; - const char* src = data; - const char* end = data + len; +const upb_MessageDef* upb_DefPool_FindMessageByName(const upb_DefPool* s, + const char* sym) { + return _upb_DefPool_Unpack(s, sym, strlen(sym), UPB_DEFTYPE_MSG); +} - while (src < end) { - if (*src == '\\') { - src++; - *dst++ = _upb_DefBuilder_ParseEscape(ctx, f, &src, end); - } else { - *dst++ = *src++; - } - } +const upb_MessageDef* upb_DefPool_FindMessageByNameWithSize( + const upb_DefPool* s, const char* sym, size_t len) { + return _upb_DefPool_Unpack(s, sym, len, UPB_DEFTYPE_MSG); +} - ret->len = dst - &ret->str[0]; - return ret; +const upb_EnumDef* upb_DefPool_FindEnumByName(const upb_DefPool* s, + const char* sym) { + return _upb_DefPool_Unpack(s, sym, strlen(sym), UPB_DEFTYPE_ENUM); } -static void parse_default(upb_DefBuilder* ctx, const char* str, size_t len, - upb_FieldDef* f) { - char* end; - char nullz[64]; - errno = 0; +const upb_EnumValueDef* upb_DefPool_FindEnumByNameval(const upb_DefPool* s, + const char* sym) { + return _upb_DefPool_Unpack(s, sym, strlen(sym), UPB_DEFTYPE_ENUMVAL); +} - switch (upb_FieldDef_CType(f)) { - case kUpb_CType_Int32: - case kUpb_CType_Int64: - case kUpb_CType_UInt32: - case kUpb_CType_UInt64: - case kUpb_CType_Double: - case kUpb_CType_Float: - // Standard C number parsing functions expect null-terminated strings. - if (len >= sizeof(nullz) - 1) { - _upb_DefBuilder_Errf(ctx, "Default too long: %.*s", (int)len, str); - } - memcpy(nullz, str, len); - nullz[len] = '\0'; - str = nullz; - break; +const upb_FileDef* upb_DefPool_FindFileByName(const upb_DefPool* s, + const char* name) { + upb_value v; + return upb_strtable_lookup(&s->files, name, &v) ? upb_value_getconstptr(v) + : NULL; +} + +const upb_FileDef* upb_DefPool_FindFileByNameWithSize(const upb_DefPool* s, + const char* name, + size_t len) { + upb_value v; + return upb_strtable_lookup2(&s->files, name, len, &v) + ? 
upb_value_getconstptr(v) + : NULL; +} + +const upb_FieldDef* upb_DefPool_FindExtensionByNameWithSize( + const upb_DefPool* s, const char* name, size_t size) { + upb_value v; + if (!upb_strtable_lookup2(&s->syms, name, size, &v)) return NULL; + + switch (_upb_DefType_Type(v)) { + case UPB_DEFTYPE_FIELD: + return _upb_DefType_Unpack(v, UPB_DEFTYPE_FIELD); + case UPB_DEFTYPE_MSG: { + const upb_MessageDef* m = _upb_DefType_Unpack(v, UPB_DEFTYPE_MSG); + return _upb_MessageDef_InMessageSet(m) + ? upb_MessageDef_NestedExtension(m, 0) + : NULL; + } default: break; } - switch (upb_FieldDef_CType(f)) { - case kUpb_CType_Int32: { - long val = strtol(str, &end, 0); - if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end) { - goto invalid; + return NULL; +} + +const upb_FieldDef* upb_DefPool_FindExtensionByName(const upb_DefPool* s, + const char* sym) { + return upb_DefPool_FindExtensionByNameWithSize(s, sym, strlen(sym)); +} + +const upb_ServiceDef* upb_DefPool_FindServiceByName(const upb_DefPool* s, + const char* name) { + return _upb_DefPool_Unpack(s, name, strlen(name), UPB_DEFTYPE_SERVICE); +} + +const upb_ServiceDef* upb_DefPool_FindServiceByNameWithSize( + const upb_DefPool* s, const char* name, size_t size) { + return _upb_DefPool_Unpack(s, name, size, UPB_DEFTYPE_SERVICE); +} + +const upb_FileDef* upb_DefPool_FindFileContainingSymbol(const upb_DefPool* s, + const char* name) { + upb_value v; + // TODO(haberman): non-extension fields and oneofs. 
+ if (upb_strtable_lookup(&s->syms, name, &v)) { + switch (_upb_DefType_Type(v)) { + case UPB_DEFTYPE_EXT: { + const upb_FieldDef* f = _upb_DefType_Unpack(v, UPB_DEFTYPE_EXT); + return upb_FieldDef_File(f); } - f->defaultval.sint = val; - break; - } - case kUpb_CType_Enum: { - const upb_EnumDef* e = f->sub.enumdef; - const upb_EnumValueDef* ev = - upb_EnumDef_FindValueByNameWithSize(e, str, len); - if (!ev) { - goto invalid; + case UPB_DEFTYPE_MSG: { + const upb_MessageDef* m = _upb_DefType_Unpack(v, UPB_DEFTYPE_MSG); + return upb_MessageDef_File(m); } - f->defaultval.sint = upb_EnumValueDef_Number(ev); - break; - } - case kUpb_CType_Int64: { - long long val = strtoll(str, &end, 0); - if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || *end) { - goto invalid; + case UPB_DEFTYPE_ENUM: { + const upb_EnumDef* e = _upb_DefType_Unpack(v, UPB_DEFTYPE_ENUM); + return upb_EnumDef_File(e); } - f->defaultval.sint = val; - break; - } - case kUpb_CType_UInt32: { - unsigned long val = strtoul(str, &end, 0); - if (val > UINT32_MAX || errno == ERANGE || *end) { - goto invalid; + case UPB_DEFTYPE_ENUMVAL: { + const upb_EnumValueDef* ev = + _upb_DefType_Unpack(v, UPB_DEFTYPE_ENUMVAL); + return upb_EnumDef_File(upb_EnumValueDef_Enum(ev)); } - f->defaultval.uint = val; - break; - } - case kUpb_CType_UInt64: { - unsigned long long val = strtoull(str, &end, 0); - if (val > UINT64_MAX || errno == ERANGE || *end) { - goto invalid; + case UPB_DEFTYPE_SERVICE: { + const upb_ServiceDef* service = + _upb_DefType_Unpack(v, UPB_DEFTYPE_SERVICE); + return upb_ServiceDef_File(service); } - f->defaultval.uint = val; - break; + default: + UPB_UNREACHABLE(); } - case kUpb_CType_Double: { - double val = strtod(str, &end); - if (errno == ERANGE || *end) { - goto invalid; + } + + const char* last_dot = strrchr(name, '.'); + if (last_dot) { + const upb_MessageDef* parent = + upb_DefPool_FindMessageByNameWithSize(s, name, last_dot - name); + if (parent) { + const char* shortname = last_dot + 1; 
+ if (upb_MessageDef_FindByNameWithSize(parent, shortname, + strlen(shortname), NULL, NULL)) { + return upb_MessageDef_File(parent); } - f->defaultval.dbl = val; - break; } - case kUpb_CType_Float: { - float val = strtof(str, &end); - if (errno == ERANGE || *end) { - goto invalid; - } - f->defaultval.flt = val; - break; - } - case kUpb_CType_Bool: { - if (streql2(str, len, "false")) { - f->defaultval.boolean = false; - } else if (streql2(str, len, "true")) { - f->defaultval.boolean = true; - } else { - goto invalid; - } - break; - } - case kUpb_CType_String: - f->defaultval.str = newstr(ctx, str, len); - break; - case kUpb_CType_Bytes: - f->defaultval.str = unescape(ctx, f, str, len); - break; - case kUpb_CType_Message: - /* Should not have a default value. */ - _upb_DefBuilder_Errf(ctx, "Message should not have a default (%s)", - upb_FieldDef_FullName(f)); } - return; - -invalid: - _upb_DefBuilder_Errf(ctx, "Invalid default '%.*s' for field %s of type %d", - (int)len, str, upb_FieldDef_FullName(f), - (int)upb_FieldDef_Type(f)); + return NULL; } -static void set_default_default(upb_DefBuilder* ctx, upb_FieldDef* f) { - switch (upb_FieldDef_CType(f)) { - case kUpb_CType_Int32: - case kUpb_CType_Int64: - f->defaultval.sint = 0; - break; - case kUpb_CType_UInt64: - case kUpb_CType_UInt32: - f->defaultval.uint = 0; - break; - case kUpb_CType_Double: - case kUpb_CType_Float: - f->defaultval.dbl = 0; - break; - case kUpb_CType_String: - case kUpb_CType_Bytes: - f->defaultval.str = newstr(ctx, NULL, 0); - break; - case kUpb_CType_Bool: - f->defaultval.boolean = false; - break; - case kUpb_CType_Enum: { - const upb_EnumValueDef* v = upb_EnumDef_Value(f->sub.enumdef, 0); - f->defaultval.sint = upb_EnumValueDef_Number(v); - break; +static void remove_filedef(upb_DefPool* s, upb_FileDef* file) { + intptr_t iter = UPB_INTTABLE_BEGIN; + upb_StringView key; + upb_value val; + while (upb_strtable_next2(&s->syms, &key, &val, &iter)) { + const upb_FileDef* f; + switch 
(_upb_DefType_Type(val)) { + case UPB_DEFTYPE_EXT: + f = upb_FieldDef_File(_upb_DefType_Unpack(val, UPB_DEFTYPE_EXT)); + break; + case UPB_DEFTYPE_MSG: + f = upb_MessageDef_File(_upb_DefType_Unpack(val, UPB_DEFTYPE_MSG)); + break; + case UPB_DEFTYPE_ENUM: + f = upb_EnumDef_File(_upb_DefType_Unpack(val, UPB_DEFTYPE_ENUM)); + break; + case UPB_DEFTYPE_ENUMVAL: + f = upb_EnumDef_File(upb_EnumValueDef_Enum( + _upb_DefType_Unpack(val, UPB_DEFTYPE_ENUMVAL))); + break; + case UPB_DEFTYPE_SERVICE: + f = upb_ServiceDef_File(_upb_DefType_Unpack(val, UPB_DEFTYPE_SERVICE)); + break; + default: + UPB_UNREACHABLE(); } - case kUpb_CType_Message: - break; - } -} - -static void _upb_FieldDef_Create(upb_DefBuilder* ctx, const char* prefix, - const UPB_DESC(FieldDescriptorProto) * - field_proto, - upb_MessageDef* m, upb_FieldDef* f) { - // Must happen before _upb_DefBuilder_Add() - f->file = _upb_DefBuilder_File(ctx); - if (!UPB_DESC(FieldDescriptorProto_has_name)(field_proto)) { - _upb_DefBuilder_Errf(ctx, "field has no name"); + if (f == file) upb_strtable_removeiter(&s->syms, &iter); } +} - const upb_StringView name = UPB_DESC(FieldDescriptorProto_name)(field_proto); - - f->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); - f->label_ = (int)UPB_DESC(FieldDescriptorProto_label)(field_proto); - f->number_ = UPB_DESC(FieldDescriptorProto_number)(field_proto); - f->is_proto3_optional = - UPB_DESC(FieldDescriptorProto_proto3_optional)(field_proto); - f->msgdef = m; - f->scope.oneof = NULL; - - f->has_json_name = UPB_DESC(FieldDescriptorProto_has_json_name)(field_proto); - if (f->has_json_name) { - const upb_StringView sv = - UPB_DESC(FieldDescriptorProto_json_name)(field_proto); - f->json_name = upb_strdup2(sv.data, sv.size, ctx->arena); +static const upb_FileDef* upb_DefBuilder_AddFileToPool( + upb_DefBuilder* const builder, upb_DefPool* const s, + const UPB_DESC(FileDescriptorProto) * const file_proto, + const upb_StringView name, upb_Status* const status) { + if 
(UPB_SETJMP(builder->err) != 0) { + UPB_ASSERT(!upb_Status_IsOk(status)); + if (builder->file) { + remove_filedef(s, builder->file); + builder->file = NULL; + } + } else if (!builder->arena || !builder->tmp_arena) { + _upb_DefBuilder_OomErr(builder); } else { - f->json_name = make_json_name(name.data, name.size, ctx->arena); + _upb_FileDef_Create(builder, file_proto); + upb_strtable_insert(&s->files, name.data, name.size, + upb_value_constptr(builder->file), builder->arena); + UPB_ASSERT(upb_Status_IsOk(status)); + upb_Arena_Fuse(s->arena, builder->arena); } - if (!f->json_name) _upb_DefBuilder_OomErr(ctx); - const bool has_type = UPB_DESC(FieldDescriptorProto_has_type)(field_proto); - const bool has_type_name = - UPB_DESC(FieldDescriptorProto_has_type_name)(field_proto); + if (builder->arena) upb_Arena_Free(builder->arena); + if (builder->tmp_arena) upb_Arena_Free(builder->tmp_arena); + return builder->file; +} - f->type_ = (int)UPB_DESC(FieldDescriptorProto_type)(field_proto); +static const upb_FileDef* _upb_DefPool_AddFile( + upb_DefPool* s, const UPB_DESC(FileDescriptorProto) * file_proto, + const upb_MiniTableFile* layout, upb_Status* status) { + const upb_StringView name = UPB_DESC(FileDescriptorProto_name)(file_proto); - if (has_type) { - switch (f->type_) { - case kUpb_FieldType_Message: - case kUpb_FieldType_Group: - case kUpb_FieldType_Enum: - if (!has_type_name) { - _upb_DefBuilder_Errf(ctx, "field of type %d requires type name (%s)", - (int)f->type_, f->full_name); - } - break; - default: - if (has_type_name) { - _upb_DefBuilder_Errf( - ctx, "invalid type for field with type_name set (%s, %d)", - f->full_name, (int)f->type_); - } + // Determine whether we already know about this file. 
+ { + upb_value v; + if (upb_strtable_lookup2(&s->files, name.data, name.size, &v)) { + upb_Status_SetErrorFormat(status, + "duplicate file name " UPB_STRINGVIEW_FORMAT, + UPB_STRINGVIEW_ARGS(name)); + return NULL; } } - if (!has_type && has_type_name) { - f->type_ = - UPB_FIELD_TYPE_UNSPECIFIED; // We'll assign this in resolve_subdef() - } else { - if (f->type_ < kUpb_FieldType_Double || f->type_ > kUpb_FieldType_SInt64) { - _upb_DefBuilder_Errf(ctx, "invalid type for field %s (%d)", f->full_name, - f->type_); - } - } + upb_DefBuilder ctx = { + .symtab = s, + .layout = layout, + .platform = s->platform, + .msg_count = 0, + .enum_count = 0, + .ext_count = 0, + .status = status, + .file = NULL, + .arena = upb_Arena_New(), + .tmp_arena = upb_Arena_New(), + }; - if (f->label_ < kUpb_Label_Optional || f->label_ > kUpb_Label_Repeated) { - _upb_DefBuilder_Errf(ctx, "invalid label for field %s (%d)", f->full_name, - f->label_); - } + return upb_DefBuilder_AddFileToPool(&ctx, s, file_proto, name, status); +} - /* We can't resolve the subdef or (in the case of extensions) the containing - * message yet, because it may not have been defined yet. We stash a pointer - * to the field_proto until later when we can properly resolve it. */ - f->sub.unresolved = field_proto; +const upb_FileDef* upb_DefPool_AddFile(upb_DefPool* s, + const UPB_DESC(FileDescriptorProto) * + file_proto, + upb_Status* status) { + return _upb_DefPool_AddFile(s, file_proto, NULL, status); +} - if (f->label_ == kUpb_Label_Required && - upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto3) { - _upb_DefBuilder_Errf(ctx, "proto3 fields cannot be required (%s)", - f->full_name); - } +bool _upb_DefPool_LoadDefInitEx(upb_DefPool* s, const _upb_DefPool_Init* init, + bool rebuild_minitable) { + /* Since this function should never fail (it would indicate a bug in upb) we + * print errors to stderr instead of returning error status to the user. 
*/ + _upb_DefPool_Init** deps = init->deps; + UPB_DESC(FileDescriptorProto) * file; + upb_Arena* arena; + upb_Status status; - if (UPB_DESC(FieldDescriptorProto_has_oneof_index)(field_proto)) { - uint32_t oneof_index = - UPB_DESC(FieldDescriptorProto_oneof_index)(field_proto); + upb_Status_Clear(&status); - if (upb_FieldDef_Label(f) != kUpb_Label_Optional) { - _upb_DefBuilder_Errf(ctx, "fields in oneof must have OPTIONAL label (%s)", - f->full_name); - } + if (upb_DefPool_FindFileByName(s, init->filename)) { + return true; + } - if (!m) { - _upb_DefBuilder_Errf(ctx, "oneof field (%s) has no containing msg", - f->full_name); - } + arena = upb_Arena_New(); - if (oneof_index >= upb_MessageDef_OneofCount(m)) { - _upb_DefBuilder_Errf(ctx, "oneof_index out of range (%s)", f->full_name); - } + for (; *deps; deps++) { + if (!_upb_DefPool_LoadDefInitEx(s, *deps, rebuild_minitable)) goto err; + } - upb_OneofDef* oneof = (upb_OneofDef*)upb_MessageDef_Oneof(m, oneof_index); - f->scope.oneof = oneof; + file = UPB_DESC(FileDescriptorProto_parse_ex)( + init->descriptor.data, init->descriptor.size, NULL, + kUpb_DecodeOption_AliasString, arena); + s->bytes_loaded += init->descriptor.size; - _upb_OneofDef_Insert(ctx, oneof, f, name.data, name.size); + if (!file) { + upb_Status_SetErrorFormat( + &status, + "Failed to parse compiled-in descriptor for file '%s'. This should " + "never happen.", + init->filename); + goto err; } - UPB_DEF_SET_OPTIONS(f->opts, FieldDescriptorProto, FieldOptions, field_proto); + const upb_MiniTableFile* mt = rebuild_minitable ? NULL : init->layout; + if (!_upb_DefPool_AddFile(s, file, mt, &status)) { + goto err; + } - if (UPB_DESC(FieldOptions_has_packed)(f->opts)) { - f->is_packed = UPB_DESC(FieldOptions_packed)(f->opts); - } else { - // Repeated fields default to packed for proto3 only. 
- f->is_packed = has_type && upb_FieldDef_IsPrimitive(f) && - f->label_ == kUpb_Label_Repeated && - upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto3; - } + upb_Arena_Free(arena); + return true; - f->has_presence = - (!upb_FieldDef_IsRepeated(f)) && - (f->type_ == kUpb_FieldType_Message || f->type_ == kUpb_FieldType_Group || - upb_FieldDef_ContainingOneof(f) || - (upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto2)); +err: + fprintf(stderr, + "Error loading compiled-in descriptor for file '%s' (this should " + "never happen): %s\n", + init->filename, upb_Status_ErrorMessage(&status)); + upb_Arena_Free(arena); + return false; } -static void _upb_FieldDef_CreateExt(upb_DefBuilder* ctx, const char* prefix, - const UPB_DESC(FieldDescriptorProto) * - field_proto, - upb_MessageDef* m, upb_FieldDef* f) { - f->is_extension = true; - _upb_FieldDef_Create(ctx, prefix, field_proto, m, f); - - if (UPB_DESC(FieldDescriptorProto_has_oneof_index)(field_proto)) { - _upb_DefBuilder_Errf(ctx, "oneof_index provided for extension field (%s)", - f->full_name); - } - - f->scope.extension_scope = m; - _upb_DefBuilder_Add(ctx, f->full_name, _upb_DefType_Pack(f, UPB_DEFTYPE_EXT)); - f->layout_index = ctx->ext_count++; - - if (ctx->layout) { - UPB_ASSERT(_upb_FieldDef_ExtensionMiniTable(f)->field.number == f->number_); - } +size_t _upb_DefPool_BytesLoaded(const upb_DefPool* s) { + return s->bytes_loaded; } -static void _upb_FieldDef_CreateNotExt(upb_DefBuilder* ctx, const char* prefix, - const UPB_DESC(FieldDescriptorProto) * - field_proto, - upb_MessageDef* m, upb_FieldDef* f) { - f->is_extension = false; - _upb_FieldDef_Create(ctx, prefix, field_proto, m, f); - - if (!UPB_DESC(FieldDescriptorProto_has_oneof_index)(field_proto)) { - if (f->is_proto3_optional) { - _upb_DefBuilder_Errf( - ctx, - "non-extension field (%s) with proto3_optional was not in a oneof", - f->full_name); - } - } +upb_Arena* _upb_DefPool_Arena(const upb_DefPool* s) { return s->arena; } - 
_upb_MessageDef_InsertField(ctx, m, f); +const upb_FieldDef* upb_DefPool_FindExtensionByMiniTable( + const upb_DefPool* s, const upb_MiniTableExtension* ext) { + upb_value v; + bool ok = upb_inttable_lookup(&s->exts, (uintptr_t)ext, &v); + UPB_ASSERT(ok); + return upb_value_getconstptr(v); } -upb_FieldDef* _upb_Extensions_New( - upb_DefBuilder* ctx, int n, - const UPB_DESC(FieldDescriptorProto) * const* protos, const char* prefix, - upb_MessageDef* m) { - _upb_DefType_CheckPadding(sizeof(upb_FieldDef)); - upb_FieldDef* defs = - (upb_FieldDef*)_upb_DefBuilder_Alloc(ctx, sizeof(upb_FieldDef) * n); +const upb_FieldDef* upb_DefPool_FindExtensionByNumber(const upb_DefPool* s, + const upb_MessageDef* m, + int32_t fieldnum) { + const upb_MiniTable* t = upb_MessageDef_MiniTable(m); + const upb_MiniTableExtension* ext = + upb_ExtensionRegistry_Lookup(s->extreg, t, fieldnum); + return ext ? upb_DefPool_FindExtensionByMiniTable(s, ext) : NULL; +} - for (int i = 0; i < n; i++) { - upb_FieldDef* f = &defs[i]; +const upb_ExtensionRegistry* upb_DefPool_ExtensionRegistry( + const upb_DefPool* s) { + return s->extreg; +} - _upb_FieldDef_CreateExt(ctx, prefix, protos[i], m, f); - f->index_ = i; +const upb_FieldDef** upb_DefPool_GetAllExtensions(const upb_DefPool* s, + const upb_MessageDef* m, + size_t* count) { + size_t n = 0; + intptr_t iter = UPB_INTTABLE_BEGIN; + uintptr_t key; + upb_value val; + // This is O(all exts) instead of O(exts for m). If we need this to be + // efficient we may need to make extreg into a two-level table, or have a + // second per-message index. 
+ while (upb_inttable_next(&s->exts, &key, &val, &iter)) { + const upb_FieldDef* f = upb_value_getconstptr(val); + if (upb_FieldDef_ContainingType(f) == m) n++; } - - return defs; + const upb_FieldDef** exts = malloc(n * sizeof(*exts)); + iter = UPB_INTTABLE_BEGIN; + size_t i = 0; + while (upb_inttable_next(&s->exts, &key, &val, &iter)) { + const upb_FieldDef* f = upb_value_getconstptr(val); + if (upb_FieldDef_ContainingType(f) == m) exts[i++] = f; + } + *count = n; + return exts; } -upb_FieldDef* _upb_FieldDefs_New( - upb_DefBuilder* ctx, int n, - const UPB_DESC(FieldDescriptorProto) * const* protos, const char* prefix, - upb_MessageDef* m, bool* is_sorted) { - _upb_DefType_CheckPadding(sizeof(upb_FieldDef)); - upb_FieldDef* defs = - (upb_FieldDef*)_upb_DefBuilder_Alloc(ctx, sizeof(upb_FieldDef) * n); - - uint32_t previous = 0; - for (int i = 0; i < n; i++) { - upb_FieldDef* f = &defs[i]; +bool _upb_DefPool_LoadDefInit(upb_DefPool* s, const _upb_DefPool_Init* init) { + return _upb_DefPool_LoadDefInitEx(s, init, false); +} - _upb_FieldDef_CreateNotExt(ctx, prefix, protos[i], m, f); - f->index_ = i; - if (!ctx->layout) { - // Speculate that the def fields are sorted. We will always sort the - // MiniTable fields, so if defs are sorted then indices will match. - // - // If this is incorrect, we will overwrite later. - f->layout_index = i; - } - const uint32_t current = f->number_; - if (previous > current) *is_sorted = false; - previous = current; - } +// Must be last. 
- return defs; +upb_deftype_t _upb_DefType_Type(upb_value v) { + const uintptr_t num = (uintptr_t)upb_value_getconstptr(v); + return num & UPB_DEFTYPE_MASK; } -static void resolve_subdef(upb_DefBuilder* ctx, const char* prefix, - upb_FieldDef* f) { - const UPB_DESC(FieldDescriptorProto)* field_proto = f->sub.unresolved; - upb_StringView name = UPB_DESC(FieldDescriptorProto_type_name)(field_proto); - bool has_name = UPB_DESC(FieldDescriptorProto_has_type_name)(field_proto); - switch ((int)f->type_) { - case UPB_FIELD_TYPE_UNSPECIFIED: { - // Type was not specified and must be inferred. - UPB_ASSERT(has_name); - upb_deftype_t type; - const void* def = - _upb_DefBuilder_ResolveAny(ctx, f->full_name, prefix, name, &type); - switch (type) { - case UPB_DEFTYPE_ENUM: - f->sub.enumdef = def; - f->type_ = kUpb_FieldType_Enum; - if (!UPB_DESC(FieldOptions_has_packed)(f->opts)) { - f->is_packed = f->label_ == kUpb_Label_Repeated && - upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto3; - } - break; - case UPB_DEFTYPE_MSG: - f->sub.msgdef = def; - f->type_ = kUpb_FieldType_Message; // It appears there is no way of - // this being a group. - f->has_presence = !upb_FieldDef_IsRepeated(f); - break; - default: - _upb_DefBuilder_Errf(ctx, "Couldn't resolve type name for field %s", - f->full_name); - } - break; - } - case kUpb_FieldType_Message: - case kUpb_FieldType_Group: - UPB_ASSERT(has_name); - f->sub.msgdef = _upb_DefBuilder_Resolve(ctx, f->full_name, prefix, name, - UPB_DEFTYPE_MSG); - break; - case kUpb_FieldType_Enum: - UPB_ASSERT(has_name); - f->sub.enumdef = _upb_DefBuilder_Resolve(ctx, f->full_name, prefix, name, - UPB_DEFTYPE_ENUM); - break; - default: - // No resolution necessary. 
- break; - } +upb_value _upb_DefType_Pack(const void* ptr, upb_deftype_t type) { + uintptr_t num = (uintptr_t)ptr; + UPB_ASSERT((num & UPB_DEFTYPE_MASK) == 0); + num |= type; + return upb_value_constptr((const void*)num); } -static int _upb_FieldDef_Compare(const void* p1, const void* p2) { - const uint32_t v1 = (*(upb_FieldDef**)p1)->number_; - const uint32_t v2 = (*(upb_FieldDef**)p2)->number_; - return (v1 < v2) ? -1 : (v1 > v2); +const void* _upb_DefType_Unpack(upb_value v, upb_deftype_t type) { + uintptr_t num = (uintptr_t)upb_value_getconstptr(v); + return (num & UPB_DEFTYPE_MASK) == type + ? (const void*)(num & ~UPB_DEFTYPE_MASK) + : NULL; } -// _upb_FieldDefs_Sorted() is mostly a pure function of its inputs, but has one -// critical side effect that we depend on: it sets layout_index appropriately -// for non-sorted lists of fields. -const upb_FieldDef** _upb_FieldDefs_Sorted(const upb_FieldDef* f, int n, - upb_Arena* a) { - // TODO(salo): Replace this arena alloc with a persistent scratch buffer. - upb_FieldDef** out = (upb_FieldDef**)upb_Arena_Malloc(a, n * sizeof(void*)); - if (!out) return NULL; - for (int i = 0; i < n; i++) { - out[i] = (upb_FieldDef*)&f[i]; - } - qsort(out, n, sizeof(void*), _upb_FieldDef_Compare); +// Must be last. 
- for (int i = 0; i < n; i++) { - out[i]->layout_index = i; +bool _upb_DescState_Grow(upb_DescState* d, upb_Arena* a) { + const size_t oldbufsize = d->bufsize; + const int used = d->ptr - d->buf; + + if (!d->buf) { + d->buf = upb_Arena_Malloc(a, d->bufsize); + if (!d->buf) return false; + d->ptr = d->buf; + d->e.end = d->buf + d->bufsize; } - return (const upb_FieldDef**)out; -} -bool upb_FieldDef_MiniDescriptorEncode(const upb_FieldDef* f, upb_Arena* a, - upb_StringView* out) { - UPB_ASSERT(f->is_extension); + if (oldbufsize - used < kUpb_MtDataEncoder_MinSize) { + d->bufsize *= 2; + d->buf = upb_Arena_Realloc(a, d->buf, oldbufsize, d->bufsize); + if (!d->buf) return false; + d->ptr = d->buf + used; + d->e.end = d->buf + d->bufsize; + } - upb_DescState s; - _upb_DescState_Init(&s); + return true; +} - const int number = upb_FieldDef_Number(f); - const uint64_t modifiers = _upb_FieldDef_Modifiers(f); - if (!_upb_DescState_Grow(&s, a)) return false; - s.ptr = upb_MtDataEncoder_EncodeExtension(&s.e, s.ptr, f->type_, number, - modifiers); - *s.ptr = '\0'; - out->data = s.buf; - out->size = s.ptr - s.buf; - return true; -} +// Must be last. -static void resolve_extension(upb_DefBuilder* ctx, const char* prefix, - upb_FieldDef* f, - const UPB_DESC(FieldDescriptorProto) * - field_proto) { - if (!UPB_DESC(FieldDescriptorProto_has_extendee)(field_proto)) { - _upb_DefBuilder_Errf(ctx, "extension for field '%s' had no extendee", - f->full_name); - } - - upb_StringView name = UPB_DESC(FieldDescriptorProto_extendee)(field_proto); - const upb_MessageDef* m = - _upb_DefBuilder_Resolve(ctx, f->full_name, prefix, name, UPB_DEFTYPE_MSG); - f->msgdef = m; +struct upb_EnumDef { + const UPB_DESC(EnumOptions) * opts; + const upb_MiniTableEnum* layout; // Only for proto2. + const upb_FileDef* file; + const upb_MessageDef* containing_type; // Could be merged with "file". 
+ const char* full_name; + upb_strtable ntoi; + upb_inttable iton; + const upb_EnumValueDef* values; + const upb_EnumReservedRange* res_ranges; + const upb_StringView* res_names; + int value_count; + int res_range_count; + int res_name_count; + int32_t defaultval; + bool is_closed; + bool is_sorted; // Whether all of the values are defined in ascending order. +}; - if (!_upb_MessageDef_IsValidExtensionNumber(m, f->number_)) { - _upb_DefBuilder_Errf( - ctx, - "field number %u in extension %s has no extension range in message %s", - (unsigned)f->number_, f->full_name, upb_MessageDef_FullName(m)); - } +upb_EnumDef* _upb_EnumDef_At(const upb_EnumDef* e, int i) { + return (upb_EnumDef*)&e[i]; } -void _upb_FieldDef_BuildMiniTableExtension(upb_DefBuilder* ctx, - const upb_FieldDef* f) { - const upb_MiniTableExtension* ext = _upb_FieldDef_ExtensionMiniTable(f); - - if (ctx->layout) { - UPB_ASSERT(upb_FieldDef_Number(f) == ext->field.number); - } else { - upb_StringView desc; - if (!upb_FieldDef_MiniDescriptorEncode(f, ctx->tmp_arena, &desc)) { - _upb_DefBuilder_OomErr(ctx); - } - - upb_MiniTableExtension* mut_ext = (upb_MiniTableExtension*)ext; - upb_MiniTableSub sub = {NULL}; - if (upb_FieldDef_IsSubMessage(f)) { - sub.submsg = upb_MessageDef_MiniTable(f->sub.msgdef); - } else if (_upb_FieldDef_IsClosedEnum(f)) { - sub.subenum = _upb_EnumDef_MiniTable(f->sub.enumdef); - } - bool ok2 = upb_MiniTableExtension_Init(desc.data, desc.size, mut_ext, - upb_MessageDef_MiniTable(f->msgdef), - sub, ctx->status); - if (!ok2) _upb_DefBuilder_Errf(ctx, "Could not build extension mini table"); - } - - bool ok = _upb_DefPool_InsertExt(ctx->symtab, ext, f); - if (!ok) _upb_DefBuilder_OomErr(ctx); +const upb_MiniTableEnum* _upb_EnumDef_MiniTable(const upb_EnumDef* e) { + return e->layout; } -static void resolve_default(upb_DefBuilder* ctx, upb_FieldDef* f, - const UPB_DESC(FieldDescriptorProto) * - field_proto) { - // Have to delay resolving of the default value until now because of the 
enum - // case, since enum defaults are specified with a label. - if (UPB_DESC(FieldDescriptorProto_has_default_value)(field_proto)) { - upb_StringView defaultval = - UPB_DESC(FieldDescriptorProto_default_value)(field_proto); - - if (upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto3) { - _upb_DefBuilder_Errf(ctx, - "proto3 fields cannot have explicit defaults (%s)", - f->full_name); - } - - if (upb_FieldDef_IsSubMessage(f)) { - _upb_DefBuilder_Errf(ctx, - "message fields cannot have explicit defaults (%s)", - f->full_name); - } +bool _upb_EnumDef_Insert(upb_EnumDef* e, upb_EnumValueDef* v, upb_Arena* a) { + const char* name = upb_EnumValueDef_Name(v); + const upb_value val = upb_value_constptr(v); + bool ok = upb_strtable_insert(&e->ntoi, name, strlen(name), val, a); + if (!ok) return false; - parse_default(ctx, defaultval.data, defaultval.size, f); - f->has_default = true; - } else { - set_default_default(ctx, f); - f->has_default = false; + // Multiple enumerators can have the same number, first one wins. + const int number = upb_EnumValueDef_Number(v); + if (!upb_inttable_lookup(&e->iton, number, NULL)) { + return upb_inttable_insert(&e->iton, number, val, a); } + return true; } -void _upb_FieldDef_Resolve(upb_DefBuilder* ctx, const char* prefix, - upb_FieldDef* f) { - // We have to stash this away since resolve_subdef() may overwrite it. - const UPB_DESC(FieldDescriptorProto)* field_proto = f->sub.unresolved; - - resolve_subdef(ctx, prefix, f); - resolve_default(ctx, f, field_proto); - - if (f->is_extension) { - resolve_extension(ctx, prefix, f, field_proto); - } +const UPB_DESC(EnumOptions) * upb_EnumDef_Options(const upb_EnumDef* e) { + return e->opts; } +bool upb_EnumDef_HasOptions(const upb_EnumDef* e) { + return e->opts != (void*)kUpbDefOptDefault; +} -// Must be last. 
- -struct upb_FileDef { - const UPB_DESC(FileOptions) * opts; - const char* name; - const char* package; - const char* edition; +const char* upb_EnumDef_FullName(const upb_EnumDef* e) { return e->full_name; } - const upb_FileDef** deps; - const int32_t* public_deps; - const int32_t* weak_deps; - const upb_MessageDef* top_lvl_msgs; - const upb_EnumDef* top_lvl_enums; - const upb_FieldDef* top_lvl_exts; - const upb_ServiceDef* services; - const upb_MiniTableExtension** ext_layouts; - const upb_DefPool* symtab; +const char* upb_EnumDef_Name(const upb_EnumDef* e) { + return _upb_DefBuilder_FullToShort(e->full_name); +} - int dep_count; - int public_dep_count; - int weak_dep_count; - int top_lvl_msg_count; - int top_lvl_enum_count; - int top_lvl_ext_count; - int service_count; - int ext_count; // All exts in the file. - upb_Syntax syntax; -}; +const upb_FileDef* upb_EnumDef_File(const upb_EnumDef* e) { return e->file; } -const UPB_DESC(FileOptions) * upb_FileDef_Options(const upb_FileDef* f) { - return f->opts; +const upb_MessageDef* upb_EnumDef_ContainingType(const upb_EnumDef* e) { + return e->containing_type; } -bool upb_FileDef_HasOptions(const upb_FileDef* f) { - return f->opts != (void*)kUpbDefOptDefault; +int32_t upb_EnumDef_Default(const upb_EnumDef* e) { + UPB_ASSERT(upb_EnumDef_FindValueByNumber(e, e->defaultval)); + return e->defaultval; } -const char* upb_FileDef_Name(const upb_FileDef* f) { return f->name; } - -const char* upb_FileDef_Package(const upb_FileDef* f) { - return f->package ? f->package : ""; +int upb_EnumDef_ReservedRangeCount(const upb_EnumDef* e) { + return e->res_range_count; } -const char* upb_FileDef_Edition(const upb_FileDef* f) { - return f->edition ? 
f->edition : ""; +const upb_EnumReservedRange* upb_EnumDef_ReservedRange(const upb_EnumDef* e, + int i) { + UPB_ASSERT(0 <= i && i < e->res_range_count); + return _upb_EnumReservedRange_At(e->res_ranges, i); } -const char* _upb_FileDef_RawPackage(const upb_FileDef* f) { return f->package; } - -upb_Syntax upb_FileDef_Syntax(const upb_FileDef* f) { return f->syntax; } +int upb_EnumDef_ReservedNameCount(const upb_EnumDef* e) { + return e->res_name_count; +} -int upb_FileDef_TopLevelMessageCount(const upb_FileDef* f) { - return f->top_lvl_msg_count; +upb_StringView upb_EnumDef_ReservedName(const upb_EnumDef* e, int i) { + UPB_ASSERT(0 <= i && i < e->res_name_count); + return e->res_names[i]; } -int upb_FileDef_DependencyCount(const upb_FileDef* f) { return f->dep_count; } +int upb_EnumDef_ValueCount(const upb_EnumDef* e) { return e->value_count; } -int upb_FileDef_PublicDependencyCount(const upb_FileDef* f) { - return f->public_dep_count; +const upb_EnumValueDef* upb_EnumDef_FindValueByName(const upb_EnumDef* e, + const char* name) { + return upb_EnumDef_FindValueByNameWithSize(e, name, strlen(name)); } -int upb_FileDef_WeakDependencyCount(const upb_FileDef* f) { - return f->weak_dep_count; +const upb_EnumValueDef* upb_EnumDef_FindValueByNameWithSize( + const upb_EnumDef* e, const char* name, size_t size) { + upb_value v; + return upb_strtable_lookup2(&e->ntoi, name, size, &v) + ? upb_value_getconstptr(v) + : NULL; } -const int32_t* _upb_FileDef_PublicDependencyIndexes(const upb_FileDef* f) { - return f->public_deps; +const upb_EnumValueDef* upb_EnumDef_FindValueByNumber(const upb_EnumDef* e, + int32_t num) { + upb_value v; + return upb_inttable_lookup(&e->iton, num, &v) ? 
upb_value_getconstptr(v) + : NULL; } -const int32_t* _upb_FileDef_WeakDependencyIndexes(const upb_FileDef* f) { - return f->weak_deps; +bool upb_EnumDef_CheckNumber(const upb_EnumDef* e, int32_t num) { + // We could use upb_EnumDef_FindValueByNumber(e, num) != NULL, but we expect + // this to be faster (especially for small numbers). + return upb_MiniTableEnum_CheckValue(e->layout, num); } -int upb_FileDef_TopLevelEnumCount(const upb_FileDef* f) { - return f->top_lvl_enum_count; +const upb_EnumValueDef* upb_EnumDef_Value(const upb_EnumDef* e, int i) { + UPB_ASSERT(0 <= i && i < e->value_count); + return _upb_EnumValueDef_At(e->values, i); } -int upb_FileDef_TopLevelExtensionCount(const upb_FileDef* f) { - return f->top_lvl_ext_count; -} +bool upb_EnumDef_IsClosed(const upb_EnumDef* e) { return e->is_closed; } -int upb_FileDef_ServiceCount(const upb_FileDef* f) { return f->service_count; } +bool upb_EnumDef_MiniDescriptorEncode(const upb_EnumDef* e, upb_Arena* a, + upb_StringView* out) { + upb_DescState s; + _upb_DescState_Init(&s); -const upb_FileDef* upb_FileDef_Dependency(const upb_FileDef* f, int i) { - UPB_ASSERT(0 <= i && i < f->dep_count); - return f->deps[i]; -} + const upb_EnumValueDef** sorted = NULL; + if (!e->is_sorted) { + sorted = _upb_EnumValueDefs_Sorted(e->values, e->value_count, a); + if (!sorted) return false; + } -const upb_FileDef* upb_FileDef_PublicDependency(const upb_FileDef* f, int i) { - UPB_ASSERT(0 <= i && i < f->public_dep_count); - return f->deps[f->public_deps[i]]; -} + if (!_upb_DescState_Grow(&s, a)) return false; + s.ptr = upb_MtDataEncoder_StartEnum(&s.e, s.ptr); -const upb_FileDef* upb_FileDef_WeakDependency(const upb_FileDef* f, int i) { - UPB_ASSERT(0 <= i && i < f->public_dep_count); - return f->deps[f->weak_deps[i]]; -} + // Duplicate values are allowed but we only encode each value once. 
+ uint32_t previous = 0; -const upb_MessageDef* upb_FileDef_TopLevelMessage(const upb_FileDef* f, int i) { - UPB_ASSERT(0 <= i && i < f->top_lvl_msg_count); - return _upb_MessageDef_At(f->top_lvl_msgs, i); -} + for (int i = 0; i < e->value_count; i++) { + const uint32_t current = + upb_EnumValueDef_Number(sorted ? sorted[i] : upb_EnumDef_Value(e, i)); + if (i != 0 && previous == current) continue; -const upb_EnumDef* upb_FileDef_TopLevelEnum(const upb_FileDef* f, int i) { - UPB_ASSERT(0 <= i && i < f->top_lvl_enum_count); - return _upb_EnumDef_At(f->top_lvl_enums, i); -} + if (!_upb_DescState_Grow(&s, a)) return false; + s.ptr = upb_MtDataEncoder_PutEnumValue(&s.e, s.ptr, current); + previous = current; + } -const upb_FieldDef* upb_FileDef_TopLevelExtension(const upb_FileDef* f, int i) { - UPB_ASSERT(0 <= i && i < f->top_lvl_ext_count); - return _upb_FieldDef_At(f->top_lvl_exts, i); -} + if (!_upb_DescState_Grow(&s, a)) return false; + s.ptr = upb_MtDataEncoder_EndEnum(&s.e, s.ptr); -const upb_ServiceDef* upb_FileDef_Service(const upb_FileDef* f, int i) { - UPB_ASSERT(0 <= i && i < f->service_count); - return _upb_ServiceDef_At(f->services, i); + // There will always be room for this '\0' in the encoder buffer because + // kUpb_MtDataEncoder_MinSize is overkill for upb_MtDataEncoder_EndEnum(). 
+ UPB_ASSERT(s.ptr < s.buf + s.bufsize); + *s.ptr = '\0'; + + out->data = s.buf; + out->size = s.ptr - s.buf; + return true; } -const upb_DefPool* upb_FileDef_Pool(const upb_FileDef* f) { return f->symtab; } +static upb_MiniTableEnum* create_enumlayout(upb_DefBuilder* ctx, + const upb_EnumDef* e) { + upb_StringView sv; + bool ok = upb_EnumDef_MiniDescriptorEncode(e, ctx->tmp_arena, &sv); + if (!ok) _upb_DefBuilder_Errf(ctx, "OOM while building enum MiniDescriptor"); -const upb_MiniTableExtension* _upb_FileDef_ExtensionMiniTable( - const upb_FileDef* f, int i) { - return f->ext_layouts[i]; + upb_Status status; + upb_MiniTableEnum* layout = + upb_MiniTableEnum_Build(sv.data, sv.size, ctx->arena, &status); + if (!layout) + _upb_DefBuilder_Errf(ctx, "Error building enum MiniTable: %s", status.msg); + return layout; } -static char* strviewdup(upb_DefBuilder* ctx, upb_StringView view) { - char* ret = upb_strdup2(view.data, view.size, _upb_DefBuilder_Arena(ctx)); - if (!ret) _upb_DefBuilder_OomErr(ctx); - return ret; +static upb_StringView* _upb_EnumReservedNames_New( + upb_DefBuilder* ctx, int n, const upb_StringView* protos) { + upb_StringView* sv = _upb_DefBuilder_Alloc(ctx, sizeof(upb_StringView) * n); + for (int i = 0; i < n; i++) { + sv[i].data = + upb_strdup2(protos[i].data, protos[i].size, _upb_DefBuilder_Arena(ctx)); + sv[i].size = protos[i].size; + } + return sv; } -static bool streql_view(upb_StringView view, const char* b) { - return view.size == strlen(b) && memcmp(view.data, b, view.size) == 0; -} +static void create_enumdef(upb_DefBuilder* ctx, const char* prefix, + const UPB_DESC(EnumDescriptorProto) * enum_proto, + upb_EnumDef* e) { + const UPB_DESC(EnumValueDescriptorProto)* const* values; + const UPB_DESC(EnumDescriptorProto_EnumReservedRange)* const* res_ranges; + const upb_StringView* res_names; + upb_StringView name; + size_t n_value, n_res_range, n_res_name; -static int count_exts_in_msg(const UPB_DESC(DescriptorProto) * msg_proto) { - size_t n; - 
UPB_DESC(DescriptorProto_extension)(msg_proto, &n); - int ext_count = n; + // Must happen before _upb_DefBuilder_Add() + e->file = _upb_DefBuilder_File(ctx); - const UPB_DESC(DescriptorProto)* const* nested_msgs = - UPB_DESC(DescriptorProto_nested_type)(msg_proto, &n); - for (size_t i = 0; i < n; i++) { - ext_count += count_exts_in_msg(nested_msgs[i]); - } + name = UPB_DESC(EnumDescriptorProto_name)(enum_proto); - return ext_count; -} + e->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); + _upb_DefBuilder_Add(ctx, e->full_name, + _upb_DefType_Pack(e, UPB_DEFTYPE_ENUM)); -// Allocate and initialize one file def, and add it to the context object. -void _upb_FileDef_Create(upb_DefBuilder* ctx, - const UPB_DESC(FileDescriptorProto) * file_proto) { - upb_FileDef* file = _upb_DefBuilder_Alloc(ctx, sizeof(upb_FileDef)); - ctx->file = file; + e->is_closed = (!UPB_TREAT_PROTO2_ENUMS_LIKE_PROTO3) && + (upb_FileDef_Syntax(e->file) == kUpb_Syntax_Proto2); - const UPB_DESC(DescriptorProto)* const* msgs; - const UPB_DESC(EnumDescriptorProto)* const* enums; - const UPB_DESC(FieldDescriptorProto)* const* exts; - const UPB_DESC(ServiceDescriptorProto)* const* services; - const upb_StringView* strs; - const int32_t* public_deps; - const int32_t* weak_deps; - size_t n; + values = UPB_DESC(EnumDescriptorProto_value)(enum_proto, &n_value); - file->symtab = ctx->symtab; + bool ok = upb_strtable_init(&e->ntoi, n_value, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); - // Count all extensions in the file, to build a flat array of layouts. - UPB_DESC(FileDescriptorProto_extension)(file_proto, &n); - int ext_count = n; - msgs = UPB_DESC(FileDescriptorProto_message_type)(file_proto, &n); - for (int i = 0; i < n; i++) { - ext_count += count_exts_in_msg(msgs[i]); - } - file->ext_count = ext_count; + ok = upb_inttable_init(&e->iton, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); - if (ctx->layout) { - // We are using the ext layouts that were passed in. 
- file->ext_layouts = ctx->layout->exts; - if (ctx->layout->ext_count != file->ext_count) { - _upb_DefBuilder_Errf(ctx, - "Extension count did not match layout (%d vs %d)", - ctx->layout->ext_count, file->ext_count); - } - } else { - // We are building ext layouts from scratch. - file->ext_layouts = _upb_DefBuilder_Alloc( - ctx, sizeof(*file->ext_layouts) * file->ext_count); - upb_MiniTableExtension* ext = - _upb_DefBuilder_Alloc(ctx, sizeof(*ext) * file->ext_count); - for (int i = 0; i < file->ext_count; i++) { - file->ext_layouts[i] = &ext[i]; - } - } + e->defaultval = 0; + e->value_count = n_value; + e->values = + _upb_EnumValueDefs_New(ctx, prefix, n_value, values, e, &e->is_sorted); - upb_StringView name = UPB_DESC(FileDescriptorProto_name)(file_proto); - file->name = strviewdup(ctx, name); - if (strlen(file->name) != name.size) { - _upb_DefBuilder_Errf(ctx, "File name contained embedded NULL"); + if (n_value == 0) { + _upb_DefBuilder_Errf(ctx, "enums must contain at least one value (%s)", + e->full_name); } - upb_StringView package = UPB_DESC(FileDescriptorProto_package)(file_proto); - - if (package.size) { - _upb_DefBuilder_CheckIdentFull(ctx, package); - file->package = strviewdup(ctx, package); - } else { - file->package = NULL; - } + res_ranges = + UPB_DESC(EnumDescriptorProto_reserved_range)(enum_proto, &n_res_range); + e->res_range_count = n_res_range; + e->res_ranges = _upb_EnumReservedRanges_New(ctx, n_res_range, res_ranges, e); - upb_StringView edition = UPB_DESC(FileDescriptorProto_edition)(file_proto); + res_names = + UPB_DESC(EnumDescriptorProto_reserved_name)(enum_proto, &n_res_name); + e->res_name_count = n_res_name; + e->res_names = _upb_EnumReservedNames_New(ctx, n_res_name, res_names); - if (edition.size == 0) { - file->edition = NULL; - } else { - // TODO(b/267770604): How should we validate this? 
- file->edition = strviewdup(ctx, edition); - if (strlen(file->edition) != edition.size) { - _upb_DefBuilder_Errf(ctx, "Edition name contained embedded NULL"); - } - } + UPB_DEF_SET_OPTIONS(e->opts, EnumDescriptorProto, EnumOptions, enum_proto); - if (UPB_DESC(FileDescriptorProto_has_syntax)(file_proto)) { - upb_StringView syntax = UPB_DESC(FileDescriptorProto_syntax)(file_proto); + upb_inttable_compact(&e->iton, ctx->arena); - if (streql_view(syntax, "proto2")) { - file->syntax = kUpb_Syntax_Proto2; - } else if (streql_view(syntax, "proto3")) { - file->syntax = kUpb_Syntax_Proto3; + if (e->is_closed) { + if (ctx->layout) { + UPB_ASSERT(ctx->enum_count < ctx->layout->enum_count); + e->layout = ctx->layout->enums[ctx->enum_count++]; } else { - _upb_DefBuilder_Errf(ctx, "Invalid syntax '" UPB_STRINGVIEW_FORMAT "'", - UPB_STRINGVIEW_ARGS(syntax)); + e->layout = create_enumlayout(ctx, e); } } else { - file->syntax = kUpb_Syntax_Proto2; + e->layout = NULL; } +} - // Read options. - UPB_DEF_SET_OPTIONS(file->opts, FileDescriptorProto, FileOptions, file_proto); +upb_EnumDef* _upb_EnumDefs_New( + upb_DefBuilder* ctx, int n, + const UPB_DESC(EnumDescriptorProto) * const* protos, + const upb_MessageDef* containing_type) { + _upb_DefType_CheckPadding(sizeof(upb_EnumDef)); - // Verify dependencies. - strs = UPB_DESC(FileDescriptorProto_dependency)(file_proto, &n); - file->dep_count = n; - file->deps = _upb_DefBuilder_Alloc(ctx, sizeof(*file->deps) * n); + // If a containing type is defined then get the full name from that. + // Otherwise use the package name from the file def. + const char* name = containing_type ? 
upb_MessageDef_FullName(containing_type) + : _upb_FileDef_RawPackage(ctx->file); - for (size_t i = 0; i < n; i++) { - upb_StringView str = strs[i]; - file->deps[i] = - upb_DefPool_FindFileByNameWithSize(ctx->symtab, str.data, str.size); - if (!file->deps[i]) { - _upb_DefBuilder_Errf(ctx, - "Depends on file '" UPB_STRINGVIEW_FORMAT - "', but it has not been loaded", - UPB_STRINGVIEW_ARGS(str)); - } + upb_EnumDef* e = _upb_DefBuilder_Alloc(ctx, sizeof(upb_EnumDef) * n); + for (int i = 0; i < n; i++) { + create_enumdef(ctx, name, protos[i], &e[i]); + e[i].containing_type = containing_type; } + return e; +} - public_deps = UPB_DESC(FileDescriptorProto_public_dependency)(file_proto, &n); - file->public_dep_count = n; - file->public_deps = - _upb_DefBuilder_Alloc(ctx, sizeof(*file->public_deps) * n); - int32_t* mutable_public_deps = (int32_t*)file->public_deps; - for (size_t i = 0; i < n; i++) { - if (public_deps[i] >= file->dep_count) { - _upb_DefBuilder_Errf(ctx, "public_dep %d is out of range", - (int)public_deps[i]); - } - mutable_public_deps[i] = public_deps[i]; - } - weak_deps = UPB_DESC(FileDescriptorProto_weak_dependency)(file_proto, &n); - file->weak_dep_count = n; - file->weak_deps = _upb_DefBuilder_Alloc(ctx, sizeof(*file->weak_deps) * n); - int32_t* mutable_weak_deps = (int32_t*)file->weak_deps; - for (size_t i = 0; i < n; i++) { - if (weak_deps[i] >= file->dep_count) { - _upb_DefBuilder_Errf(ctx, "weak_dep %d is out of range", - (int)weak_deps[i]); - } - mutable_weak_deps[i] = weak_deps[i]; - } - // Create enums. - enums = UPB_DESC(FileDescriptorProto_enum_type)(file_proto, &n); - file->top_lvl_enum_count = n; - file->top_lvl_enums = _upb_EnumDefs_New(ctx, n, enums, NULL); +// Must be last. - // Create extensions. 
- exts = UPB_DESC(FileDescriptorProto_extension)(file_proto, &n); - file->top_lvl_ext_count = n; - file->top_lvl_exts = _upb_Extensions_New(ctx, n, exts, file->package, NULL); +struct upb_EnumReservedRange { + int32_t start; + int32_t end; +}; - // Create messages. - msgs = UPB_DESC(FileDescriptorProto_message_type)(file_proto, &n); - file->top_lvl_msg_count = n; - file->top_lvl_msgs = _upb_MessageDefs_New(ctx, n, msgs, NULL); +upb_EnumReservedRange* _upb_EnumReservedRange_At(const upb_EnumReservedRange* r, + int i) { + return (upb_EnumReservedRange*)&r[i]; +} - // Create services. - services = UPB_DESC(FileDescriptorProto_service)(file_proto, &n); - file->service_count = n; - file->services = _upb_ServiceDefs_New(ctx, n, services); - - // Now that all names are in the table, build layouts and resolve refs. +int32_t upb_EnumReservedRange_Start(const upb_EnumReservedRange* r) { + return r->start; +} +int32_t upb_EnumReservedRange_End(const upb_EnumReservedRange* r) { + return r->end; +} - for (int i = 0; i < file->top_lvl_msg_count; i++) { - upb_MessageDef* m = (upb_MessageDef*)upb_FileDef_TopLevelMessage(file, i); - _upb_MessageDef_Resolve(ctx, m); - } +upb_EnumReservedRange* _upb_EnumReservedRanges_New( + upb_DefBuilder* ctx, int n, + const UPB_DESC(EnumDescriptorProto_EnumReservedRange) * const* protos, + const upb_EnumDef* e) { + upb_EnumReservedRange* r = + _upb_DefBuilder_Alloc(ctx, sizeof(upb_EnumReservedRange) * n); - for (int i = 0; i < file->top_lvl_ext_count; i++) { - upb_FieldDef* f = (upb_FieldDef*)upb_FileDef_TopLevelExtension(file, i); - _upb_FieldDef_Resolve(ctx, file->package, f); - } + for (int i = 0; i < n; i++) { + const int32_t start = + UPB_DESC(EnumDescriptorProto_EnumReservedRange_start)(protos[i]); + const int32_t end = + UPB_DESC(EnumDescriptorProto_EnumReservedRange_end)(protos[i]); - for (int i = 0; i < file->top_lvl_msg_count; i++) { - upb_MessageDef* m = (upb_MessageDef*)upb_FileDef_TopLevelMessage(file, i); - 
_upb_MessageDef_CreateMiniTable(ctx, (upb_MessageDef*)m); - } + // A full validation would also check that each range is disjoint, and that + // none of the fields overlap with the extension ranges, but we are just + // sanity checking here. - for (int i = 0; i < file->top_lvl_ext_count; i++) { - upb_FieldDef* f = (upb_FieldDef*)upb_FileDef_TopLevelExtension(file, i); - _upb_FieldDef_BuildMiniTableExtension(ctx, f); - } + // Note: Not a typo! Unlike extension ranges and message reserved ranges, + // the end value of an enum reserved range is *inclusive*! + if (end < start) { + _upb_DefBuilder_Errf(ctx, "Reserved range (%d, %d) is invalid, enum=%s\n", + (int)start, (int)end, upb_EnumDef_FullName(e)); + } - for (int i = 0; i < file->top_lvl_msg_count; i++) { - upb_MessageDef* m = (upb_MessageDef*)upb_FileDef_TopLevelMessage(file, i); - _upb_MessageDef_LinkMiniTable(ctx, m); + r[i].start = start; + r[i].end = end; } - if (file->ext_count) { - bool ok = upb_ExtensionRegistry_AddArray( - _upb_DefPool_ExtReg(ctx->symtab), file->ext_layouts, file->ext_count); - if (!ok) _upb_DefBuilder_OomErr(ctx); - } + return r; } -#include - // Must be last. -bool upb_Message_HasFieldByDef(const upb_Message* msg, const upb_FieldDef* f) { - UPB_ASSERT(upb_FieldDef_HasPresence(f)); - return upb_Message_HasField(msg, upb_FieldDef_MiniTable(f)); -} +struct upb_EnumValueDef { + const UPB_DESC(EnumValueOptions) * opts; + const upb_EnumDef* parent; + const char* full_name; + int32_t number; +}; -const upb_FieldDef* upb_Message_WhichOneof(const upb_Message* msg, - const upb_OneofDef* o) { - const upb_FieldDef* f = upb_OneofDef_Field(o, 0); - if (upb_OneofDef_IsSynthetic(o)) { - UPB_ASSERT(upb_OneofDef_FieldCount(o) == 1); - return upb_Message_HasFieldByDef(msg, f) ? f : NULL; - } else { - const upb_MiniTableField* field = upb_FieldDef_MiniTable(f); - uint32_t oneof_case = upb_Message_WhichOneofFieldNumber(msg, field); - f = oneof_case ? 
upb_OneofDef_LookupNumber(o, oneof_case) : NULL; - UPB_ASSERT((f != NULL) == (oneof_case != 0)); - return f; - } +upb_EnumValueDef* _upb_EnumValueDef_At(const upb_EnumValueDef* v, int i) { + return (upb_EnumValueDef*)&v[i]; } -upb_MessageValue upb_Message_GetFieldByDef(const upb_Message* msg, - const upb_FieldDef* f) { - upb_MessageValue default_val = upb_FieldDef_Default(f); - upb_MessageValue ret; - _upb_Message_GetField(msg, upb_FieldDef_MiniTable(f), &default_val, &ret); - return ret; +static int _upb_EnumValueDef_Compare(const void* p1, const void* p2) { + const uint32_t v1 = (*(const upb_EnumValueDef**)p1)->number; + const uint32_t v2 = (*(const upb_EnumValueDef**)p2)->number; + return (v1 < v2) ? -1 : (v1 > v2); } -upb_MutableMessageValue upb_Message_Mutable(upb_Message* msg, - const upb_FieldDef* f, - upb_Arena* a) { - UPB_ASSERT(upb_FieldDef_IsSubMessage(f) || upb_FieldDef_IsRepeated(f)); - if (upb_FieldDef_HasPresence(f) && !upb_Message_HasFieldByDef(msg, f)) { - // We need to skip the upb_Message_GetFieldByDef() call in this case. - goto make; - } +const upb_EnumValueDef** _upb_EnumValueDefs_Sorted(const upb_EnumValueDef* v, + int n, upb_Arena* a) { + // TODO: Try to replace this arena alloc with a persistent scratch buffer. 
+ upb_EnumValueDef** out = + (upb_EnumValueDef**)upb_Arena_Malloc(a, n * sizeof(void*)); + if (!out) return NULL; - upb_MessageValue val = upb_Message_GetFieldByDef(msg, f); - if (val.array_val) { - return (upb_MutableMessageValue){.array = (upb_Array*)val.array_val}; + for (int i = 0; i < n; i++) { + out[i] = (upb_EnumValueDef*)&v[i]; } + qsort(out, n, sizeof(void*), _upb_EnumValueDef_Compare); - upb_MutableMessageValue ret; -make: - if (!a) return (upb_MutableMessageValue){.array = NULL}; - if (upb_FieldDef_IsMap(f)) { - const upb_MessageDef* entry = upb_FieldDef_MessageSubDef(f); - const upb_FieldDef* key = - upb_MessageDef_FindFieldByNumber(entry, kUpb_MapEntry_KeyFieldNumber); - const upb_FieldDef* value = - upb_MessageDef_FindFieldByNumber(entry, kUpb_MapEntry_ValueFieldNumber); - ret.map = - upb_Map_New(a, upb_FieldDef_CType(key), upb_FieldDef_CType(value)); - } else if (upb_FieldDef_IsRepeated(f)) { - ret.array = upb_Array_New(a, upb_FieldDef_CType(f)); - } else { - UPB_ASSERT(upb_FieldDef_IsSubMessage(f)); - const upb_MessageDef* m = upb_FieldDef_MessageSubDef(f); - ret.msg = upb_Message_New(upb_MessageDef_MiniTable(m), a); - } + return (const upb_EnumValueDef**)out; +} - val.array_val = ret.array; - upb_Message_SetFieldByDef(msg, f, val, a); +const UPB_DESC(EnumValueOptions) * + upb_EnumValueDef_Options(const upb_EnumValueDef* v) { + return v->opts; +} - return ret; +bool upb_EnumValueDef_HasOptions(const upb_EnumValueDef* v) { + return v->opts != (void*)kUpbDefOptDefault; } -bool upb_Message_SetFieldByDef(upb_Message* msg, const upb_FieldDef* f, - upb_MessageValue val, upb_Arena* a) { - return _upb_Message_SetField(msg, upb_FieldDef_MiniTable(f), &val, a); +const upb_EnumDef* upb_EnumValueDef_Enum(const upb_EnumValueDef* v) { + return v->parent; } -void upb_Message_ClearFieldByDef(upb_Message* msg, const upb_FieldDef* f) { - upb_Message_ClearField(msg, upb_FieldDef_MiniTable(f)); +const char* upb_EnumValueDef_FullName(const upb_EnumValueDef* v) { + 
return v->full_name; } -void upb_Message_ClearByDef(upb_Message* msg, const upb_MessageDef* m) { - upb_Message_Clear(msg, upb_MessageDef_MiniTable(m)); +const char* upb_EnumValueDef_Name(const upb_EnumValueDef* v) { + return _upb_DefBuilder_FullToShort(v->full_name); } -bool upb_Message_Next(const upb_Message* msg, const upb_MessageDef* m, - const upb_DefPool* ext_pool, const upb_FieldDef** out_f, - upb_MessageValue* out_val, size_t* iter) { - size_t i = *iter; - size_t n = upb_MessageDef_FieldCount(m); - UPB_UNUSED(ext_pool); +int32_t upb_EnumValueDef_Number(const upb_EnumValueDef* v) { return v->number; } - // Iterate over normal fields, returning the first one that is set. - while (++i < n) { - const upb_FieldDef* f = upb_MessageDef_Field(m, i); - const upb_MiniTableField* field = upb_FieldDef_MiniTable(f); - upb_MessageValue val = upb_Message_GetFieldByDef(msg, f); +uint32_t upb_EnumValueDef_Index(const upb_EnumValueDef* v) { + // Compute index in our parent's array. + return v - upb_EnumDef_Value(v->parent, 0); +} - // Skip field if unset or empty. 
- if (upb_MiniTableField_HasPresence(field)) { - if (!upb_Message_HasFieldByDef(msg, f)) continue; - } else { - switch (upb_FieldMode_Get(field)) { - case kUpb_FieldMode_Map: - if (!val.map_val || upb_Map_Size(val.map_val) == 0) continue; - break; - case kUpb_FieldMode_Array: - if (!val.array_val || upb_Array_Size(val.array_val) == 0) continue; - break; - case kUpb_FieldMode_Scalar: - if (!_upb_MiniTable_ValueIsNonZero(&val, field)) continue; - break; - } - } +static void create_enumvaldef(upb_DefBuilder* ctx, const char* prefix, + const UPB_DESC(EnumValueDescriptorProto) * + val_proto, + upb_EnumDef* e, upb_EnumValueDef* v) { + upb_StringView name = UPB_DESC(EnumValueDescriptorProto_name)(val_proto); - *out_val = val; - *out_f = f; - *iter = i; - return true; + v->parent = e; // Must happen prior to _upb_DefBuilder_Add() + v->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); + v->number = UPB_DESC(EnumValueDescriptorProto_number)(val_proto); + _upb_DefBuilder_Add(ctx, v->full_name, + _upb_DefType_Pack(v, UPB_DEFTYPE_ENUMVAL)); + + UPB_DEF_SET_OPTIONS(v->opts, EnumValueDescriptorProto, EnumValueOptions, + val_proto); + + bool ok = _upb_EnumDef_Insert(e, v, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); +} + +// Allocate and initialize an array of |n| enum value defs owned by |e|. +upb_EnumValueDef* _upb_EnumValueDefs_New( + upb_DefBuilder* ctx, const char* prefix, int n, + const UPB_DESC(EnumValueDescriptorProto) * const* protos, upb_EnumDef* e, + bool* is_sorted) { + _upb_DefType_CheckPadding(sizeof(upb_EnumValueDef)); + + upb_EnumValueDef* v = + _upb_DefBuilder_Alloc(ctx, sizeof(upb_EnumValueDef) * n); + + *is_sorted = true; + uint32_t previous = 0; + for (int i = 0; i < n; i++) { + create_enumvaldef(ctx, prefix, protos[i], e, &v[i]); + + const uint32_t current = v[i].number; + if (previous > current) *is_sorted = false; + previous = current; } - if (ext_pool) { - // Return any extensions that are set. 
- size_t count; - const upb_Message_Extension* ext = _upb_Message_Getexts(msg, &count); - if (i - n < count) { - ext += count - 1 - (i - n); - memcpy(out_val, &ext->data, sizeof(*out_val)); - *out_f = upb_DefPool_FindExtensionByMiniTable(ext_pool, ext->ext); - *iter = i; - return true; - } + if (upb_FileDef_Syntax(ctx->file) == kUpb_Syntax_Proto3 && n > 0 && + v[0].number != 0) { + _upb_DefBuilder_Errf(ctx, + "for proto3, the first enum value must be zero (%s)", + upb_EnumDef_FullName(e)); } - *iter = i; - return false; + return v; } -bool _upb_Message_DiscardUnknown(upb_Message* msg, const upb_MessageDef* m, - int depth) { - size_t iter = kUpb_Message_Begin; - const upb_FieldDef* f; - upb_MessageValue val; - bool ret = true; - if (--depth == 0) return false; - _upb_Message_DiscardUnknown_shallow(msg); +// Must be last. - while (upb_Message_Next(msg, m, NULL /*ext_pool*/, &f, &val, &iter)) { - const upb_MessageDef* subm = upb_FieldDef_MessageSubDef(f); - if (!subm) continue; - if (upb_FieldDef_IsMap(f)) { - const upb_FieldDef* val_f = upb_MessageDef_FindFieldByNumber(subm, 2); - const upb_MessageDef* val_m = upb_FieldDef_MessageSubDef(val_f); - upb_Map* map = (upb_Map*)val.map_val; - size_t iter = kUpb_Map_Begin; - - if (!val_m) continue; - - upb_MessageValue map_key, map_val; - while (upb_Map_Next(map, &map_key, &map_val, &iter)) { - if (!_upb_Message_DiscardUnknown((upb_Message*)map_val.msg_val, val_m, - depth)) { - ret = false; - } - } - } else if (upb_FieldDef_IsRepeated(f)) { - const upb_Array* arr = val.array_val; - size_t i, n = upb_Array_Size(arr); - for (i = 0; i < n; i++) { - upb_MessageValue elem = upb_Array_Get(arr, i); - if (!_upb_Message_DiscardUnknown((upb_Message*)elem.msg_val, subm, - depth)) { - ret = false; - } - } - } else { - if (!_upb_Message_DiscardUnknown((upb_Message*)val.msg_val, subm, - depth)) { - ret = false; - } - } - } +struct upb_ExtensionRange { + const UPB_DESC(ExtensionRangeOptions) * opts; + int32_t start; + int32_t end; +}; - 
return ret; +upb_ExtensionRange* _upb_ExtensionRange_At(const upb_ExtensionRange* r, int i) { + return (upb_ExtensionRange*)&r[i]; } -bool upb_Message_DiscardUnknown(upb_Message* msg, const upb_MessageDef* m, - int maxdepth) { - return _upb_Message_DiscardUnknown(msg, m, maxdepth); +const UPB_DESC(ExtensionRangeOptions) * + upb_ExtensionRange_Options(const upb_ExtensionRange* r) { + return r->opts; } +bool upb_ExtensionRange_HasOptions(const upb_ExtensionRange* r) { + return r->opts != (void*)kUpbDefOptDefault; +} -// Must be last. +int32_t upb_ExtensionRange_Start(const upb_ExtensionRange* r) { + return r->start; +} -struct upb_MessageDef { - const UPB_DESC(MessageOptions) * opts; - const upb_MiniTable* layout; - const upb_FileDef* file; - const upb_MessageDef* containing_type; - const char* full_name; +int32_t upb_ExtensionRange_End(const upb_ExtensionRange* r) { return r->end; } - // Tables for looking up fields by number and name. - upb_inttable itof; - upb_strtable ntof; +upb_ExtensionRange* _upb_ExtensionRanges_New( + upb_DefBuilder* ctx, int n, + const UPB_DESC(DescriptorProto_ExtensionRange) * const* protos, + const upb_MessageDef* m) { + upb_ExtensionRange* r = + _upb_DefBuilder_Alloc(ctx, sizeof(upb_ExtensionRange) * n); - /* All nested defs. - * MEM: We could save some space here by putting nested defs in a contiguous - * region and calculating counts from offsets or vice-versa. 
*/ - const upb_FieldDef* fields; - const upb_OneofDef* oneofs; - const upb_ExtensionRange* ext_ranges; - const upb_StringView* res_names; - const upb_MessageDef* nested_msgs; - const upb_MessageReservedRange* res_ranges; - const upb_EnumDef* nested_enums; - const upb_FieldDef* nested_exts; + for (int i = 0; i < n; i++) { + const int32_t start = + UPB_DESC(DescriptorProto_ExtensionRange_start)(protos[i]); + const int32_t end = UPB_DESC(DescriptorProto_ExtensionRange_end)(protos[i]); + const int32_t max = UPB_DESC(MessageOptions_message_set_wire_format)( + upb_MessageDef_Options(m)) + ? INT32_MAX + : kUpb_MaxFieldNumber + 1; - // TODO(salo): These counters don't need anywhere near 32 bits. - int field_count; - int real_oneof_count; - int oneof_count; - int ext_range_count; - int res_range_count; - int res_name_count; - int nested_msg_count; - int nested_enum_count; - int nested_ext_count; - bool in_message_set; - bool is_sorted; - upb_WellKnown well_known_type; -#if UINTPTR_MAX == 0xffffffff - uint32_t padding; // Increase size to a multiple of 8. -#endif -}; + // A full validation would also check that each range is disjoint, and that + // none of the fields overlap with the extension ranges, but we are just + // sanity checking here. 
+ if (start < 1 || end <= start || end > max) { + _upb_DefBuilder_Errf(ctx, + "Extension range (%d, %d) is invalid, message=%s\n", + (int)start, (int)end, upb_MessageDef_FullName(m)); + } -static void assign_msg_wellknowntype(upb_MessageDef* m) { - const char* name = m->full_name; - if (name == NULL) { - m->well_known_type = kUpb_WellKnown_Unspecified; - return; - } - if (!strcmp(name, "google.protobuf.Any")) { - m->well_known_type = kUpb_WellKnown_Any; - } else if (!strcmp(name, "google.protobuf.FieldMask")) { - m->well_known_type = kUpb_WellKnown_FieldMask; - } else if (!strcmp(name, "google.protobuf.Duration")) { - m->well_known_type = kUpb_WellKnown_Duration; - } else if (!strcmp(name, "google.protobuf.Timestamp")) { - m->well_known_type = kUpb_WellKnown_Timestamp; - } else if (!strcmp(name, "google.protobuf.DoubleValue")) { - m->well_known_type = kUpb_WellKnown_DoubleValue; - } else if (!strcmp(name, "google.protobuf.FloatValue")) { - m->well_known_type = kUpb_WellKnown_FloatValue; - } else if (!strcmp(name, "google.protobuf.Int64Value")) { - m->well_known_type = kUpb_WellKnown_Int64Value; - } else if (!strcmp(name, "google.protobuf.UInt64Value")) { - m->well_known_type = kUpb_WellKnown_UInt64Value; - } else if (!strcmp(name, "google.protobuf.Int32Value")) { - m->well_known_type = kUpb_WellKnown_Int32Value; - } else if (!strcmp(name, "google.protobuf.UInt32Value")) { - m->well_known_type = kUpb_WellKnown_UInt32Value; - } else if (!strcmp(name, "google.protobuf.BoolValue")) { - m->well_known_type = kUpb_WellKnown_BoolValue; - } else if (!strcmp(name, "google.protobuf.StringValue")) { - m->well_known_type = kUpb_WellKnown_StringValue; - } else if (!strcmp(name, "google.protobuf.BytesValue")) { - m->well_known_type = kUpb_WellKnown_BytesValue; - } else if (!strcmp(name, "google.protobuf.Value")) { - m->well_known_type = kUpb_WellKnown_Value; - } else if (!strcmp(name, "google.protobuf.ListValue")) { - m->well_known_type = kUpb_WellKnown_ListValue; - } else if 
(!strcmp(name, "google.protobuf.Struct")) { - m->well_known_type = kUpb_WellKnown_Struct; - } else { - m->well_known_type = kUpb_WellKnown_Unspecified; + r[i].start = start; + r[i].end = end; + UPB_DEF_SET_OPTIONS(r[i].opts, DescriptorProto_ExtensionRange, + ExtensionRangeOptions, protos[i]); } -} -upb_MessageDef* _upb_MessageDef_At(const upb_MessageDef* m, int i) { - return (upb_MessageDef*)&m[i]; + return r; } -bool _upb_MessageDef_IsValidExtensionNumber(const upb_MessageDef* m, int n) { - for (int i = 0; i < m->ext_range_count; i++) { - const upb_ExtensionRange* r = upb_MessageDef_ExtensionRange(m, i); - if (upb_ExtensionRange_Start(r) <= n && n < upb_ExtensionRange_End(r)) { - return true; - } - } - return false; -} -const UPB_DESC(MessageOptions) * - upb_MessageDef_Options(const upb_MessageDef* m) { - return m->opts; -} +#include +#include -bool upb_MessageDef_HasOptions(const upb_MessageDef* m) { - return m->opts != (void*)kUpbDefOptDefault; -} -const char* upb_MessageDef_FullName(const upb_MessageDef* m) { - return m->full_name; -} +// Must be last. -const upb_FileDef* upb_MessageDef_File(const upb_MessageDef* m) { - return m->file; -} +#define UPB_FIELD_TYPE_UNSPECIFIED 0 -const upb_MessageDef* upb_MessageDef_ContainingType(const upb_MessageDef* m) { - return m->containing_type; +typedef struct { + size_t len; + char str[1]; // Null-terminated string data follows. +} str_t; + +struct upb_FieldDef { + const UPB_DESC(FieldOptions) * opts; + const upb_FileDef* file; + const upb_MessageDef* msgdef; + const char* full_name; + const char* json_name; + union { + int64_t sint; + uint64_t uint; + double dbl; + float flt; + bool boolean; + str_t* str; + void* msg; // Always NULL. 
+ } defaultval; + union { + const upb_OneofDef* oneof; + const upb_MessageDef* extension_scope; + } scope; + union { + const upb_MessageDef* msgdef; + const upb_EnumDef* enumdef; + const UPB_DESC(FieldDescriptorProto) * unresolved; + } sub; + uint32_t number_; + uint16_t index_; + uint16_t layout_index; // Index into msgdef->layout->fields or file->exts + bool has_default; + bool has_json_name; + bool has_presence; + bool is_extension; + bool is_packed; + bool is_proto3_optional; + upb_FieldType type_; + upb_Label label_; +#if UINTPTR_MAX == 0xffffffff + uint32_t padding; // Increase size to a multiple of 8. +#endif +}; + +upb_FieldDef* _upb_FieldDef_At(const upb_FieldDef* f, int i) { + return (upb_FieldDef*)&f[i]; } -const char* upb_MessageDef_Name(const upb_MessageDef* m) { - return _upb_DefBuilder_FullToShort(m->full_name); +const UPB_DESC(FieldOptions) * upb_FieldDef_Options(const upb_FieldDef* f) { + return f->opts; } -upb_Syntax upb_MessageDef_Syntax(const upb_MessageDef* m) { - return upb_FileDef_Syntax(m->file); +bool upb_FieldDef_HasOptions(const upb_FieldDef* f) { + return f->opts != (void*)kUpbDefOptDefault; } -const upb_FieldDef* upb_MessageDef_FindFieldByNumber(const upb_MessageDef* m, - uint32_t i) { - upb_value val; - return upb_inttable_lookup(&m->itof, i, &val) ? 
upb_value_getconstptr(val) - : NULL; +const char* upb_FieldDef_FullName(const upb_FieldDef* f) { + return f->full_name; } -const upb_FieldDef* upb_MessageDef_FindFieldByNameWithSize( - const upb_MessageDef* m, const char* name, size_t size) { - upb_value val; - - if (!upb_strtable_lookup2(&m->ntof, name, size, &val)) { - return NULL; +upb_CType upb_FieldDef_CType(const upb_FieldDef* f) { + switch (f->type_) { + case kUpb_FieldType_Double: + return kUpb_CType_Double; + case kUpb_FieldType_Float: + return kUpb_CType_Float; + case kUpb_FieldType_Int64: + case kUpb_FieldType_SInt64: + case kUpb_FieldType_SFixed64: + return kUpb_CType_Int64; + case kUpb_FieldType_Int32: + case kUpb_FieldType_SFixed32: + case kUpb_FieldType_SInt32: + return kUpb_CType_Int32; + case kUpb_FieldType_UInt64: + case kUpb_FieldType_Fixed64: + return kUpb_CType_UInt64; + case kUpb_FieldType_UInt32: + case kUpb_FieldType_Fixed32: + return kUpb_CType_UInt32; + case kUpb_FieldType_Enum: + return kUpb_CType_Enum; + case kUpb_FieldType_Bool: + return kUpb_CType_Bool; + case kUpb_FieldType_String: + return kUpb_CType_String; + case kUpb_FieldType_Bytes: + return kUpb_CType_Bytes; + case kUpb_FieldType_Group: + case kUpb_FieldType_Message: + return kUpb_CType_Message; } - - return _upb_DefType_Unpack(val, UPB_DEFTYPE_FIELD); + UPB_UNREACHABLE(); } -const upb_OneofDef* upb_MessageDef_FindOneofByNameWithSize( - const upb_MessageDef* m, const char* name, size_t size) { - upb_value val; +upb_FieldType upb_FieldDef_Type(const upb_FieldDef* f) { return f->type_; } - if (!upb_strtable_lookup2(&m->ntof, name, size, &val)) { - return NULL; - } +uint32_t upb_FieldDef_Index(const upb_FieldDef* f) { return f->index_; } - return _upb_DefType_Unpack(val, UPB_DEFTYPE_ONEOF); -} +upb_Label upb_FieldDef_Label(const upb_FieldDef* f) { return f->label_; } -bool _upb_MessageDef_Insert(upb_MessageDef* m, const char* name, size_t len, - upb_value v, upb_Arena* a) { - return upb_strtable_insert(&m->ntof, name, len, v, a); 
-} +uint32_t upb_FieldDef_Number(const upb_FieldDef* f) { return f->number_; } -bool upb_MessageDef_FindByNameWithSize(const upb_MessageDef* m, - const char* name, size_t len, - const upb_FieldDef** out_f, - const upb_OneofDef** out_o) { - upb_value val; +bool upb_FieldDef_IsExtension(const upb_FieldDef* f) { return f->is_extension; } - if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) { - return false; - } +bool upb_FieldDef_IsPacked(const upb_FieldDef* f) { return f->is_packed; } - const upb_FieldDef* f = _upb_DefType_Unpack(val, UPB_DEFTYPE_FIELD); - const upb_OneofDef* o = _upb_DefType_Unpack(val, UPB_DEFTYPE_ONEOF); - if (out_f) *out_f = f; - if (out_o) *out_o = o; - return f || o; /* False if this was a JSON name. */ +const char* upb_FieldDef_Name(const upb_FieldDef* f) { + return _upb_DefBuilder_FullToShort(f->full_name); } -const upb_FieldDef* upb_MessageDef_FindByJsonNameWithSize( - const upb_MessageDef* m, const char* name, size_t size) { - upb_value val; - const upb_FieldDef* f; +const char* upb_FieldDef_JsonName(const upb_FieldDef* f) { + return f->json_name; +} - if (!upb_strtable_lookup2(&m->ntof, name, size, &val)) { - return NULL; - } +bool upb_FieldDef_HasJsonName(const upb_FieldDef* f) { + return f->has_json_name; +} - f = _upb_DefType_Unpack(val, UPB_DEFTYPE_FIELD); - if (!f) f = _upb_DefType_Unpack(val, UPB_DEFTYPE_FIELD_JSONNAME); +const upb_FileDef* upb_FieldDef_File(const upb_FieldDef* f) { return f->file; } - return f; +const upb_MessageDef* upb_FieldDef_ContainingType(const upb_FieldDef* f) { + return f->msgdef; } -int upb_MessageDef_ExtensionRangeCount(const upb_MessageDef* m) { - return m->ext_range_count; +const upb_MessageDef* upb_FieldDef_ExtensionScope(const upb_FieldDef* f) { + return f->is_extension ? f->scope.extension_scope : NULL; } -int upb_MessageDef_ReservedRangeCount(const upb_MessageDef* m) { - return m->res_range_count; +const upb_OneofDef* upb_FieldDef_ContainingOneof(const upb_FieldDef* f) { + return f->is_extension ? 
NULL : f->scope.oneof; } -int upb_MessageDef_ReservedNameCount(const upb_MessageDef* m) { - return m->res_name_count; +const upb_OneofDef* upb_FieldDef_RealContainingOneof(const upb_FieldDef* f) { + const upb_OneofDef* oneof = upb_FieldDef_ContainingOneof(f); + if (!oneof || upb_OneofDef_IsSynthetic(oneof)) return NULL; + return oneof; } -int upb_MessageDef_FieldCount(const upb_MessageDef* m) { - return m->field_count; -} +upb_MessageValue upb_FieldDef_Default(const upb_FieldDef* f) { + upb_MessageValue ret; -int upb_MessageDef_OneofCount(const upb_MessageDef* m) { - return m->oneof_count; -} + if (upb_FieldDef_IsRepeated(f) || upb_FieldDef_IsSubMessage(f)) { + return (upb_MessageValue){.msg_val = NULL}; + } -int upb_MessageDef_RealOneofCount(const upb_MessageDef* m) { - return m->real_oneof_count; -} + switch (upb_FieldDef_CType(f)) { + case kUpb_CType_Bool: + return (upb_MessageValue){.bool_val = f->defaultval.boolean}; + case kUpb_CType_Int64: + return (upb_MessageValue){.int64_val = f->defaultval.sint}; + case kUpb_CType_UInt64: + return (upb_MessageValue){.uint64_val = f->defaultval.uint}; + case kUpb_CType_Enum: + case kUpb_CType_Int32: + return (upb_MessageValue){.int32_val = (int32_t)f->defaultval.sint}; + case kUpb_CType_UInt32: + return (upb_MessageValue){.uint32_val = (uint32_t)f->defaultval.uint}; + case kUpb_CType_Float: + return (upb_MessageValue){.float_val = f->defaultval.flt}; + case kUpb_CType_Double: + return (upb_MessageValue){.double_val = f->defaultval.dbl}; + case kUpb_CType_String: + case kUpb_CType_Bytes: { + str_t* str = f->defaultval.str; + if (str) { + return (upb_MessageValue){ + .str_val = (upb_StringView){.data = str->str, .size = str->len}}; + } else { + return (upb_MessageValue){ + .str_val = (upb_StringView){.data = NULL, .size = 0}}; + } + } + default: + UPB_UNREACHABLE(); + } -int upb_MessageDef_NestedMessageCount(const upb_MessageDef* m) { - return m->nested_msg_count; + return ret; } -int upb_MessageDef_NestedEnumCount(const 
upb_MessageDef* m) { - return m->nested_enum_count; +const upb_MessageDef* upb_FieldDef_MessageSubDef(const upb_FieldDef* f) { + return upb_FieldDef_CType(f) == kUpb_CType_Message ? f->sub.msgdef : NULL; } -int upb_MessageDef_NestedExtensionCount(const upb_MessageDef* m) { - return m->nested_ext_count; +const upb_EnumDef* upb_FieldDef_EnumSubDef(const upb_FieldDef* f) { + return upb_FieldDef_CType(f) == kUpb_CType_Enum ? f->sub.enumdef : NULL; } -const upb_MiniTable* upb_MessageDef_MiniTable(const upb_MessageDef* m) { - return m->layout; +const upb_MiniTableField* upb_FieldDef_MiniTable(const upb_FieldDef* f) { + if (upb_FieldDef_IsExtension(f)) { + const upb_FileDef* file = upb_FieldDef_File(f); + return (upb_MiniTableField*)_upb_FileDef_ExtensionMiniTable( + file, f->layout_index); + } else { + const upb_MiniTable* layout = upb_MessageDef_MiniTable(f->msgdef); + return &layout->fields[f->layout_index]; + } } -const upb_ExtensionRange* upb_MessageDef_ExtensionRange(const upb_MessageDef* m, - int i) { - UPB_ASSERT(0 <= i && i < m->ext_range_count); - return _upb_ExtensionRange_At(m->ext_ranges, i); +const upb_MiniTableExtension* _upb_FieldDef_ExtensionMiniTable( + const upb_FieldDef* f) { + UPB_ASSERT(upb_FieldDef_IsExtension(f)); + const upb_FileDef* file = upb_FieldDef_File(f); + return _upb_FileDef_ExtensionMiniTable(file, f->layout_index); } -const upb_MessageReservedRange* upb_MessageDef_ReservedRange( - const upb_MessageDef* m, int i) { - UPB_ASSERT(0 <= i && i < m->res_range_count); - return _upb_MessageReservedRange_At(m->res_ranges, i); +bool _upb_FieldDef_IsClosedEnum(const upb_FieldDef* f) { + if (f->type_ != kUpb_FieldType_Enum) return false; + return upb_EnumDef_IsClosed(f->sub.enumdef); } -upb_StringView upb_MessageDef_ReservedName(const upb_MessageDef* m, int i) { - UPB_ASSERT(0 <= i && i < m->res_name_count); - return m->res_names[i]; +bool _upb_FieldDef_IsProto3Optional(const upb_FieldDef* f) { + return f->is_proto3_optional; } -const upb_FieldDef* 
upb_MessageDef_Field(const upb_MessageDef* m, int i) { - UPB_ASSERT(0 <= i && i < m->field_count); - return _upb_FieldDef_At(m->fields, i); -} +int _upb_FieldDef_LayoutIndex(const upb_FieldDef* f) { return f->layout_index; } -const upb_OneofDef* upb_MessageDef_Oneof(const upb_MessageDef* m, int i) { - UPB_ASSERT(0 <= i && i < m->oneof_count); - return _upb_OneofDef_At(m->oneofs, i); -} +uint64_t _upb_FieldDef_Modifiers(const upb_FieldDef* f) { + uint64_t out = f->is_packed ? kUpb_FieldModifier_IsPacked : 0; -const upb_MessageDef* upb_MessageDef_NestedMessage(const upb_MessageDef* m, - int i) { - UPB_ASSERT(0 <= i && i < m->nested_msg_count); - return &m->nested_msgs[i]; -} + switch (f->label_) { + case kUpb_Label_Optional: + if (!upb_FieldDef_HasPresence(f)) { + out |= kUpb_FieldModifier_IsProto3Singular; + } + break; + case kUpb_Label_Repeated: + out |= kUpb_FieldModifier_IsRepeated; + break; + case kUpb_Label_Required: + out |= kUpb_FieldModifier_IsRequired; + break; + } -const upb_EnumDef* upb_MessageDef_NestedEnum(const upb_MessageDef* m, int i) { - UPB_ASSERT(0 <= i && i < m->nested_enum_count); - return _upb_EnumDef_At(m->nested_enums, i); + if (_upb_FieldDef_IsClosedEnum(f)) { + out |= kUpb_FieldModifier_IsClosedEnum; + } + return out; } -const upb_FieldDef* upb_MessageDef_NestedExtension(const upb_MessageDef* m, - int i) { - UPB_ASSERT(0 <= i && i < m->nested_ext_count); - return _upb_FieldDef_At(m->nested_exts, i); +bool upb_FieldDef_HasDefault(const upb_FieldDef* f) { return f->has_default; } +bool upb_FieldDef_HasPresence(const upb_FieldDef* f) { return f->has_presence; } + +bool upb_FieldDef_HasSubDef(const upb_FieldDef* f) { + return upb_FieldDef_IsSubMessage(f) || + upb_FieldDef_CType(f) == kUpb_CType_Enum; } -upb_WellKnown upb_MessageDef_WellKnownType(const upb_MessageDef* m) { - return m->well_known_type; +bool upb_FieldDef_IsMap(const upb_FieldDef* f) { + return upb_FieldDef_IsRepeated(f) && upb_FieldDef_IsSubMessage(f) && + 
upb_MessageDef_IsMapEntry(upb_FieldDef_MessageSubDef(f)); } -bool _upb_MessageDef_InMessageSet(const upb_MessageDef* m) { - return m->in_message_set; +bool upb_FieldDef_IsOptional(const upb_FieldDef* f) { + return upb_FieldDef_Label(f) == kUpb_Label_Optional; } -const upb_FieldDef* upb_MessageDef_FindFieldByName(const upb_MessageDef* m, - const char* name) { - return upb_MessageDef_FindFieldByNameWithSize(m, name, strlen(name)); +bool upb_FieldDef_IsPrimitive(const upb_FieldDef* f) { + return !upb_FieldDef_IsString(f) && !upb_FieldDef_IsSubMessage(f); } -const upb_OneofDef* upb_MessageDef_FindOneofByName(const upb_MessageDef* m, - const char* name) { - return upb_MessageDef_FindOneofByNameWithSize(m, name, strlen(name)); +bool upb_FieldDef_IsRepeated(const upb_FieldDef* f) { + return upb_FieldDef_Label(f) == kUpb_Label_Repeated; } -bool upb_MessageDef_IsMapEntry(const upb_MessageDef* m) { - return UPB_DESC(MessageOptions_map_entry)(m->opts); +bool upb_FieldDef_IsRequired(const upb_FieldDef* f) { + return upb_FieldDef_Label(f) == kUpb_Label_Required; } -bool upb_MessageDef_IsMessageSet(const upb_MessageDef* m) { - return UPB_DESC(MessageOptions_message_set_wire_format)(m->opts); +bool upb_FieldDef_IsString(const upb_FieldDef* f) { + return upb_FieldDef_CType(f) == kUpb_CType_String || + upb_FieldDef_CType(f) == kUpb_CType_Bytes; } -static upb_MiniTable* _upb_MessageDef_MakeMiniTable(upb_DefBuilder* ctx, - const upb_MessageDef* m) { - upb_StringView desc; - // Note: this will assign layout_index for fields, so upb_FieldDef_MiniTable() - // is safe to call only after this call. 
- bool ok = upb_MessageDef_MiniDescriptorEncode(m, ctx->tmp_arena, &desc); - if (!ok) _upb_DefBuilder_OomErr(ctx); - - void** scratch_data = _upb_DefPool_ScratchData(ctx->symtab); - size_t* scratch_size = _upb_DefPool_ScratchSize(ctx->symtab); - upb_MiniTable* ret = upb_MiniTable_BuildWithBuf( - desc.data, desc.size, ctx->platform, ctx->arena, scratch_data, - scratch_size, ctx->status); - if (!ret) _upb_DefBuilder_FailJmp(ctx); - - return ret; +bool upb_FieldDef_IsSubMessage(const upb_FieldDef* f) { + return upb_FieldDef_CType(f) == kUpb_CType_Message; } -void _upb_MessageDef_Resolve(upb_DefBuilder* ctx, upb_MessageDef* m) { - for (int i = 0; i < m->field_count; i++) { - upb_FieldDef* f = (upb_FieldDef*)upb_MessageDef_Field(m, i); - _upb_FieldDef_Resolve(ctx, m->full_name, f); - } - - m->in_message_set = false; - for (int i = 0; i < upb_MessageDef_NestedExtensionCount(m); i++) { - upb_FieldDef* ext = (upb_FieldDef*)upb_MessageDef_NestedExtension(m, i); - _upb_FieldDef_Resolve(ctx, m->full_name, ext); - if (upb_FieldDef_Type(ext) == kUpb_FieldType_Message && - upb_FieldDef_Label(ext) == kUpb_Label_Optional && - upb_FieldDef_MessageSubDef(ext) == m && - UPB_DESC(MessageOptions_message_set_wire_format)( - upb_MessageDef_Options(upb_FieldDef_ContainingType(ext)))) { - m->in_message_set = true; - } - } - - for (int i = 0; i < upb_MessageDef_NestedMessageCount(m); i++) { - upb_MessageDef* n = (upb_MessageDef*)upb_MessageDef_NestedMessage(m, i); - _upb_MessageDef_Resolve(ctx, n); - } +static bool between(int32_t x, int32_t low, int32_t high) { + return x >= low && x <= high; } -void _upb_MessageDef_InsertField(upb_DefBuilder* ctx, upb_MessageDef* m, - const upb_FieldDef* f) { - const int32_t field_number = upb_FieldDef_Number(f); - - if (field_number <= 0 || field_number > kUpb_MaxFieldNumber) { - _upb_DefBuilder_Errf(ctx, "invalid field number (%u)", field_number); - } - - const char* json_name = upb_FieldDef_JsonName(f); - const char* shortname = upb_FieldDef_Name(f); - 
const size_t shortnamelen = strlen(shortname); +bool upb_FieldDef_checklabel(int32_t label) { return between(label, 1, 3); } +bool upb_FieldDef_checktype(int32_t type) { return between(type, 1, 11); } +bool upb_FieldDef_checkintfmt(int32_t fmt) { return between(fmt, 1, 3); } - upb_value v = upb_value_constptr(f); +bool upb_FieldDef_checkdescriptortype(int32_t type) { + return between(type, 1, 18); +} - upb_value existing_v; - if (upb_strtable_lookup(&m->ntof, shortname, &existing_v)) { - _upb_DefBuilder_Errf(ctx, "duplicate field name (%s)", shortname); - } +static bool streql2(const char* a, size_t n, const char* b) { + return n == strlen(b) && memcmp(a, b, n) == 0; +} - const upb_value field_v = _upb_DefType_Pack(f, UPB_DEFTYPE_FIELD); - bool ok = - _upb_MessageDef_Insert(m, shortname, shortnamelen, field_v, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); +// Implement the transformation as described in the spec: +// 1. upper case all letters after an underscore. +// 2. remove all underscores. +static char* make_json_name(const char* name, size_t size, upb_Arena* a) { + char* out = upb_Arena_Malloc(a, size + 1); // +1 is to add a trailing '\0' + if (out == NULL) return NULL; - if (strcmp(shortname, json_name) != 0) { - if (upb_strtable_lookup(&m->ntof, json_name, &v)) { - _upb_DefBuilder_Errf(ctx, "duplicate json_name (%s)", json_name); + bool ucase_next = false; + char* des = out; + for (size_t i = 0; i < size; i++) { + if (name[i] == '_') { + ucase_next = true; + } else { + *des++ = ucase_next ? 
toupper(name[i]) : name[i]; + ucase_next = false; } - - const size_t json_size = strlen(json_name); - const upb_value json_v = _upb_DefType_Pack(f, UPB_DEFTYPE_FIELD_JSONNAME); - ok = _upb_MessageDef_Insert(m, json_name, json_size, json_v, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); - } - - if (upb_inttable_lookup(&m->itof, field_number, NULL)) { - _upb_DefBuilder_Errf(ctx, "duplicate field number (%u)", field_number); } + *des++ = '\0'; + return out; +} - ok = upb_inttable_insert(&m->itof, field_number, v, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); +static str_t* newstr(upb_DefBuilder* ctx, const char* data, size_t len) { + str_t* ret = _upb_DefBuilder_Alloc(ctx, sizeof(*ret) + len); + if (!ret) _upb_DefBuilder_OomErr(ctx); + ret->len = len; + if (len) memcpy(ret->str, data, len); + ret->str[len] = '\0'; + return ret; } -void _upb_MessageDef_CreateMiniTable(upb_DefBuilder* ctx, upb_MessageDef* m) { - if (ctx->layout == NULL) { - m->layout = _upb_MessageDef_MakeMiniTable(ctx, m); - } else { - UPB_ASSERT(ctx->msg_count < ctx->layout->msg_count); - m->layout = ctx->layout->msgs[ctx->msg_count++]; - UPB_ASSERT(m->field_count == m->layout->field_count); +static str_t* unescape(upb_DefBuilder* ctx, const upb_FieldDef* f, + const char* data, size_t len) { + // Size here is an upper bound; escape sequences could ultimately shrink it. + str_t* ret = _upb_DefBuilder_Alloc(ctx, sizeof(*ret) + len); + char* dst = &ret->str[0]; + const char* src = data; + const char* end = data + len; - // We don't need the result of this call, but it will assign layout_index - // for all the fields in O(n lg n) time. 
- _upb_FieldDefs_Sorted(m->fields, m->field_count, ctx->tmp_arena); + while (src < end) { + if (*src == '\\') { + src++; + *dst++ = _upb_DefBuilder_ParseEscape(ctx, f, &src, end); + } else { + *dst++ = *src++; + } } - for (int i = 0; i < m->nested_msg_count; i++) { - upb_MessageDef* nested = - (upb_MessageDef*)upb_MessageDef_NestedMessage(m, i); - _upb_MessageDef_CreateMiniTable(ctx, nested); - } + ret->len = dst - &ret->str[0]; + return ret; } -void _upb_MessageDef_LinkMiniTable(upb_DefBuilder* ctx, - const upb_MessageDef* m) { - for (int i = 0; i < upb_MessageDef_NestedExtensionCount(m); i++) { - const upb_FieldDef* ext = upb_MessageDef_NestedExtension(m, i); - _upb_FieldDef_BuildMiniTableExtension(ctx, ext); - } +static void parse_default(upb_DefBuilder* ctx, const char* str, size_t len, + upb_FieldDef* f) { + char* end; + char nullz[64]; + errno = 0; - for (int i = 0; i < m->nested_msg_count; i++) { - _upb_MessageDef_LinkMiniTable(ctx, upb_MessageDef_NestedMessage(m, i)); + switch (upb_FieldDef_CType(f)) { + case kUpb_CType_Int32: + case kUpb_CType_Int64: + case kUpb_CType_UInt32: + case kUpb_CType_UInt64: + case kUpb_CType_Double: + case kUpb_CType_Float: + // Standard C number parsing functions expect null-terminated strings. 
+ if (len >= sizeof(nullz) - 1) { + _upb_DefBuilder_Errf(ctx, "Default too long: %.*s", (int)len, str); + } + memcpy(nullz, str, len); + nullz[len] = '\0'; + str = nullz; + break; + default: + break; } - if (ctx->layout) return; - - for (int i = 0; i < m->field_count; i++) { - const upb_FieldDef* f = upb_MessageDef_Field(m, i); - const upb_MessageDef* sub_m = upb_FieldDef_MessageSubDef(f); - const upb_EnumDef* sub_e = upb_FieldDef_EnumSubDef(f); - const int layout_index = _upb_FieldDef_LayoutIndex(f); - upb_MiniTable* mt = (upb_MiniTable*)upb_MessageDef_MiniTable(m); - - UPB_ASSERT(layout_index < m->field_count); - upb_MiniTableField* mt_f = - (upb_MiniTableField*)&m->layout->fields[layout_index]; - if (sub_m) { - if (!mt->subs) { - _upb_DefBuilder_Errf(ctx, "unexpected submsg for (%s)", m->full_name); + switch (upb_FieldDef_CType(f)) { + case kUpb_CType_Int32: { + long val = strtol(str, &end, 0); + if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end) { + goto invalid; } - UPB_ASSERT(mt_f); - UPB_ASSERT(sub_m->layout); - if (UPB_UNLIKELY(!upb_MiniTable_SetSubMessage(mt, mt_f, sub_m->layout))) { - _upb_DefBuilder_Errf(ctx, "invalid submsg for (%s)", m->full_name); + f->defaultval.sint = val; + break; + } + case kUpb_CType_Enum: { + const upb_EnumDef* e = f->sub.enumdef; + const upb_EnumValueDef* ev = + upb_EnumDef_FindValueByNameWithSize(e, str, len); + if (!ev) { + goto invalid; } - } else if (_upb_FieldDef_IsClosedEnum(f)) { - const upb_MiniTableEnum* mt_e = _upb_EnumDef_MiniTable(sub_e); - if (UPB_UNLIKELY(!upb_MiniTable_SetSubEnum(mt, mt_f, mt_e))) { - _upb_DefBuilder_Errf(ctx, "invalid subenum for (%s)", m->full_name); + f->defaultval.sint = upb_EnumValueDef_Number(ev); + break; + } + case kUpb_CType_Int64: { + long long val = strtoll(str, &end, 0); + if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || *end) { + goto invalid; } + f->defaultval.sint = val; + break; } - } - -#ifndef NDEBUG - for (int i = 0; i < m->field_count; i++) { - 
const upb_FieldDef* f = upb_MessageDef_Field(m, i); - const int layout_index = _upb_FieldDef_LayoutIndex(f); - UPB_ASSERT(layout_index < m->layout->field_count); - const upb_MiniTableField* mt_f = &m->layout->fields[layout_index]; - UPB_ASSERT(upb_FieldDef_Type(f) == upb_MiniTableField_Type(mt_f)); - UPB_ASSERT(upb_FieldDef_CType(f) == upb_MiniTableField_CType(mt_f)); - UPB_ASSERT(upb_FieldDef_HasPresence(f) == - upb_MiniTableField_HasPresence(mt_f)); - } -#endif -} - -static uint64_t _upb_MessageDef_Modifiers(const upb_MessageDef* m) { - uint64_t out = 0; - if (upb_FileDef_Syntax(m->file) == kUpb_Syntax_Proto3) { - out |= kUpb_MessageModifier_ValidateUtf8; - out |= kUpb_MessageModifier_DefaultIsPacked; - } - if (m->ext_range_count) { - out |= kUpb_MessageModifier_IsExtendable; - } - return out; -} - -static bool _upb_MessageDef_EncodeMap(upb_DescState* s, const upb_MessageDef* m, - upb_Arena* a) { - if (m->field_count != 2) return false; - - const upb_FieldDef* key_field = upb_MessageDef_Field(m, 0); - const upb_FieldDef* val_field = upb_MessageDef_Field(m, 1); - if (key_field == NULL || val_field == NULL) return false; + case kUpb_CType_UInt32: { + unsigned long val = strtoul(str, &end, 0); + if (val > UINT32_MAX || errno == ERANGE || *end) { + goto invalid; + } + f->defaultval.uint = val; + break; + } + case kUpb_CType_UInt64: { + unsigned long long val = strtoull(str, &end, 0); + if (val > UINT64_MAX || errno == ERANGE || *end) { + goto invalid; + } + f->defaultval.uint = val; + break; + } + case kUpb_CType_Double: { + double val = strtod(str, &end); + if (errno == ERANGE || *end) { + goto invalid; + } + f->defaultval.dbl = val; + break; + } + case kUpb_CType_Float: { + float val = strtof(str, &end); + if (errno == ERANGE || *end) { + goto invalid; + } + f->defaultval.flt = val; + break; + } + case kUpb_CType_Bool: { + if (streql2(str, len, "false")) { + f->defaultval.boolean = false; + } else if (streql2(str, len, "true")) { + f->defaultval.boolean = true; + } 
else { + goto invalid; + } + break; + } + case kUpb_CType_String: + f->defaultval.str = newstr(ctx, str, len); + break; + case kUpb_CType_Bytes: + f->defaultval.str = unescape(ctx, f, str, len); + break; + case kUpb_CType_Message: + /* Should not have a default value. */ + _upb_DefBuilder_Errf(ctx, "Message should not have a default (%s)", + upb_FieldDef_FullName(f)); + } - UPB_ASSERT(_upb_FieldDef_LayoutIndex(key_field) == 0); - UPB_ASSERT(_upb_FieldDef_LayoutIndex(val_field) == 1); + return; - s->ptr = upb_MtDataEncoder_EncodeMap( - &s->e, s->ptr, upb_FieldDef_Type(key_field), upb_FieldDef_Type(val_field), - _upb_FieldDef_Modifiers(key_field), _upb_FieldDef_Modifiers(val_field)); - return true; +invalid: + _upb_DefBuilder_Errf(ctx, "Invalid default '%.*s' for field %s of type %d", + (int)len, str, upb_FieldDef_FullName(f), + (int)upb_FieldDef_Type(f)); } -static bool _upb_MessageDef_EncodeMessage(upb_DescState* s, - const upb_MessageDef* m, - upb_Arena* a) { - const upb_FieldDef** sorted = NULL; - if (!m->is_sorted) { - sorted = _upb_FieldDefs_Sorted(m->fields, m->field_count, a); - if (!sorted) return false; +static void set_default_default(upb_DefBuilder* ctx, upb_FieldDef* f) { + switch (upb_FieldDef_CType(f)) { + case kUpb_CType_Int32: + case kUpb_CType_Int64: + f->defaultval.sint = 0; + break; + case kUpb_CType_UInt64: + case kUpb_CType_UInt32: + f->defaultval.uint = 0; + break; + case kUpb_CType_Double: + case kUpb_CType_Float: + f->defaultval.dbl = 0; + break; + case kUpb_CType_String: + case kUpb_CType_Bytes: + f->defaultval.str = newstr(ctx, NULL, 0); + break; + case kUpb_CType_Bool: + f->defaultval.boolean = false; + break; + case kUpb_CType_Enum: { + const upb_EnumValueDef* v = upb_EnumDef_Value(f->sub.enumdef, 0); + f->defaultval.sint = upb_EnumValueDef_Number(v); + break; + } + case kUpb_CType_Message: + break; } +} - s->ptr = upb_MtDataEncoder_StartMessage(&s->e, s->ptr, - _upb_MessageDef_Modifiers(m)); - - for (int i = 0; i < m->field_count; i++) { 
- const upb_FieldDef* f = sorted ? sorted[i] : upb_MessageDef_Field(m, i); - const upb_FieldType type = upb_FieldDef_Type(f); - const int number = upb_FieldDef_Number(f); - const uint64_t modifiers = _upb_FieldDef_Modifiers(f); +static void _upb_FieldDef_Create(upb_DefBuilder* ctx, const char* prefix, + const UPB_DESC(FieldDescriptorProto) * + field_proto, + upb_MessageDef* m, upb_FieldDef* f) { + // Must happen before _upb_DefBuilder_Add() + f->file = _upb_DefBuilder_File(ctx); - if (!_upb_DescState_Grow(s, a)) return false; - s->ptr = upb_MtDataEncoder_PutField(&s->e, s->ptr, type, number, modifiers); + if (!UPB_DESC(FieldDescriptorProto_has_name)(field_proto)) { + _upb_DefBuilder_Errf(ctx, "field has no name"); } - for (int i = 0; i < m->real_oneof_count; i++) { - if (!_upb_DescState_Grow(s, a)) return false; - s->ptr = upb_MtDataEncoder_StartOneof(&s->e, s->ptr); + const upb_StringView name = UPB_DESC(FieldDescriptorProto_name)(field_proto); - const upb_OneofDef* o = upb_MessageDef_Oneof(m, i); - const int field_count = upb_OneofDef_FieldCount(o); - for (int j = 0; j < field_count; j++) { - const int number = upb_FieldDef_Number(upb_OneofDef_Field(o, j)); + f->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); + f->label_ = (int)UPB_DESC(FieldDescriptorProto_label)(field_proto); + f->number_ = UPB_DESC(FieldDescriptorProto_number)(field_proto); + f->is_proto3_optional = + UPB_DESC(FieldDescriptorProto_proto3_optional)(field_proto); + f->msgdef = m; + f->scope.oneof = NULL; - if (!_upb_DescState_Grow(s, a)) return false; - s->ptr = upb_MtDataEncoder_PutOneofField(&s->e, s->ptr, number); - } + f->has_json_name = UPB_DESC(FieldDescriptorProto_has_json_name)(field_proto); + if (f->has_json_name) { + const upb_StringView sv = + UPB_DESC(FieldDescriptorProto_json_name)(field_proto); + f->json_name = upb_strdup2(sv.data, sv.size, ctx->arena); + } else { + f->json_name = make_json_name(name.data, name.size, ctx->arena); } + if (!f->json_name) 
_upb_DefBuilder_OomErr(ctx); - return true; -} - -static bool _upb_MessageDef_EncodeMessageSet(upb_DescState* s, - const upb_MessageDef* m, - upb_Arena* a) { - s->ptr = upb_MtDataEncoder_EncodeMessageSet(&s->e, s->ptr); - - return true; -} + const bool has_type = UPB_DESC(FieldDescriptorProto_has_type)(field_proto); + const bool has_type_name = + UPB_DESC(FieldDescriptorProto_has_type_name)(field_proto); -bool upb_MessageDef_MiniDescriptorEncode(const upb_MessageDef* m, upb_Arena* a, - upb_StringView* out) { - upb_DescState s; - _upb_DescState_Init(&s); + f->type_ = (int)UPB_DESC(FieldDescriptorProto_type)(field_proto); - if (!_upb_DescState_Grow(&s, a)) return false; + if (has_type) { + switch (f->type_) { + case kUpb_FieldType_Message: + case kUpb_FieldType_Group: + case kUpb_FieldType_Enum: + if (!has_type_name) { + _upb_DefBuilder_Errf(ctx, "field of type %d requires type name (%s)", + (int)f->type_, f->full_name); + } + break; + default: + if (has_type_name) { + _upb_DefBuilder_Errf( + ctx, "invalid type for field with type_name set (%s, %d)", + f->full_name, (int)f->type_); + } + } + } - if (upb_MessageDef_IsMapEntry(m)) { - if (!_upb_MessageDef_EncodeMap(&s, m, a)) return false; - } else if (UPB_DESC(MessageOptions_message_set_wire_format)(m->opts)) { - if (!_upb_MessageDef_EncodeMessageSet(&s, m, a)) return false; + if (!has_type && has_type_name) { + f->type_ = + UPB_FIELD_TYPE_UNSPECIFIED; // We'll assign this in resolve_subdef() } else { - if (!_upb_MessageDef_EncodeMessage(&s, m, a)) return false; + if (f->type_ < kUpb_FieldType_Double || f->type_ > kUpb_FieldType_SInt64) { + _upb_DefBuilder_Errf(ctx, "invalid type for field %s (%d)", f->full_name, + f->type_); + } } - if (!_upb_DescState_Grow(&s, a)) return false; - *s.ptr = '\0'; + if (f->label_ < kUpb_Label_Optional || f->label_ > kUpb_Label_Repeated) { + _upb_DefBuilder_Errf(ctx, "invalid label for field %s (%d)", f->full_name, + f->label_); + } - out->data = s.buf; - out->size = s.ptr - s.buf; - 
return true; -} + /* We can't resolve the subdef or (in the case of extensions) the containing + * message yet, because it may not have been defined yet. We stash a pointer + * to the field_proto until later when we can properly resolve it. */ + f->sub.unresolved = field_proto; -static upb_StringView* _upb_ReservedNames_New(upb_DefBuilder* ctx, int n, - const upb_StringView* protos) { - upb_StringView* sv = _upb_DefBuilder_Alloc(ctx, sizeof(upb_StringView) * n); - for (size_t i = 0; i < n; i++) { - sv[i].data = - upb_strdup2(protos[i].data, protos[i].size, _upb_DefBuilder_Arena(ctx)); - sv[i].size = protos[i].size; + if (f->label_ == kUpb_Label_Required && + upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto3) { + _upb_DefBuilder_Errf(ctx, "proto3 fields cannot be required (%s)", + f->full_name); } - return sv; -} -static void create_msgdef(upb_DefBuilder* ctx, const char* prefix, - const UPB_DESC(DescriptorProto) * msg_proto, - const upb_MessageDef* containing_type, - upb_MessageDef* m) { - const UPB_DESC(OneofDescriptorProto)* const* oneofs; - const UPB_DESC(FieldDescriptorProto)* const* fields; - const UPB_DESC(DescriptorProto_ExtensionRange)* const* ext_ranges; - const UPB_DESC(DescriptorProto_ReservedRange)* const* res_ranges; - const upb_StringView* res_names; - size_t n_oneof, n_field, n_enum, n_ext, n_msg; - size_t n_ext_range, n_res_range, n_res_name; - upb_StringView name; + if (UPB_DESC(FieldDescriptorProto_has_oneof_index)(field_proto)) { + int oneof_index = UPB_DESC(FieldDescriptorProto_oneof_index)(field_proto); - // Must happen before _upb_DefBuilder_Add() - m->file = _upb_DefBuilder_File(ctx); + if (upb_FieldDef_Label(f) != kUpb_Label_Optional) { + _upb_DefBuilder_Errf(ctx, "fields in oneof must have OPTIONAL label (%s)", + f->full_name); + } - m->containing_type = containing_type; - m->is_sorted = true; + if (!m) { + _upb_DefBuilder_Errf(ctx, "oneof field (%s) has no containing msg", + f->full_name); + } - name = 
UPB_DESC(DescriptorProto_name)(msg_proto); + if (oneof_index >= upb_MessageDef_OneofCount(m)) { + _upb_DefBuilder_Errf(ctx, "oneof_index out of range (%s)", f->full_name); + } - m->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); - _upb_DefBuilder_Add(ctx, m->full_name, _upb_DefType_Pack(m, UPB_DEFTYPE_MSG)); - - oneofs = UPB_DESC(DescriptorProto_oneof_decl)(msg_proto, &n_oneof); - fields = UPB_DESC(DescriptorProto_field)(msg_proto, &n_field); - ext_ranges = - UPB_DESC(DescriptorProto_extension_range)(msg_proto, &n_ext_range); - res_ranges = - UPB_DESC(DescriptorProto_reserved_range)(msg_proto, &n_res_range); - res_names = UPB_DESC(DescriptorProto_reserved_name)(msg_proto, &n_res_name); - - bool ok = upb_inttable_init(&m->itof, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); - - ok = upb_strtable_init(&m->ntof, n_oneof + n_field, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); - - UPB_DEF_SET_OPTIONS(m->opts, DescriptorProto, MessageOptions, msg_proto); + upb_OneofDef* oneof = (upb_OneofDef*)upb_MessageDef_Oneof(m, oneof_index); + f->scope.oneof = oneof; - m->oneof_count = n_oneof; - m->oneofs = _upb_OneofDefs_New(ctx, n_oneof, oneofs, m); + _upb_OneofDef_Insert(ctx, oneof, f, name.data, name.size); + } - m->field_count = n_field; - m->fields = - _upb_FieldDefs_New(ctx, n_field, fields, m->full_name, m, &m->is_sorted); + UPB_DEF_SET_OPTIONS(f->opts, FieldDescriptorProto, FieldOptions, field_proto); - // Message Sets may not contain fields. - if (UPB_UNLIKELY(UPB_DESC(MessageOptions_message_set_wire_format)(m->opts))) { - if (UPB_UNLIKELY(n_field > 0)) { - _upb_DefBuilder_Errf(ctx, "invalid message set (%s)", m->full_name); - } + if (UPB_DESC(FieldOptions_has_packed)(f->opts)) { + f->is_packed = UPB_DESC(FieldOptions_packed)(f->opts); + } else { + // Repeated fields default to packed for proto3 only. 
+ f->is_packed = has_type && upb_FieldDef_IsPrimitive(f) && + f->label_ == kUpb_Label_Repeated && + upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto3; } - m->ext_range_count = n_ext_range; - m->ext_ranges = _upb_ExtensionRanges_New(ctx, n_ext_range, ext_ranges, m); + f->has_presence = + (!upb_FieldDef_IsRepeated(f)) && + (f->type_ == kUpb_FieldType_Message || f->type_ == kUpb_FieldType_Group || + upb_FieldDef_ContainingOneof(f) || + (upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto2)); +} - m->res_range_count = n_res_range; - m->res_ranges = - _upb_MessageReservedRanges_New(ctx, n_res_range, res_ranges, m); +static void _upb_FieldDef_CreateExt(upb_DefBuilder* ctx, const char* prefix, + const UPB_DESC(FieldDescriptorProto) * + field_proto, + upb_MessageDef* m, upb_FieldDef* f) { + f->is_extension = true; + _upb_FieldDef_Create(ctx, prefix, field_proto, m, f); - m->res_name_count = n_res_name; - m->res_names = _upb_ReservedNames_New(ctx, n_res_name, res_names); + if (UPB_DESC(FieldDescriptorProto_has_oneof_index)(field_proto)) { + _upb_DefBuilder_Errf(ctx, "oneof_index provided for extension field (%s)", + f->full_name); + } - const size_t synthetic_count = _upb_OneofDefs_Finalize(ctx, m); - m->real_oneof_count = m->oneof_count - synthetic_count; + f->scope.extension_scope = m; + _upb_DefBuilder_Add(ctx, f->full_name, _upb_DefType_Pack(f, UPB_DEFTYPE_EXT)); + f->layout_index = ctx->ext_count++; - assign_msg_wellknowntype(m); - upb_inttable_compact(&m->itof, ctx->arena); + if (ctx->layout) { + UPB_ASSERT(_upb_FieldDef_ExtensionMiniTable(f)->field.number == f->number_); + } +} - const UPB_DESC(EnumDescriptorProto)* const* enums = - UPB_DESC(DescriptorProto_enum_type)(msg_proto, &n_enum); - m->nested_enum_count = n_enum; - m->nested_enums = _upb_EnumDefs_New(ctx, n_enum, enums, m); +static void _upb_FieldDef_CreateNotExt(upb_DefBuilder* ctx, const char* prefix, + const UPB_DESC(FieldDescriptorProto) * + field_proto, + upb_MessageDef* m, upb_FieldDef* f) { + 
f->is_extension = false; + _upb_FieldDef_Create(ctx, prefix, field_proto, m, f); - const UPB_DESC(FieldDescriptorProto)* const* exts = - UPB_DESC(DescriptorProto_extension)(msg_proto, &n_ext); - m->nested_ext_count = n_ext; - m->nested_exts = _upb_Extensions_New(ctx, n_ext, exts, m->full_name, m); + if (!UPB_DESC(FieldDescriptorProto_has_oneof_index)(field_proto)) { + if (f->is_proto3_optional) { + _upb_DefBuilder_Errf( + ctx, + "non-extension field (%s) with proto3_optional was not in a oneof", + f->full_name); + } + } - const UPB_DESC(DescriptorProto)* const* msgs = - UPB_DESC(DescriptorProto_nested_type)(msg_proto, &n_msg); - m->nested_msg_count = n_msg; - m->nested_msgs = _upb_MessageDefs_New(ctx, n_msg, msgs, m); + _upb_MessageDef_InsertField(ctx, m, f); } -// Allocate and initialize an array of |n| message defs. -upb_MessageDef* _upb_MessageDefs_New( - upb_DefBuilder* ctx, int n, const UPB_DESC(DescriptorProto) * const* protos, - const upb_MessageDef* containing_type) { - _upb_DefType_CheckPadding(sizeof(upb_MessageDef)); - - const char* name = containing_type ? 
containing_type->full_name - : _upb_FileDef_RawPackage(ctx->file); +upb_FieldDef* _upb_Extensions_New( + upb_DefBuilder* ctx, int n, + const UPB_DESC(FieldDescriptorProto) * const* protos, const char* prefix, + upb_MessageDef* m) { + _upb_DefType_CheckPadding(sizeof(upb_FieldDef)); + upb_FieldDef* defs = + (upb_FieldDef*)_upb_DefBuilder_Alloc(ctx, sizeof(upb_FieldDef) * n); - upb_MessageDef* m = _upb_DefBuilder_Alloc(ctx, sizeof(upb_MessageDef) * n); for (int i = 0; i < n; i++) { - create_msgdef(ctx, name, protos[i], containing_type, &m[i]); + upb_FieldDef* f = &defs[i]; + + _upb_FieldDef_CreateExt(ctx, prefix, protos[i], m, f); + f->index_ = i; } - return m; + + return defs; } +upb_FieldDef* _upb_FieldDefs_New( + upb_DefBuilder* ctx, int n, + const UPB_DESC(FieldDescriptorProto) * const* protos, const char* prefix, + upb_MessageDef* m, bool* is_sorted) { + _upb_DefType_CheckPadding(sizeof(upb_FieldDef)); + upb_FieldDef* defs = + (upb_FieldDef*)_upb_DefBuilder_Alloc(ctx, sizeof(upb_FieldDef) * n); + + uint32_t previous = 0; + for (int i = 0; i < n; i++) { + upb_FieldDef* f = &defs[i]; -// Must be last. + _upb_FieldDef_CreateNotExt(ctx, prefix, protos[i], m, f); + f->index_ = i; + if (!ctx->layout) { + // Speculate that the def fields are sorted. We will always sort the + // MiniTable fields, so if defs are sorted then indices will match. + // + // If this is incorrect, we will overwrite later. 
+ f->layout_index = i; + } -struct upb_MessageReservedRange { - int32_t start; - int32_t end; -}; + const uint32_t current = f->number_; + if (previous > current) *is_sorted = false; + previous = current; + } -upb_MessageReservedRange* _upb_MessageReservedRange_At( - const upb_MessageReservedRange* r, int i) { - return (upb_MessageReservedRange*)&r[i]; + return defs; } -int32_t upb_MessageReservedRange_Start(const upb_MessageReservedRange* r) { - return r->start; +static void resolve_subdef(upb_DefBuilder* ctx, const char* prefix, + upb_FieldDef* f) { + const UPB_DESC(FieldDescriptorProto)* field_proto = f->sub.unresolved; + upb_StringView name = UPB_DESC(FieldDescriptorProto_type_name)(field_proto); + bool has_name = UPB_DESC(FieldDescriptorProto_has_type_name)(field_proto); + switch ((int)f->type_) { + case UPB_FIELD_TYPE_UNSPECIFIED: { + // Type was not specified and must be inferred. + UPB_ASSERT(has_name); + upb_deftype_t type; + const void* def = + _upb_DefBuilder_ResolveAny(ctx, f->full_name, prefix, name, &type); + switch (type) { + case UPB_DEFTYPE_ENUM: + f->sub.enumdef = def; + f->type_ = kUpb_FieldType_Enum; + if (!UPB_DESC(FieldOptions_has_packed)(f->opts)) { + f->is_packed = f->label_ == kUpb_Label_Repeated && + upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto3; + } + break; + case UPB_DEFTYPE_MSG: + f->sub.msgdef = def; + f->type_ = kUpb_FieldType_Message; // It appears there is no way of + // this being a group. 
+ f->has_presence = !upb_FieldDef_IsRepeated(f); + break; + default: + _upb_DefBuilder_Errf(ctx, "Couldn't resolve type name for field %s", + f->full_name); + } + break; + } + case kUpb_FieldType_Message: + case kUpb_FieldType_Group: + UPB_ASSERT(has_name); + f->sub.msgdef = _upb_DefBuilder_Resolve(ctx, f->full_name, prefix, name, + UPB_DEFTYPE_MSG); + break; + case kUpb_FieldType_Enum: + UPB_ASSERT(has_name); + f->sub.enumdef = _upb_DefBuilder_Resolve(ctx, f->full_name, prefix, name, + UPB_DEFTYPE_ENUM); + break; + default: + // No resolution necessary. + break; + } } -int32_t upb_MessageReservedRange_End(const upb_MessageReservedRange* r) { - return r->end; + +static int _upb_FieldDef_Compare(const void* p1, const void* p2) { + const uint32_t v1 = (*(upb_FieldDef**)p1)->number_; + const uint32_t v2 = (*(upb_FieldDef**)p2)->number_; + return (v1 < v2) ? -1 : (v1 > v2); } -upb_MessageReservedRange* _upb_MessageReservedRanges_New( - upb_DefBuilder* ctx, int n, - const UPB_DESC(DescriptorProto_ReservedRange) * const* protos, - const upb_MessageDef* m) { - upb_MessageReservedRange* r = - _upb_DefBuilder_Alloc(ctx, sizeof(upb_MessageReservedRange) * n); +// _upb_FieldDefs_Sorted() is mostly a pure function of its inputs, but has one +// critical side effect that we depend on: it sets layout_index appropriately +// for non-sorted lists of fields. +const upb_FieldDef** _upb_FieldDefs_Sorted(const upb_FieldDef* f, int n, + upb_Arena* a) { + // TODO(salo): Replace this arena alloc with a persistent scratch buffer. 
+ upb_FieldDef** out = (upb_FieldDef**)upb_Arena_Malloc(a, n * sizeof(void*)); + if (!out) return NULL; for (int i = 0; i < n; i++) { - const int32_t start = - UPB_DESC(DescriptorProto_ReservedRange_start)(protos[i]); - const int32_t end = UPB_DESC(DescriptorProto_ReservedRange_end)(protos[i]); - const int32_t max = kUpb_MaxFieldNumber + 1; - - // A full validation would also check that each range is disjoint, and that - // none of the fields overlap with the extension ranges, but we are just - // sanity checking here. - if (start < 1 || end <= start || end > max) { - _upb_DefBuilder_Errf(ctx, - "Reserved range (%d, %d) is invalid, message=%s\n", - (int)start, (int)end, upb_MessageDef_FullName(m)); - } - - r[i].start = start; - r[i].end = end; + out[i] = (upb_FieldDef*)&f[i]; } + qsort(out, n, sizeof(void*), _upb_FieldDef_Compare); - return r; + for (int i = 0; i < n; i++) { + out[i]->layout_index = i; + } + return (const upb_FieldDef**)out; } +bool upb_FieldDef_MiniDescriptorEncode(const upb_FieldDef* f, upb_Arena* a, + upb_StringView* out) { + UPB_ASSERT(f->is_extension); -// Must be last. 
+ upb_DescState s; + _upb_DescState_Init(&s); -struct upb_MethodDef { - const UPB_DESC(MethodOptions) * opts; - upb_ServiceDef* service; - const char* full_name; - const upb_MessageDef* input_type; - const upb_MessageDef* output_type; - int index; - bool client_streaming; - bool server_streaming; -}; - -upb_MethodDef* _upb_MethodDef_At(const upb_MethodDef* m, int i) { - return (upb_MethodDef*)&m[i]; -} + const int number = upb_FieldDef_Number(f); + const uint64_t modifiers = _upb_FieldDef_Modifiers(f); -const upb_ServiceDef* upb_MethodDef_Service(const upb_MethodDef* m) { - return m->service; -} + if (!_upb_DescState_Grow(&s, a)) return false; + s.ptr = upb_MtDataEncoder_EncodeExtension(&s.e, s.ptr, f->type_, number, + modifiers); + *s.ptr = '\0'; -const UPB_DESC(MethodOptions) * upb_MethodDef_Options(const upb_MethodDef* m) { - return m->opts; + out->data = s.buf; + out->size = s.ptr - s.buf; + return true; } -bool upb_MethodDef_HasOptions(const upb_MethodDef* m) { - return m->opts != (void*)kUpbDefOptDefault; -} +static void resolve_extension(upb_DefBuilder* ctx, const char* prefix, + upb_FieldDef* f, + const UPB_DESC(FieldDescriptorProto) * + field_proto) { + if (!UPB_DESC(FieldDescriptorProto_has_extendee)(field_proto)) { + _upb_DefBuilder_Errf(ctx, "extension for field '%s' had no extendee", + f->full_name); + } -const char* upb_MethodDef_FullName(const upb_MethodDef* m) { - return m->full_name; -} + upb_StringView name = UPB_DESC(FieldDescriptorProto_extendee)(field_proto); + const upb_MessageDef* m = + _upb_DefBuilder_Resolve(ctx, f->full_name, prefix, name, UPB_DEFTYPE_MSG); + f->msgdef = m; -const char* upb_MethodDef_Name(const upb_MethodDef* m) { - return _upb_DefBuilder_FullToShort(m->full_name); + if (!_upb_MessageDef_IsValidExtensionNumber(m, f->number_)) { + _upb_DefBuilder_Errf( + ctx, + "field number %u in extension %s has no extension range in message %s", + (unsigned)f->number_, f->full_name, upb_MessageDef_FullName(m)); + } } -int 
upb_MethodDef_Index(const upb_MethodDef* m) { return m->index; } +void _upb_FieldDef_BuildMiniTableExtension(upb_DefBuilder* ctx, + const upb_FieldDef* f) { + const upb_MiniTableExtension* ext = _upb_FieldDef_ExtensionMiniTable(f); -const upb_MessageDef* upb_MethodDef_InputType(const upb_MethodDef* m) { - return m->input_type; -} + if (ctx->layout) { + UPB_ASSERT(upb_FieldDef_Number(f) == ext->field.number); + } else { + upb_StringView desc; + if (!upb_FieldDef_MiniDescriptorEncode(f, ctx->tmp_arena, &desc)) { + _upb_DefBuilder_OomErr(ctx); + } -const upb_MessageDef* upb_MethodDef_OutputType(const upb_MethodDef* m) { - return m->output_type; -} + upb_MiniTableExtension* mut_ext = (upb_MiniTableExtension*)ext; + upb_MiniTableSub sub = {NULL}; + if (upb_FieldDef_IsSubMessage(f)) { + sub.submsg = upb_MessageDef_MiniTable(f->sub.msgdef); + } else if (_upb_FieldDef_IsClosedEnum(f)) { + sub.subenum = _upb_EnumDef_MiniTable(f->sub.enumdef); + } + bool ok2 = upb_MiniTableExtension_Init(desc.data, desc.size, mut_ext, + upb_MessageDef_MiniTable(f->msgdef), + sub, ctx->status); + if (!ok2) _upb_DefBuilder_Errf(ctx, "Could not build extension mini table"); + } -bool upb_MethodDef_ClientStreaming(const upb_MethodDef* m) { - return m->client_streaming; + bool ok = _upb_DefPool_InsertExt(ctx->symtab, ext, f); + if (!ok) _upb_DefBuilder_OomErr(ctx); } -bool upb_MethodDef_ServerStreaming(const upb_MethodDef* m) { - return m->server_streaming; -} +static void resolve_default(upb_DefBuilder* ctx, upb_FieldDef* f, + const UPB_DESC(FieldDescriptorProto) * + field_proto) { + // Have to delay resolving of the default value until now because of the enum + // case, since enum defaults are specified with a label. 
+ if (UPB_DESC(FieldDescriptorProto_has_default_value)(field_proto)) { + upb_StringView defaultval = + UPB_DESC(FieldDescriptorProto_default_value)(field_proto); -static void create_method(upb_DefBuilder* ctx, - const UPB_DESC(MethodDescriptorProto) * method_proto, - upb_ServiceDef* s, upb_MethodDef* m) { - upb_StringView name = UPB_DESC(MethodDescriptorProto_name)(method_proto); + if (upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto3) { + _upb_DefBuilder_Errf(ctx, + "proto3 fields cannot have explicit defaults (%s)", + f->full_name); + } - m->service = s; - m->full_name = - _upb_DefBuilder_MakeFullName(ctx, upb_ServiceDef_FullName(s), name); - m->client_streaming = - UPB_DESC(MethodDescriptorProto_client_streaming)(method_proto); - m->server_streaming = - UPB_DESC(MethodDescriptorProto_server_streaming)(method_proto); - m->input_type = _upb_DefBuilder_Resolve( - ctx, m->full_name, m->full_name, - UPB_DESC(MethodDescriptorProto_input_type)(method_proto), - UPB_DEFTYPE_MSG); - m->output_type = _upb_DefBuilder_Resolve( - ctx, m->full_name, m->full_name, - UPB_DESC(MethodDescriptorProto_output_type)(method_proto), - UPB_DEFTYPE_MSG); + if (upb_FieldDef_IsSubMessage(f)) { + _upb_DefBuilder_Errf(ctx, + "message fields cannot have explicit defaults (%s)", + f->full_name); + } - UPB_DEF_SET_OPTIONS(m->opts, MethodDescriptorProto, MethodOptions, - method_proto); + parse_default(ctx, defaultval.data, defaultval.size, f); + f->has_default = true; + } else { + set_default_default(ctx, f); + f->has_default = false; + } } -// Allocate and initialize an array of |n| method defs belonging to |s|. 
-upb_MethodDef* _upb_MethodDefs_New( - upb_DefBuilder* ctx, int n, - const UPB_DESC(MethodDescriptorProto) * const* protos, upb_ServiceDef* s) { - upb_MethodDef* m = _upb_DefBuilder_Alloc(ctx, sizeof(upb_MethodDef) * n); - for (int i = 0; i < n; i++) { - create_method(ctx, protos[i], s, &m[i]); - m[i].index = i; +void _upb_FieldDef_Resolve(upb_DefBuilder* ctx, const char* prefix, + upb_FieldDef* f) { + // We have to stash this away since resolve_subdef() may overwrite it. + const UPB_DESC(FieldDescriptorProto)* field_proto = f->sub.unresolved; + + resolve_subdef(ctx, prefix, f); + resolve_default(ctx, f, field_proto); + + if (f->is_extension) { + resolve_extension(ctx, prefix, f, field_proto); } - return m; } -#include -#include -#include // Must be last. -struct upb_OneofDef { - const UPB_DESC(OneofOptions) * opts; - const upb_MessageDef* parent; - const char* full_name; - int field_count; - bool synthetic; - const upb_FieldDef** fields; - upb_strtable ntof; // lookup a field by name - upb_inttable itof; // lookup a field by number (index) -#if UINTPTR_MAX == 0xffffffff - uint32_t padding; // Increase size to a multiple of 8. -#endif +struct upb_FileDef { + const UPB_DESC(FileOptions) * opts; + const char* name; + const char* package; + const char* edition; + + const upb_FileDef** deps; + const int32_t* public_deps; + const int32_t* weak_deps; + const upb_MessageDef* top_lvl_msgs; + const upb_EnumDef* top_lvl_enums; + const upb_FieldDef* top_lvl_exts; + const upb_ServiceDef* services; + const upb_MiniTableExtension** ext_layouts; + const upb_DefPool* symtab; + + int dep_count; + int public_dep_count; + int weak_dep_count; + int top_lvl_msg_count; + int top_lvl_enum_count; + int top_lvl_ext_count; + int service_count; + int ext_count; // All exts in the file. 
+ upb_Syntax syntax; }; -upb_OneofDef* _upb_OneofDef_At(const upb_OneofDef* o, int i) { - return (upb_OneofDef*)&o[i]; +const UPB_DESC(FileOptions) * upb_FileDef_Options(const upb_FileDef* f) { + return f->opts; } -const UPB_DESC(OneofOptions) * upb_OneofDef_Options(const upb_OneofDef* o) { - return o->opts; +bool upb_FileDef_HasOptions(const upb_FileDef* f) { + return f->opts != (void*)kUpbDefOptDefault; } -bool upb_OneofDef_HasOptions(const upb_OneofDef* o) { - return o->opts != (void*)kUpbDefOptDefault; -} +const char* upb_FileDef_Name(const upb_FileDef* f) { return f->name; } -const char* upb_OneofDef_FullName(const upb_OneofDef* o) { - return o->full_name; +const char* upb_FileDef_Package(const upb_FileDef* f) { + return f->package ? f->package : ""; } -const char* upb_OneofDef_Name(const upb_OneofDef* o) { - return _upb_DefBuilder_FullToShort(o->full_name); +const char* upb_FileDef_Edition(const upb_FileDef* f) { + return f->edition ? f->edition : ""; } -const upb_MessageDef* upb_OneofDef_ContainingType(const upb_OneofDef* o) { - return o->parent; -} +const char* _upb_FileDef_RawPackage(const upb_FileDef* f) { return f->package; } -int upb_OneofDef_FieldCount(const upb_OneofDef* o) { return o->field_count; } +upb_Syntax upb_FileDef_Syntax(const upb_FileDef* f) { return f->syntax; } -const upb_FieldDef* upb_OneofDef_Field(const upb_OneofDef* o, int i) { - UPB_ASSERT(i < o->field_count); - return o->fields[i]; +int upb_FileDef_TopLevelMessageCount(const upb_FileDef* f) { + return f->top_lvl_msg_count; } -int upb_OneofDef_numfields(const upb_OneofDef* o) { return o->field_count; } +int upb_FileDef_DependencyCount(const upb_FileDef* f) { return f->dep_count; } -uint32_t upb_OneofDef_Index(const upb_OneofDef* o) { - // Compute index in our parent's array. 
- return o - upb_MessageDef_Oneof(o->parent, 0); +int upb_FileDef_PublicDependencyCount(const upb_FileDef* f) { + return f->public_dep_count; } -bool upb_OneofDef_IsSynthetic(const upb_OneofDef* o) { return o->synthetic; } - -const upb_FieldDef* upb_OneofDef_LookupNameWithSize(const upb_OneofDef* o, - const char* name, - size_t size) { - upb_value val; - return upb_strtable_lookup2(&o->ntof, name, size, &val) - ? upb_value_getptr(val) - : NULL; +int upb_FileDef_WeakDependencyCount(const upb_FileDef* f) { + return f->weak_dep_count; } -const upb_FieldDef* upb_OneofDef_LookupName(const upb_OneofDef* o, - const char* name) { - return upb_OneofDef_LookupNameWithSize(o, name, strlen(name)); +const int32_t* _upb_FileDef_PublicDependencyIndexes(const upb_FileDef* f) { + return f->public_deps; } -const upb_FieldDef* upb_OneofDef_LookupNumber(const upb_OneofDef* o, - uint32_t num) { - upb_value val; - return upb_inttable_lookup(&o->itof, num, &val) ? upb_value_getptr(val) - : NULL; +const int32_t* _upb_FileDef_WeakDependencyIndexes(const upb_FileDef* f) { + return f->weak_deps; } -void _upb_OneofDef_Insert(upb_DefBuilder* ctx, upb_OneofDef* o, - const upb_FieldDef* f, const char* name, - size_t size) { - o->field_count++; - if (_upb_FieldDef_IsProto3Optional(f)) o->synthetic = true; - - const int number = upb_FieldDef_Number(f); - const upb_value v = upb_value_constptr(f); - - // TODO(salo): This lookup is unfortunate because we also perform it when - // inserting into the message's table. Unfortunately that step occurs after - // this one and moving things around could be tricky so let's leave it for - // a future refactoring. - const bool number_exists = upb_inttable_lookup(&o->itof, number, NULL); - if (UPB_UNLIKELY(number_exists)) { - _upb_DefBuilder_Errf(ctx, "oneof fields have the same number (%d)", number); - } - - // TODO(salo): More redundant work happening here. 
- const bool name_exists = upb_strtable_lookup2(&o->ntof, name, size, NULL); - if (UPB_UNLIKELY(name_exists)) { - _upb_DefBuilder_Errf(ctx, "oneof fields have the same name (%.*s)", - (int)size, name); - } - - const bool ok = upb_inttable_insert(&o->itof, number, v, ctx->arena) && - upb_strtable_insert(&o->ntof, name, size, v, ctx->arena); - if (UPB_UNLIKELY(!ok)) { - _upb_DefBuilder_OomErr(ctx); - } +int upb_FileDef_TopLevelEnumCount(const upb_FileDef* f) { + return f->top_lvl_enum_count; } -// Returns the synthetic count. -size_t _upb_OneofDefs_Finalize(upb_DefBuilder* ctx, upb_MessageDef* m) { - int synthetic_count = 0; - - for (int i = 0; i < upb_MessageDef_OneofCount(m); i++) { - upb_OneofDef* o = (upb_OneofDef*)upb_MessageDef_Oneof(m, i); - - if (o->synthetic && o->field_count != 1) { - _upb_DefBuilder_Errf(ctx, - "Synthetic oneofs must have one field, not %d: %s", - o->field_count, upb_OneofDef_Name(o)); - } - - if (o->synthetic) { - synthetic_count++; - } else if (synthetic_count != 0) { - _upb_DefBuilder_Errf( - ctx, "Synthetic oneofs must be after all other oneofs: %s", - upb_OneofDef_Name(o)); - } - - o->fields = - _upb_DefBuilder_Alloc(ctx, sizeof(upb_FieldDef*) * o->field_count); - o->field_count = 0; - } - - for (int i = 0; i < upb_MessageDef_FieldCount(m); i++) { - const upb_FieldDef* f = upb_MessageDef_Field(m, i); - upb_OneofDef* o = (upb_OneofDef*)upb_FieldDef_ContainingOneof(f); - if (o) { - o->fields[o->field_count++] = f; - } - } - - return synthetic_count; +int upb_FileDef_TopLevelExtensionCount(const upb_FileDef* f) { + return f->top_lvl_ext_count; } -static void create_oneofdef(upb_DefBuilder* ctx, upb_MessageDef* m, - const UPB_DESC(OneofDescriptorProto) * oneof_proto, - const upb_OneofDef* _o) { - upb_OneofDef* o = (upb_OneofDef*)_o; - upb_StringView name = UPB_DESC(OneofDescriptorProto_name)(oneof_proto); - - o->parent = m; - o->full_name = - _upb_DefBuilder_MakeFullName(ctx, upb_MessageDef_FullName(m), name); - o->field_count = 0; - 
o->synthetic = false; - - UPB_DEF_SET_OPTIONS(o->opts, OneofDescriptorProto, OneofOptions, oneof_proto); - - if (upb_MessageDef_FindByNameWithSize(m, name.data, name.size, NULL, NULL)) { - _upb_DefBuilder_Errf(ctx, "duplicate oneof name (%s)", o->full_name); - } - - upb_value v = _upb_DefType_Pack(o, UPB_DEFTYPE_ONEOF); - bool ok = _upb_MessageDef_Insert(m, name.data, name.size, v, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); - - ok = upb_inttable_init(&o->itof, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); +int upb_FileDef_ServiceCount(const upb_FileDef* f) { return f->service_count; } - ok = upb_strtable_init(&o->ntof, 4, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); +const upb_FileDef* upb_FileDef_Dependency(const upb_FileDef* f, int i) { + UPB_ASSERT(0 <= i && i < f->dep_count); + return f->deps[i]; } -// Allocate and initialize an array of |n| oneof defs. -upb_OneofDef* _upb_OneofDefs_New( - upb_DefBuilder* ctx, int n, - const UPB_DESC(OneofDescriptorProto) * const* protos, upb_MessageDef* m) { - _upb_DefType_CheckPadding(sizeof(upb_OneofDef)); - - upb_OneofDef* o = _upb_DefBuilder_Alloc(ctx, sizeof(upb_OneofDef) * n); - for (int i = 0; i < n; i++) { - create_oneofdef(ctx, m, protos[i], &o[i]); - } - return o; +const upb_FileDef* upb_FileDef_PublicDependency(const upb_FileDef* f, int i) { + UPB_ASSERT(0 <= i && i < f->public_dep_count); + return f->deps[f->public_deps[i]]; } - -// Must be last. 
- -struct upb_ServiceDef { - const UPB_DESC(ServiceOptions) * opts; - const upb_FileDef* file; - const char* full_name; - upb_MethodDef* methods; - int method_count; - int index; -}; - -upb_ServiceDef* _upb_ServiceDef_At(const upb_ServiceDef* s, int index) { - return (upb_ServiceDef*)&s[index]; +const upb_FileDef* upb_FileDef_WeakDependency(const upb_FileDef* f, int i) { + UPB_ASSERT(0 <= i && i < f->public_dep_count); + return f->deps[f->weak_deps[i]]; } -const UPB_DESC(ServiceOptions) * - upb_ServiceDef_Options(const upb_ServiceDef* s) { - return s->opts; +const upb_MessageDef* upb_FileDef_TopLevelMessage(const upb_FileDef* f, int i) { + UPB_ASSERT(0 <= i && i < f->top_lvl_msg_count); + return _upb_MessageDef_At(f->top_lvl_msgs, i); } -bool upb_ServiceDef_HasOptions(const upb_ServiceDef* s) { - return s->opts != (void*)kUpbDefOptDefault; +const upb_EnumDef* upb_FileDef_TopLevelEnum(const upb_FileDef* f, int i) { + UPB_ASSERT(0 <= i && i < f->top_lvl_enum_count); + return _upb_EnumDef_At(f->top_lvl_enums, i); } -const char* upb_ServiceDef_FullName(const upb_ServiceDef* s) { - return s->full_name; +const upb_FieldDef* upb_FileDef_TopLevelExtension(const upb_FileDef* f, int i) { + UPB_ASSERT(0 <= i && i < f->top_lvl_ext_count); + return _upb_FieldDef_At(f->top_lvl_exts, i); } -const char* upb_ServiceDef_Name(const upb_ServiceDef* s) { - return _upb_DefBuilder_FullToShort(s->full_name); +const upb_ServiceDef* upb_FileDef_Service(const upb_FileDef* f, int i) { + UPB_ASSERT(0 <= i && i < f->service_count); + return _upb_ServiceDef_At(f->services, i); } -int upb_ServiceDef_Index(const upb_ServiceDef* s) { return s->index; } - -const upb_FileDef* upb_ServiceDef_File(const upb_ServiceDef* s) { - return s->file; -} +const upb_DefPool* upb_FileDef_Pool(const upb_FileDef* f) { return f->symtab; } -int upb_ServiceDef_MethodCount(const upb_ServiceDef* s) { - return s->method_count; +const upb_MiniTableExtension* _upb_FileDef_ExtensionMiniTable( + const upb_FileDef* f, int i) { 
+ return f->ext_layouts[i]; } -const upb_MethodDef* upb_ServiceDef_Method(const upb_ServiceDef* s, int i) { - return (i < 0 || i >= s->method_count) ? NULL - : _upb_MethodDef_At(s->methods, i); +static char* strviewdup(upb_DefBuilder* ctx, upb_StringView view) { + char* ret = upb_strdup2(view.data, view.size, _upb_DefBuilder_Arena(ctx)); + if (!ret) _upb_DefBuilder_OomErr(ctx); + return ret; } -const upb_MethodDef* upb_ServiceDef_FindMethodByName(const upb_ServiceDef* s, - const char* name) { - for (int i = 0; i < s->method_count; i++) { - const upb_MethodDef* m = _upb_MethodDef_At(s->methods, i); - if (strcmp(name, upb_MethodDef_Name(m)) == 0) { - return m; - } - } - return NULL; +static bool streql_view(upb_StringView view, const char* b) { + return view.size == strlen(b) && memcmp(view.data, b, view.size) == 0; } -static void create_service(upb_DefBuilder* ctx, - const UPB_DESC(ServiceDescriptorProto) * svc_proto, - upb_ServiceDef* s) { - upb_StringView name; +static int count_exts_in_msg(const UPB_DESC(DescriptorProto) * msg_proto) { size_t n; + UPB_DESC(DescriptorProto_extension)(msg_proto, &n); + int ext_count = n; - // Must happen before _upb_DefBuilder_Add() - s->file = _upb_DefBuilder_File(ctx); + const UPB_DESC(DescriptorProto)* const* nested_msgs = + UPB_DESC(DescriptorProto_nested_type)(msg_proto, &n); + for (size_t i = 0; i < n; i++) { + ext_count += count_exts_in_msg(nested_msgs[i]); + } - name = UPB_DESC(ServiceDescriptorProto_name)(svc_proto); - const char* package = _upb_FileDef_RawPackage(s->file); - s->full_name = _upb_DefBuilder_MakeFullName(ctx, package, name); - _upb_DefBuilder_Add(ctx, s->full_name, - _upb_DefType_Pack(s, UPB_DEFTYPE_SERVICE)); + return ext_count; +} - const UPB_DESC(MethodDescriptorProto)* const* methods = - UPB_DESC(ServiceDescriptorProto_method)(svc_proto, &n); - s->method_count = n; - s->methods = _upb_MethodDefs_New(ctx, n, methods, s); +// Allocate and initialize one file def, and add it to the context object. 
+void _upb_FileDef_Create(upb_DefBuilder* ctx, + const UPB_DESC(FileDescriptorProto) * file_proto) { + upb_FileDef* file = _upb_DefBuilder_Alloc(ctx, sizeof(upb_FileDef)); + ctx->file = file; - UPB_DEF_SET_OPTIONS(s->opts, ServiceDescriptorProto, ServiceOptions, - svc_proto); -} + const UPB_DESC(DescriptorProto)* const* msgs; + const UPB_DESC(EnumDescriptorProto)* const* enums; + const UPB_DESC(FieldDescriptorProto)* const* exts; + const UPB_DESC(ServiceDescriptorProto)* const* services; + const upb_StringView* strs; + const int32_t* public_deps; + const int32_t* weak_deps; + size_t n; -upb_ServiceDef* _upb_ServiceDefs_New( - upb_DefBuilder* ctx, int n, - const UPB_DESC(ServiceDescriptorProto) * const* protos) { - _upb_DefType_CheckPadding(sizeof(upb_ServiceDef)); + file->symtab = ctx->symtab; - upb_ServiceDef* s = _upb_DefBuilder_Alloc(ctx, sizeof(upb_ServiceDef) * n); - for (int i = 0; i < n; i++) { - create_service(ctx, protos[i], &s[i]); - s[i].index = i; + // Count all extensions in the file, to build a flat array of layouts. + UPB_DESC(FileDescriptorProto_extension)(file_proto, &n); + int ext_count = n; + msgs = UPB_DESC(FileDescriptorProto_message_type)(file_proto, &n); + for (size_t i = 0; i < n; i++) { + ext_count += count_exts_in_msg(msgs[i]); } - return s; -} + file->ext_count = ext_count; + if (ctx->layout) { + // We are using the ext layouts that were passed in. + file->ext_layouts = ctx->layout->exts; + if (ctx->layout->ext_count != file->ext_count) { + _upb_DefBuilder_Errf(ctx, + "Extension count did not match layout (%d vs %d)", + ctx->layout->ext_count, file->ext_count); + } + } else { + // We are building ext layouts from scratch. 
+ file->ext_layouts = _upb_DefBuilder_Alloc( + ctx, sizeof(*file->ext_layouts) * file->ext_count); + upb_MiniTableExtension* ext = + _upb_DefBuilder_Alloc(ctx, sizeof(*ext) * file->ext_count); + for (int i = 0; i < file->ext_count; i++) { + file->ext_layouts[i] = &ext[i]; + } + } -#include + upb_StringView name = UPB_DESC(FileDescriptorProto_name)(file_proto); + file->name = strviewdup(ctx, name); + if (strlen(file->name) != name.size) { + _upb_DefBuilder_Errf(ctx, "File name contained embedded NULL"); + } + upb_StringView package = UPB_DESC(FileDescriptorProto_package)(file_proto); -// Must be last. + if (package.size) { + _upb_DefBuilder_CheckIdentFull(ctx, package); + file->package = strviewdup(ctx, package); + } else { + file->package = NULL; + } -// A few fake field types for our tables. -enum { - kUpb_FakeFieldType_FieldNotFound = 0, - kUpb_FakeFieldType_MessageSetItem = 19, -}; + upb_StringView edition = UPB_DESC(FileDescriptorProto_edition)(file_proto); -// DecodeOp: an action to be performed for a wire-type/field-type combination. -enum { - // Special ops: we don't write data to regular fields for these. - kUpb_DecodeOp_UnknownField = -1, - kUpb_DecodeOp_MessageSetItem = -2, + if (edition.size == 0) { + file->edition = NULL; + } else { + // TODO(b/267770604): How should we validate this? + file->edition = strviewdup(ctx, edition); + if (strlen(file->edition) != edition.size) { + _upb_DefBuilder_Errf(ctx, "Edition name contained embedded NULL"); + } + } - // Scalar-only ops. - kUpb_DecodeOp_Scalar1Byte = 0, - kUpb_DecodeOp_Scalar4Byte = 2, - kUpb_DecodeOp_Scalar8Byte = 3, - kUpb_DecodeOp_Enum = 1, + if (UPB_DESC(FileDescriptorProto_has_syntax)(file_proto)) { + upb_StringView syntax = UPB_DESC(FileDescriptorProto_syntax)(file_proto); - // Scalar/repeated ops. 
- kUpb_DecodeOp_String = 4, - kUpb_DecodeOp_Bytes = 5, - kUpb_DecodeOp_SubMessage = 6, + if (streql_view(syntax, "proto2")) { + file->syntax = kUpb_Syntax_Proto2; + } else if (streql_view(syntax, "proto3")) { + file->syntax = kUpb_Syntax_Proto3; + } else { + _upb_DefBuilder_Errf(ctx, "Invalid syntax '" UPB_STRINGVIEW_FORMAT "'", + UPB_STRINGVIEW_ARGS(syntax)); + } + } else { + file->syntax = kUpb_Syntax_Proto2; + } - // Repeated-only ops (also see macros below). - kUpb_DecodeOp_PackedEnum = 13, -}; + // Read options. + UPB_DEF_SET_OPTIONS(file->opts, FileDescriptorProto, FileOptions, file_proto); -// For packed fields it is helpful to be able to recover the lg2 of the data -// size from the op. -#define OP_FIXPCK_LG2(n) (n + 5) /* n in [2, 3] => op in [7, 8] */ -#define OP_VARPCK_LG2(n) (n + 9) /* n in [0, 2, 3] => op in [9, 11, 12] */ + // Verify dependencies. + strs = UPB_DESC(FileDescriptorProto_dependency)(file_proto, &n); + file->dep_count = n; + file->deps = _upb_DefBuilder_Alloc(ctx, sizeof(*file->deps) * n); -typedef union { - bool bool_val; - uint32_t uint32_val; - uint64_t uint64_val; - uint32_t size; -} wireval; + for (size_t i = 0; i < n; i++) { + upb_StringView str = strs[i]; + file->deps[i] = + upb_DefPool_FindFileByNameWithSize(ctx->symtab, str.data, str.size); + if (!file->deps[i]) { + _upb_DefBuilder_Errf(ctx, + "Depends on file '" UPB_STRINGVIEW_FORMAT + "', but it has not been loaded", + UPB_STRINGVIEW_ARGS(str)); + } + } -static const char* _upb_Decoder_DecodeMessage(upb_Decoder* d, const char* ptr, - upb_Message* msg, - const upb_MiniTable* layout); + public_deps = UPB_DESC(FileDescriptorProto_public_dependency)(file_proto, &n); + file->public_dep_count = n; + file->public_deps = + _upb_DefBuilder_Alloc(ctx, sizeof(*file->public_deps) * n); + int32_t* mutable_public_deps = (int32_t*)file->public_deps; + for (size_t i = 0; i < n; i++) { + if (public_deps[i] >= file->dep_count) { + _upb_DefBuilder_Errf(ctx, "public_dep %d is out of range", + 
(int)public_deps[i]); + } + mutable_public_deps[i] = public_deps[i]; + } -UPB_NORETURN static void* _upb_Decoder_ErrorJmp(upb_Decoder* d, - upb_DecodeStatus status) { - assert(status != kUpb_DecodeStatus_Ok); - d->status = status; - UPB_LONGJMP(d->err, 1); -} + weak_deps = UPB_DESC(FileDescriptorProto_weak_dependency)(file_proto, &n); + file->weak_dep_count = n; + file->weak_deps = _upb_DefBuilder_Alloc(ctx, sizeof(*file->weak_deps) * n); + int32_t* mutable_weak_deps = (int32_t*)file->weak_deps; + for (size_t i = 0; i < n; i++) { + if (weak_deps[i] >= file->dep_count) { + _upb_DefBuilder_Errf(ctx, "weak_dep %d is out of range", + (int)weak_deps[i]); + } + mutable_weak_deps[i] = weak_deps[i]; + } -const char* _upb_FastDecoder_ErrorJmp(upb_Decoder* d, int status) { - assert(status != kUpb_DecodeStatus_Ok); - d->status = status; - UPB_LONGJMP(d->err, 1); - return NULL; -} + // Create enums. + enums = UPB_DESC(FileDescriptorProto_enum_type)(file_proto, &n); + file->top_lvl_enum_count = n; + file->top_lvl_enums = _upb_EnumDefs_New(ctx, n, enums, NULL); -static void _upb_Decoder_VerifyUtf8(upb_Decoder* d, const char* buf, int len) { - if (!_upb_Decoder_VerifyUtf8Inline(buf, len)) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); - } -} + // Create extensions. + exts = UPB_DESC(FileDescriptorProto_extension)(file_proto, &n); + file->top_lvl_ext_count = n; + file->top_lvl_exts = _upb_Extensions_New(ctx, n, exts, file->package, NULL); -static bool _upb_Decoder_Reserve(upb_Decoder* d, upb_Array* arr, size_t elem) { - bool need_realloc = arr->capacity - arr->size < elem; - if (need_realloc && !_upb_array_realloc(arr, arr->size + elem, &d->arena)) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + // Create messages. + msgs = UPB_DESC(FileDescriptorProto_message_type)(file_proto, &n); + file->top_lvl_msg_count = n; + file->top_lvl_msgs = _upb_MessageDefs_New(ctx, n, msgs, NULL); + + // Create services. 
+ services = UPB_DESC(FileDescriptorProto_service)(file_proto, &n); + file->service_count = n; + file->services = _upb_ServiceDefs_New(ctx, n, services); + + // Now that all names are in the table, build layouts and resolve refs. + + for (int i = 0; i < file->top_lvl_msg_count; i++) { + upb_MessageDef* m = (upb_MessageDef*)upb_FileDef_TopLevelMessage(file, i); + _upb_MessageDef_Resolve(ctx, m); } - return need_realloc; -} -typedef struct { - const char* ptr; - uint64_t val; -} _upb_DecodeLongVarintReturn; + for (int i = 0; i < file->top_lvl_ext_count; i++) { + upb_FieldDef* f = (upb_FieldDef*)upb_FileDef_TopLevelExtension(file, i); + _upb_FieldDef_Resolve(ctx, file->package, f); + } -UPB_NOINLINE -static _upb_DecodeLongVarintReturn _upb_Decoder_DecodeLongVarint( - const char* ptr, uint64_t val) { - _upb_DecodeLongVarintReturn ret = {NULL, 0}; - uint64_t byte; - int i; - for (i = 1; i < 10; i++) { - byte = (uint8_t)ptr[i]; - val += (byte - 1) << (i * 7); - if (!(byte & 0x80)) { - ret.ptr = ptr + i + 1; - ret.val = val; - return ret; - } + for (int i = 0; i < file->top_lvl_msg_count; i++) { + upb_MessageDef* m = (upb_MessageDef*)upb_FileDef_TopLevelMessage(file, i); + _upb_MessageDef_CreateMiniTable(ctx, (upb_MessageDef*)m); } - return ret; -} -UPB_FORCEINLINE -static const char* _upb_Decoder_DecodeVarint(upb_Decoder* d, const char* ptr, - uint64_t* val) { - uint64_t byte = (uint8_t)*ptr; - if (UPB_LIKELY((byte & 0x80) == 0)) { - *val = byte; - return ptr + 1; - } else { - _upb_DecodeLongVarintReturn res = _upb_Decoder_DecodeLongVarint(ptr, byte); - if (!res.ptr) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); - *val = res.val; - return res.ptr; + for (int i = 0; i < file->top_lvl_ext_count; i++) { + upb_FieldDef* f = (upb_FieldDef*)upb_FileDef_TopLevelExtension(file, i); + _upb_FieldDef_BuildMiniTableExtension(ctx, f); } -} -UPB_FORCEINLINE -static const char* _upb_Decoder_DecodeTag(upb_Decoder* d, const char* ptr, - uint32_t* val) { - uint64_t byte = 
(uint8_t)*ptr; - if (UPB_LIKELY((byte & 0x80) == 0)) { - *val = byte; - return ptr + 1; - } else { - const char* start = ptr; - _upb_DecodeLongVarintReturn res = _upb_Decoder_DecodeLongVarint(ptr, byte); - if (!res.ptr || res.ptr - start > 5 || res.val > UINT32_MAX) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); - } - *val = res.val; - return res.ptr; + for (int i = 0; i < file->top_lvl_msg_count; i++) { + upb_MessageDef* m = (upb_MessageDef*)upb_FileDef_TopLevelMessage(file, i); + _upb_MessageDef_LinkMiniTable(ctx, m); } -} -UPB_FORCEINLINE -static const char* upb_Decoder_DecodeSize(upb_Decoder* d, const char* ptr, - uint32_t* size) { - uint64_t size64; - ptr = _upb_Decoder_DecodeVarint(d, ptr, &size64); - if (size64 >= INT32_MAX || - !upb_EpsCopyInputStream_CheckSize(&d->input, ptr, (int)size64)) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); + if (file->ext_count) { + bool ok = upb_ExtensionRegistry_AddArray( + _upb_DefPool_ExtReg(ctx->symtab), file->ext_layouts, file->ext_count); + if (!ok) _upb_DefBuilder_OomErr(ctx); } - *size = size64; - return ptr; } -static void _upb_Decoder_MungeInt32(wireval* val) { - if (!_upb_IsLittleEndian()) { - /* The next stage will memcpy(dst, &val, 4) */ - val->uint32_val = val->uint64_val; - } + +#include + + +// Must be last. 
+ +bool upb_Message_HasFieldByDef(const upb_Message* msg, const upb_FieldDef* f) { + UPB_ASSERT(upb_FieldDef_HasPresence(f)); + return upb_Message_HasField(msg, upb_FieldDef_MiniTable(f)); } -static void _upb_Decoder_Munge(int type, wireval* val) { - switch (type) { - case kUpb_FieldType_Bool: - val->bool_val = val->uint64_val != 0; - break; - case kUpb_FieldType_SInt32: { - uint32_t n = val->uint64_val; - val->uint32_val = (n >> 1) ^ -(int32_t)(n & 1); - break; - } - case kUpb_FieldType_SInt64: { - uint64_t n = val->uint64_val; - val->uint64_val = (n >> 1) ^ -(int64_t)(n & 1); - break; - } - case kUpb_FieldType_Int32: - case kUpb_FieldType_UInt32: - case kUpb_FieldType_Enum: - _upb_Decoder_MungeInt32(val); - break; +const upb_FieldDef* upb_Message_WhichOneof(const upb_Message* msg, + const upb_OneofDef* o) { + const upb_FieldDef* f = upb_OneofDef_Field(o, 0); + if (upb_OneofDef_IsSynthetic(o)) { + UPB_ASSERT(upb_OneofDef_FieldCount(o) == 1); + return upb_Message_HasFieldByDef(msg, f) ? f : NULL; + } else { + const upb_MiniTableField* field = upb_FieldDef_MiniTable(f); + uint32_t oneof_case = upb_Message_WhichOneofFieldNumber(msg, field); + f = oneof_case ? upb_OneofDef_LookupNumber(o, oneof_case) : NULL; + UPB_ASSERT((f != NULL) == (oneof_case != 0)); + return f; } } -static upb_Message* _upb_Decoder_NewSubMessage(upb_Decoder* d, - const upb_MiniTableSub* subs, - const upb_MiniTableField* field, - upb_TaggedMessagePtr* target) { - const upb_MiniTable* subl = subs[field->UPB_PRIVATE(submsg_index)].submsg; - UPB_ASSERT(subl); - upb_Message* msg = _upb_Message_New(subl, &d->arena); - if (!msg) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - - // Extensions should not be unlinked. A message extension should not be - // registered until its sub-message type is available to be linked. 
- bool is_empty = subl == &_kUpb_MiniTable_Empty; - bool is_extension = field->mode & kUpb_LabelFlags_IsExtension; - UPB_ASSERT(!(is_empty && is_extension)); +upb_MessageValue upb_Message_GetFieldByDef(const upb_Message* msg, + const upb_FieldDef* f) { + upb_MessageValue default_val = upb_FieldDef_Default(f); + upb_MessageValue ret; + _upb_Message_GetField(msg, upb_FieldDef_MiniTable(f), &default_val, &ret); + return ret; +} - if (is_empty && !(d->options & kUpb_DecodeOption_ExperimentalAllowUnlinked)) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_UnlinkedSubMessage); +upb_MutableMessageValue upb_Message_Mutable(upb_Message* msg, + const upb_FieldDef* f, + upb_Arena* a) { + UPB_ASSERT(upb_FieldDef_IsSubMessage(f) || upb_FieldDef_IsRepeated(f)); + if (upb_FieldDef_HasPresence(f) && !upb_Message_HasFieldByDef(msg, f)) { + // We need to skip the upb_Message_GetFieldByDef() call in this case. + goto make; } - upb_TaggedMessagePtr tagged = _upb_TaggedMessagePtr_Pack(msg, is_empty); - memcpy(target, &tagged, sizeof(tagged)); - return msg; -} - -static upb_Message* _upb_Decoder_ReuseSubMessage( - upb_Decoder* d, const upb_MiniTableSub* subs, - const upb_MiniTableField* field, upb_TaggedMessagePtr* target) { - upb_TaggedMessagePtr tagged = *target; - const upb_MiniTable* subl = subs[field->UPB_PRIVATE(submsg_index)].submsg; - UPB_ASSERT(subl); - if (!upb_TaggedMessagePtr_IsEmpty(tagged) || subl == &_kUpb_MiniTable_Empty) { - return _upb_TaggedMessagePtr_GetMessage(tagged); + upb_MessageValue val = upb_Message_GetFieldByDef(msg, f); + if (val.array_val) { + return (upb_MutableMessageValue){.array = (upb_Array*)val.array_val}; } - // We found an empty message from a previous parse that was performed before - // this field was linked. But it is linked now, so we want to allocate a new - // message of the correct type and promote data into it before continuing. 
- upb_Message* existing = _upb_TaggedMessagePtr_GetEmptyMessage(tagged); - upb_Message* promoted = _upb_Decoder_NewSubMessage(d, subs, field, target); - size_t size; - const char* unknown = upb_Message_GetUnknown(existing, &size); - upb_DecodeStatus status = upb_Decode(unknown, size, promoted, subl, d->extreg, - d->options, &d->arena); - if (status != kUpb_DecodeStatus_Ok) _upb_Decoder_ErrorJmp(d, status); - return promoted; -} + upb_MutableMessageValue ret; +make: + if (!a) return (upb_MutableMessageValue){.array = NULL}; + if (upb_FieldDef_IsMap(f)) { + const upb_MessageDef* entry = upb_FieldDef_MessageSubDef(f); + const upb_FieldDef* key = + upb_MessageDef_FindFieldByNumber(entry, kUpb_MapEntry_KeyFieldNumber); + const upb_FieldDef* value = + upb_MessageDef_FindFieldByNumber(entry, kUpb_MapEntry_ValueFieldNumber); + ret.map = + upb_Map_New(a, upb_FieldDef_CType(key), upb_FieldDef_CType(value)); + } else if (upb_FieldDef_IsRepeated(f)) { + ret.array = upb_Array_New(a, upb_FieldDef_CType(f)); + } else { + UPB_ASSERT(upb_FieldDef_IsSubMessage(f)); + const upb_MessageDef* m = upb_FieldDef_MessageSubDef(f); + ret.msg = upb_Message_New(upb_MessageDef_MiniTable(m), a); + } -static const char* _upb_Decoder_ReadString(upb_Decoder* d, const char* ptr, - int size, upb_StringView* str) { - const char* str_ptr = ptr; - ptr = upb_EpsCopyInputStream_ReadString(&d->input, &str_ptr, size, &d->arena); - if (!ptr) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - str->data = str_ptr; - str->size = size; - return ptr; -} + val.array_val = ret.array; + upb_Message_SetFieldByDef(msg, f, val, a); -UPB_FORCEINLINE -static const char* _upb_Decoder_RecurseSubMessage(upb_Decoder* d, - const char* ptr, - upb_Message* submsg, - const upb_MiniTable* subl, - uint32_t expected_end_group) { - if (--d->depth < 0) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_MaxDepthExceeded); - } - ptr = _upb_Decoder_DecodeMessage(d, ptr, submsg, subl); - d->depth++; - if (d->end_group != 
expected_end_group) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); - } - return ptr; + return ret; } -UPB_FORCEINLINE -static const char* _upb_Decoder_DecodeSubMessage( - upb_Decoder* d, const char* ptr, upb_Message* submsg, - const upb_MiniTableSub* subs, const upb_MiniTableField* field, int size) { - int saved_delta = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, size); - const upb_MiniTable* subl = subs[field->UPB_PRIVATE(submsg_index)].submsg; - UPB_ASSERT(subl); - ptr = _upb_Decoder_RecurseSubMessage(d, ptr, submsg, subl, DECODE_NOGROUP); - upb_EpsCopyInputStream_PopLimit(&d->input, ptr, saved_delta); - return ptr; +bool upb_Message_SetFieldByDef(upb_Message* msg, const upb_FieldDef* f, + upb_MessageValue val, upb_Arena* a) { + return _upb_Message_SetField(msg, upb_FieldDef_MiniTable(f), &val, a); } -UPB_FORCEINLINE -static const char* _upb_Decoder_DecodeGroup(upb_Decoder* d, const char* ptr, - upb_Message* submsg, - const upb_MiniTable* subl, - uint32_t number) { - if (_upb_Decoder_IsDone(d, &ptr)) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); - } - ptr = _upb_Decoder_RecurseSubMessage(d, ptr, submsg, subl, number); - d->end_group = DECODE_NOGROUP; - return ptr; +void upb_Message_ClearFieldByDef(upb_Message* msg, const upb_FieldDef* f) { + upb_Message_ClearField(msg, upb_FieldDef_MiniTable(f)); } -UPB_FORCEINLINE -static const char* _upb_Decoder_DecodeUnknownGroup(upb_Decoder* d, - const char* ptr, - uint32_t number) { - return _upb_Decoder_DecodeGroup(d, ptr, NULL, NULL, number); +void upb_Message_ClearByDef(upb_Message* msg, const upb_MessageDef* m) { + upb_Message_Clear(msg, upb_MessageDef_MiniTable(m)); } -UPB_FORCEINLINE -static const char* _upb_Decoder_DecodeKnownGroup( - upb_Decoder* d, const char* ptr, upb_Message* submsg, - const upb_MiniTableSub* subs, const upb_MiniTableField* field) { - const upb_MiniTable* subl = subs[field->UPB_PRIVATE(submsg_index)].submsg; - UPB_ASSERT(subl); - return _upb_Decoder_DecodeGroup(d, 
ptr, submsg, subl, field->number); -} +bool upb_Message_Next(const upb_Message* msg, const upb_MessageDef* m, + const upb_DefPool* ext_pool, const upb_FieldDef** out_f, + upb_MessageValue* out_val, size_t* iter) { + size_t i = *iter; + size_t n = upb_MessageDef_FieldCount(m); + UPB_UNUSED(ext_pool); -static char* upb_Decoder_EncodeVarint32(uint32_t val, char* ptr) { - do { - uint8_t byte = val & 0x7fU; - val >>= 7; - if (val) byte |= 0x80U; - *(ptr++) = byte; - } while (val); - return ptr; -} + // Iterate over normal fields, returning the first one that is set. + while (++i < n) { + const upb_FieldDef* f = upb_MessageDef_Field(m, i); + const upb_MiniTableField* field = upb_FieldDef_MiniTable(f); + upb_MessageValue val = upb_Message_GetFieldByDef(msg, f); -static void _upb_Decoder_AddUnknownVarints(upb_Decoder* d, upb_Message* msg, - uint32_t val1, uint32_t val2) { - char buf[20]; - char* end = buf; - end = upb_Decoder_EncodeVarint32(val1, end); - end = upb_Decoder_EncodeVarint32(val2, end); + // Skip field if unset or empty. 
+ if (upb_MiniTableField_HasPresence(field)) { + if (!upb_Message_HasFieldByDef(msg, f)) continue; + } else { + switch (upb_FieldMode_Get(field)) { + case kUpb_FieldMode_Map: + if (!val.map_val || upb_Map_Size(val.map_val) == 0) continue; + break; + case kUpb_FieldMode_Array: + if (!val.array_val || upb_Array_Size(val.array_val) == 0) continue; + break; + case kUpb_FieldMode_Scalar: + if (!_upb_MiniTable_ValueIsNonZero(&val, field)) continue; + break; + } + } - if (!_upb_Message_AddUnknown(msg, buf, end - buf, &d->arena)) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + *out_val = val; + *out_f = f; + *iter = i; + return true; } -} -UPB_NOINLINE -static bool _upb_Decoder_CheckEnumSlow(upb_Decoder* d, const char* ptr, - upb_Message* msg, - const upb_MiniTableEnum* e, - const upb_MiniTableField* field, - uint32_t v) { - if (_upb_MiniTable_CheckEnumValueSlow(e, v)) return true; + if (ext_pool) { + // Return any extensions that are set. + size_t count; + const upb_Message_Extension* ext = _upb_Message_Getexts(msg, &count); + if (i - n < count) { + ext += count - 1 - (i - n); + memcpy(out_val, &ext->data, sizeof(*out_val)); + *out_f = upb_DefPool_FindExtensionByMiniTable(ext_pool, ext->ext); + *iter = i; + return true; + } + } - // Unrecognized enum goes into unknown fields. - // For packed fields the tag could be arbitrarily far in the past, so we - // just re-encode the tag and value here. - uint32_t tag = ((uint32_t)field->number << 3) | kUpb_WireType_Varint; - upb_Message* unknown_msg = - field->mode & kUpb_LabelFlags_IsExtension ? 
d->unknown_msg : msg; - _upb_Decoder_AddUnknownVarints(d, unknown_msg, tag, v); + *iter = i; return false; } -UPB_FORCEINLINE -static bool _upb_Decoder_CheckEnum(upb_Decoder* d, const char* ptr, - upb_Message* msg, const upb_MiniTableEnum* e, - const upb_MiniTableField* field, - wireval* val) { - uint32_t v = val->uint32_val; +bool _upb_Message_DiscardUnknown(upb_Message* msg, const upb_MessageDef* m, + int depth) { + size_t iter = kUpb_Message_Begin; + const upb_FieldDef* f; + upb_MessageValue val; + bool ret = true; - _kUpb_FastEnumCheck_Status status = _upb_MiniTable_CheckEnumValueFast(e, v); - if (UPB_LIKELY(status == _kUpb_FastEnumCheck_ValueIsInEnum)) return true; - return _upb_Decoder_CheckEnumSlow(d, ptr, msg, e, field, v); -} + if (--depth == 0) return false; -UPB_NOINLINE -static const char* _upb_Decoder_DecodeEnumArray(upb_Decoder* d, const char* ptr, - upb_Message* msg, - upb_Array* arr, - const upb_MiniTableSub* subs, - const upb_MiniTableField* field, - wireval* val) { - const upb_MiniTableEnum* e = subs[field->UPB_PRIVATE(submsg_index)].subenum; - if (!_upb_Decoder_CheckEnum(d, ptr, msg, e, field, val)) return ptr; - void* mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->size * 4, void); - arr->size++; - memcpy(mem, val, 4); - return ptr; -} + _upb_Message_DiscardUnknown_shallow(msg); -UPB_FORCEINLINE -static const char* _upb_Decoder_DecodeFixedPacked( - upb_Decoder* d, const char* ptr, upb_Array* arr, wireval* val, - const upb_MiniTableField* field, int lg2) { - int mask = (1 << lg2) - 1; - size_t count = val->size >> lg2; - if ((val->size & mask) != 0) { - // Length isn't a round multiple of elem size. - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); - } - _upb_Decoder_Reserve(d, arr, count); - void* mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << lg2, void); - arr->size += count; - // Note: if/when the decoder supports multi-buffer input, we will need to - // handle buffer seams here. 
- if (_upb_IsLittleEndian()) { - ptr = upb_EpsCopyInputStream_Copy(&d->input, ptr, mem, val->size); - } else { - int delta = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, val->size); - char* dst = mem; - while (!_upb_Decoder_IsDone(d, &ptr)) { - if (lg2 == 2) { - ptr = upb_WireReader_ReadFixed32(ptr, dst); - dst += 4; - } else { - UPB_ASSERT(lg2 == 3); - ptr = upb_WireReader_ReadFixed64(ptr, dst); - dst += 8; + while (upb_Message_Next(msg, m, NULL /*ext_pool*/, &f, &val, &iter)) { + const upb_MessageDef* subm = upb_FieldDef_MessageSubDef(f); + if (!subm) continue; + if (upb_FieldDef_IsMap(f)) { + const upb_FieldDef* val_f = upb_MessageDef_FindFieldByNumber(subm, 2); + const upb_MessageDef* val_m = upb_FieldDef_MessageSubDef(val_f); + upb_Map* map = (upb_Map*)val.map_val; + size_t iter = kUpb_Map_Begin; + + if (!val_m) continue; + + upb_MessageValue map_key, map_val; + while (upb_Map_Next(map, &map_key, &map_val, &iter)) { + if (!_upb_Message_DiscardUnknown((upb_Message*)map_val.msg_val, val_m, + depth)) { + ret = false; + } + } + } else if (upb_FieldDef_IsRepeated(f)) { + const upb_Array* arr = val.array_val; + size_t i, n = upb_Array_Size(arr); + for (i = 0; i < n; i++) { + upb_MessageValue elem = upb_Array_Get(arr, i); + if (!_upb_Message_DiscardUnknown((upb_Message*)elem.msg_val, subm, + depth)) { + ret = false; + } + } + } else { + if (!_upb_Message_DiscardUnknown((upb_Message*)val.msg_val, subm, + depth)) { + ret = false; } } - upb_EpsCopyInputStream_PopLimit(&d->input, ptr, delta); } - return ptr; + return ret; } -UPB_FORCEINLINE -static const char* _upb_Decoder_DecodeVarintPacked( - upb_Decoder* d, const char* ptr, upb_Array* arr, wireval* val, - const upb_MiniTableField* field, int lg2) { - int scale = 1 << lg2; - int saved_limit = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, val->size); - char* out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << lg2, void); - while (!_upb_Decoder_IsDone(d, &ptr)) { - wireval elem; - ptr = _upb_Decoder_DecodeVarint(d, 
ptr, &elem.uint64_val); - _upb_Decoder_Munge(field->UPB_PRIVATE(descriptortype), &elem); - if (_upb_Decoder_Reserve(d, arr, 1)) { - out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << lg2, void); - } - arr->size++; - memcpy(out, &elem, scale); - out += scale; - } - upb_EpsCopyInputStream_PopLimit(&d->input, ptr, saved_limit); - return ptr; +bool upb_Message_DiscardUnknown(upb_Message* msg, const upb_MessageDef* m, + int maxdepth) { + return _upb_Message_DiscardUnknown(msg, m, maxdepth); } -UPB_NOINLINE -static const char* _upb_Decoder_DecodeEnumPacked( - upb_Decoder* d, const char* ptr, upb_Message* msg, upb_Array* arr, - const upb_MiniTableSub* subs, const upb_MiniTableField* field, - wireval* val) { - const upb_MiniTableEnum* e = subs[field->UPB_PRIVATE(submsg_index)].subenum; - int saved_limit = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, val->size); - char* out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size * 4, void); - while (!_upb_Decoder_IsDone(d, &ptr)) { - wireval elem; - ptr = _upb_Decoder_DecodeVarint(d, ptr, &elem.uint64_val); - _upb_Decoder_MungeInt32(&elem); - if (!_upb_Decoder_CheckEnum(d, ptr, msg, e, field, &elem)) { - continue; - } - if (_upb_Decoder_Reserve(d, arr, 1)) { - out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size * 4, void); - } - arr->size++; - memcpy(out, &elem, 4); - out += 4; - } - upb_EpsCopyInputStream_PopLimit(&d->input, ptr, saved_limit); - return ptr; -} -upb_Array* _upb_Decoder_CreateArray(upb_Decoder* d, - const upb_MiniTableField* field) { - /* Maps descriptor type -> elem_size_lg2. 
*/ - static const uint8_t kElemSizeLg2[] = { - [0] = -1, // invalid descriptor type - [kUpb_FieldType_Double] = 3, - [kUpb_FieldType_Float] = 2, - [kUpb_FieldType_Int64] = 3, - [kUpb_FieldType_UInt64] = 3, - [kUpb_FieldType_Int32] = 2, - [kUpb_FieldType_Fixed64] = 3, - [kUpb_FieldType_Fixed32] = 2, - [kUpb_FieldType_Bool] = 0, - [kUpb_FieldType_String] = UPB_SIZE(3, 4), - [kUpb_FieldType_Group] = UPB_SIZE(2, 3), - [kUpb_FieldType_Message] = UPB_SIZE(2, 3), - [kUpb_FieldType_Bytes] = UPB_SIZE(3, 4), - [kUpb_FieldType_UInt32] = 2, - [kUpb_FieldType_Enum] = 2, - [kUpb_FieldType_SFixed32] = 2, - [kUpb_FieldType_SFixed64] = 3, - [kUpb_FieldType_SInt32] = 2, - [kUpb_FieldType_SInt64] = 3, - }; - size_t lg2 = kElemSizeLg2[field->UPB_PRIVATE(descriptortype)]; - upb_Array* ret = _upb_Array_New(&d->arena, 4, lg2); - if (!ret) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - return ret; -} +// Must be last. -static const char* _upb_Decoder_DecodeToArray(upb_Decoder* d, const char* ptr, - upb_Message* msg, - const upb_MiniTableSub* subs, - const upb_MiniTableField* field, - wireval* val, int op) { - upb_Array** arrp = UPB_PTR_AT(msg, field->offset, void); - upb_Array* arr = *arrp; - void* mem; +struct upb_MessageDef { + const UPB_DESC(MessageOptions) * opts; + const upb_MiniTable* layout; + const upb_FileDef* file; + const upb_MessageDef* containing_type; + const char* full_name; - if (arr) { - _upb_Decoder_Reserve(d, arr, 1); + // Tables for looking up fields by number and name. + upb_inttable itof; + upb_strtable ntof; + + /* All nested defs. + * MEM: We could save some space here by putting nested defs in a contiguous + * region and calculating counts from offsets or vice-versa. 
*/ + const upb_FieldDef* fields; + const upb_OneofDef* oneofs; + const upb_ExtensionRange* ext_ranges; + const upb_StringView* res_names; + const upb_MessageDef* nested_msgs; + const upb_MessageReservedRange* res_ranges; + const upb_EnumDef* nested_enums; + const upb_FieldDef* nested_exts; + + // TODO(salo): These counters don't need anywhere near 32 bits. + int field_count; + int real_oneof_count; + int oneof_count; + int ext_range_count; + int res_range_count; + int res_name_count; + int nested_msg_count; + int nested_enum_count; + int nested_ext_count; + bool in_message_set; + bool is_sorted; + upb_WellKnown well_known_type; +#if UINTPTR_MAX == 0xffffffff + uint32_t padding; // Increase size to a multiple of 8. +#endif +}; + +static void assign_msg_wellknowntype(upb_MessageDef* m) { + const char* name = m->full_name; + if (name == NULL) { + m->well_known_type = kUpb_WellKnown_Unspecified; + return; + } + if (!strcmp(name, "google.protobuf.Any")) { + m->well_known_type = kUpb_WellKnown_Any; + } else if (!strcmp(name, "google.protobuf.FieldMask")) { + m->well_known_type = kUpb_WellKnown_FieldMask; + } else if (!strcmp(name, "google.protobuf.Duration")) { + m->well_known_type = kUpb_WellKnown_Duration; + } else if (!strcmp(name, "google.protobuf.Timestamp")) { + m->well_known_type = kUpb_WellKnown_Timestamp; + } else if (!strcmp(name, "google.protobuf.DoubleValue")) { + m->well_known_type = kUpb_WellKnown_DoubleValue; + } else if (!strcmp(name, "google.protobuf.FloatValue")) { + m->well_known_type = kUpb_WellKnown_FloatValue; + } else if (!strcmp(name, "google.protobuf.Int64Value")) { + m->well_known_type = kUpb_WellKnown_Int64Value; + } else if (!strcmp(name, "google.protobuf.UInt64Value")) { + m->well_known_type = kUpb_WellKnown_UInt64Value; + } else if (!strcmp(name, "google.protobuf.Int32Value")) { + m->well_known_type = kUpb_WellKnown_Int32Value; + } else if (!strcmp(name, "google.protobuf.UInt32Value")) { + m->well_known_type = kUpb_WellKnown_UInt32Value; + } 
else if (!strcmp(name, "google.protobuf.BoolValue")) { + m->well_known_type = kUpb_WellKnown_BoolValue; + } else if (!strcmp(name, "google.protobuf.StringValue")) { + m->well_known_type = kUpb_WellKnown_StringValue; + } else if (!strcmp(name, "google.protobuf.BytesValue")) { + m->well_known_type = kUpb_WellKnown_BytesValue; + } else if (!strcmp(name, "google.protobuf.Value")) { + m->well_known_type = kUpb_WellKnown_Value; + } else if (!strcmp(name, "google.protobuf.ListValue")) { + m->well_known_type = kUpb_WellKnown_ListValue; + } else if (!strcmp(name, "google.protobuf.Struct")) { + m->well_known_type = kUpb_WellKnown_Struct; } else { - arr = _upb_Decoder_CreateArray(d, field); - *arrp = arr; + m->well_known_type = kUpb_WellKnown_Unspecified; } +} - switch (op) { - case kUpb_DecodeOp_Scalar1Byte: - case kUpb_DecodeOp_Scalar4Byte: - case kUpb_DecodeOp_Scalar8Byte: - /* Append scalar value. */ - mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << op, void); - arr->size++; - memcpy(mem, val, 1 << op); - return ptr; - case kUpb_DecodeOp_String: - _upb_Decoder_VerifyUtf8(d, ptr, val->size); - /* Fallthrough. */ - case kUpb_DecodeOp_Bytes: { - /* Append bytes. */ - upb_StringView* str = (upb_StringView*)_upb_array_ptr(arr) + arr->size; - arr->size++; - return _upb_Decoder_ReadString(d, ptr, val->size, str); - } - case kUpb_DecodeOp_SubMessage: { - /* Append submessage / group. 
*/ - upb_TaggedMessagePtr* target = UPB_PTR_AT( - _upb_array_ptr(arr), arr->size * sizeof(void*), upb_TaggedMessagePtr); - upb_Message* submsg = _upb_Decoder_NewSubMessage(d, subs, field, target); - arr->size++; - if (UPB_UNLIKELY(field->UPB_PRIVATE(descriptortype) == - kUpb_FieldType_Group)) { - return _upb_Decoder_DecodeKnownGroup(d, ptr, submsg, subs, field); - } else { - return _upb_Decoder_DecodeSubMessage(d, ptr, submsg, subs, field, - val->size); - } +upb_MessageDef* _upb_MessageDef_At(const upb_MessageDef* m, int i) { + return (upb_MessageDef*)&m[i]; +} + +bool _upb_MessageDef_IsValidExtensionNumber(const upb_MessageDef* m, int n) { + for (int i = 0; i < m->ext_range_count; i++) { + const upb_ExtensionRange* r = upb_MessageDef_ExtensionRange(m, i); + if (upb_ExtensionRange_Start(r) <= n && n < upb_ExtensionRange_End(r)) { + return true; } - case OP_FIXPCK_LG2(2): - case OP_FIXPCK_LG2(3): - return _upb_Decoder_DecodeFixedPacked(d, ptr, arr, val, field, - op - OP_FIXPCK_LG2(0)); - case OP_VARPCK_LG2(0): - case OP_VARPCK_LG2(2): - case OP_VARPCK_LG2(3): - return _upb_Decoder_DecodeVarintPacked(d, ptr, arr, val, field, - op - OP_VARPCK_LG2(0)); - case kUpb_DecodeOp_Enum: - return _upb_Decoder_DecodeEnumArray(d, ptr, msg, arr, subs, field, val); - case kUpb_DecodeOp_PackedEnum: - return _upb_Decoder_DecodeEnumPacked(d, ptr, msg, arr, subs, field, val); - default: - UPB_UNREACHABLE(); } + return false; } -upb_Map* _upb_Decoder_CreateMap(upb_Decoder* d, const upb_MiniTable* entry) { - /* Maps descriptor type -> upb map size. 
*/ - static const uint8_t kSizeInMap[] = { - [0] = -1, // invalid descriptor type */ - [kUpb_FieldType_Double] = 8, - [kUpb_FieldType_Float] = 4, - [kUpb_FieldType_Int64] = 8, - [kUpb_FieldType_UInt64] = 8, - [kUpb_FieldType_Int32] = 4, - [kUpb_FieldType_Fixed64] = 8, - [kUpb_FieldType_Fixed32] = 4, - [kUpb_FieldType_Bool] = 1, - [kUpb_FieldType_String] = UPB_MAPTYPE_STRING, - [kUpb_FieldType_Group] = sizeof(void*), - [kUpb_FieldType_Message] = sizeof(void*), - [kUpb_FieldType_Bytes] = UPB_MAPTYPE_STRING, - [kUpb_FieldType_UInt32] = 4, - [kUpb_FieldType_Enum] = 4, - [kUpb_FieldType_SFixed32] = 4, - [kUpb_FieldType_SFixed64] = 8, - [kUpb_FieldType_SInt32] = 4, - [kUpb_FieldType_SInt64] = 8, - }; +const UPB_DESC(MessageOptions) * + upb_MessageDef_Options(const upb_MessageDef* m) { + return m->opts; +} - const upb_MiniTableField* key_field = &entry->fields[0]; - const upb_MiniTableField* val_field = &entry->fields[1]; - char key_size = kSizeInMap[key_field->UPB_PRIVATE(descriptortype)]; - char val_size = kSizeInMap[val_field->UPB_PRIVATE(descriptortype)]; - UPB_ASSERT(key_field->offset == offsetof(upb_MapEntryData, k)); - UPB_ASSERT(val_field->offset == offsetof(upb_MapEntryData, v)); - upb_Map* ret = _upb_Map_New(&d->arena, key_size, val_size); - if (!ret) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - return ret; +bool upb_MessageDef_HasOptions(const upb_MessageDef* m) { + return m->opts != (void*)kUpbDefOptDefault; } -static const char* _upb_Decoder_DecodeToMap(upb_Decoder* d, const char* ptr, - upb_Message* msg, - const upb_MiniTableSub* subs, - const upb_MiniTableField* field, - wireval* val) { - upb_Map** map_p = UPB_PTR_AT(msg, field->offset, upb_Map*); - upb_Map* map = *map_p; - upb_MapEntry ent; - UPB_ASSERT(upb_MiniTableField_Type(field) == kUpb_FieldType_Message); - const upb_MiniTable* entry = subs[field->UPB_PRIVATE(submsg_index)].submsg; +const char* upb_MessageDef_FullName(const upb_MessageDef* m) { + return m->full_name; +} - 
UPB_ASSERT(entry); - UPB_ASSERT(entry->field_count == 2); - UPB_ASSERT(!upb_IsRepeatedOrMap(&entry->fields[0])); - UPB_ASSERT(!upb_IsRepeatedOrMap(&entry->fields[1])); +const upb_FileDef* upb_MessageDef_File(const upb_MessageDef* m) { + return m->file; +} - if (!map) { - map = _upb_Decoder_CreateMap(d, entry); - *map_p = map; - } +const upb_MessageDef* upb_MessageDef_ContainingType(const upb_MessageDef* m) { + return m->containing_type; +} - // Parse map entry. - memset(&ent, 0, sizeof(ent)); +const char* upb_MessageDef_Name(const upb_MessageDef* m) { + return _upb_DefBuilder_FullToShort(m->full_name); +} - if (entry->fields[1].UPB_PRIVATE(descriptortype) == kUpb_FieldType_Message || - entry->fields[1].UPB_PRIVATE(descriptortype) == kUpb_FieldType_Group) { - // Create proactively to handle the case where it doesn't appear. - upb_TaggedMessagePtr msg; - _upb_Decoder_NewSubMessage(d, entry->subs, &entry->fields[1], &msg); - ent.data.v.val = upb_value_uintptr(msg); - } +upb_Syntax upb_MessageDef_Syntax(const upb_MessageDef* m) { + return upb_FileDef_Syntax(m->file); +} - ptr = - _upb_Decoder_DecodeSubMessage(d, ptr, &ent.data, subs, field, val->size); - // check if ent had any unknown fields - size_t size; - upb_Message_GetUnknown(&ent.data, &size); - if (size != 0) { - char* buf; - size_t size; - uint32_t tag = ((uint32_t)field->number << 3) | kUpb_WireType_Delimited; - upb_EncodeStatus status = - upb_Encode(&ent.data, entry, 0, &d->arena, &buf, &size); - if (status != kUpb_EncodeStatus_Ok) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - } - _upb_Decoder_AddUnknownVarints(d, msg, tag, size); - if (!_upb_Message_AddUnknown(msg, buf, size, &d->arena)) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - } - } else { - if (_upb_Map_Insert(map, &ent.data.k, map->key_size, &ent.data.v, - map->val_size, - &d->arena) == kUpb_MapInsertStatus_OutOfMemory) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - } - } - return ptr; +const 
upb_FieldDef* upb_MessageDef_FindFieldByNumber(const upb_MessageDef* m, + uint32_t i) { + upb_value val; + return upb_inttable_lookup(&m->itof, i, &val) ? upb_value_getconstptr(val) + : NULL; } -static const char* _upb_Decoder_DecodeToSubMessage( - upb_Decoder* d, const char* ptr, upb_Message* msg, - const upb_MiniTableSub* subs, const upb_MiniTableField* field, wireval* val, - int op) { - void* mem = UPB_PTR_AT(msg, field->offset, void); - int type = field->UPB_PRIVATE(descriptortype); +const upb_FieldDef* upb_MessageDef_FindFieldByNameWithSize( + const upb_MessageDef* m, const char* name, size_t size) { + upb_value val; - if (UPB_UNLIKELY(op == kUpb_DecodeOp_Enum) && - !_upb_Decoder_CheckEnum(d, ptr, msg, - subs[field->UPB_PRIVATE(submsg_index)].subenum, - field, val)) { - return ptr; + if (!upb_strtable_lookup2(&m->ntof, name, size, &val)) { + return NULL; } - /* Set presence if necessary. */ - if (field->presence > 0) { - _upb_sethas_field(msg, field); - } else if (field->presence < 0) { - /* Oneof case */ - uint32_t* oneof_case = _upb_oneofcase_field(msg, field); - if (op == kUpb_DecodeOp_SubMessage && *oneof_case != field->number) { - memset(mem, 0, sizeof(void*)); - } - *oneof_case = field->number; - } + return _upb_DefType_Unpack(val, UPB_DEFTYPE_FIELD); +} - /* Store into message. */ - switch (op) { - case kUpb_DecodeOp_SubMessage: { - upb_TaggedMessagePtr* submsgp = mem; - upb_Message* submsg; - if (*submsgp) { - submsg = _upb_Decoder_ReuseSubMessage(d, subs, field, submsgp); - } else { - submsg = _upb_Decoder_NewSubMessage(d, subs, field, submsgp); - } - if (UPB_UNLIKELY(type == kUpb_FieldType_Group)) { - ptr = _upb_Decoder_DecodeKnownGroup(d, ptr, submsg, subs, field); - } else { - ptr = _upb_Decoder_DecodeSubMessage(d, ptr, submsg, subs, field, - val->size); - } - break; - } - case kUpb_DecodeOp_String: - _upb_Decoder_VerifyUtf8(d, ptr, val->size); - /* Fallthrough. 
*/ - case kUpb_DecodeOp_Bytes: - return _upb_Decoder_ReadString(d, ptr, val->size, mem); - case kUpb_DecodeOp_Scalar8Byte: - memcpy(mem, val, 8); - break; - case kUpb_DecodeOp_Enum: - case kUpb_DecodeOp_Scalar4Byte: - memcpy(mem, val, 4); - break; - case kUpb_DecodeOp_Scalar1Byte: - memcpy(mem, val, 1); - break; - default: - UPB_UNREACHABLE(); +const upb_OneofDef* upb_MessageDef_FindOneofByNameWithSize( + const upb_MessageDef* m, const char* name, size_t size) { + upb_value val; + + if (!upb_strtable_lookup2(&m->ntof, name, size, &val)) { + return NULL; } - return ptr; + return _upb_DefType_Unpack(val, UPB_DEFTYPE_ONEOF); } -UPB_NOINLINE -const char* _upb_Decoder_CheckRequired(upb_Decoder* d, const char* ptr, - const upb_Message* msg, - const upb_MiniTable* l) { - assert(l->required_count); - if (UPB_LIKELY((d->options & kUpb_DecodeOption_CheckRequired) == 0)) { - return ptr; - } - uint64_t msg_head; - memcpy(&msg_head, msg, 8); - msg_head = _upb_BigEndian_Swap64(msg_head); - if (upb_MiniTable_requiredmask(l) & ~msg_head) { - d->missing_required = true; - } - return ptr; +bool _upb_MessageDef_Insert(upb_MessageDef* m, const char* name, size_t len, + upb_value v, upb_Arena* a) { + return upb_strtable_insert(&m->ntof, name, len, v, a); } -UPB_FORCEINLINE -static bool _upb_Decoder_TryFastDispatch(upb_Decoder* d, const char** ptr, - upb_Message* msg, - const upb_MiniTable* layout) { -#if UPB_FASTTABLE - if (layout && layout->table_mask != (unsigned char)-1) { - uint16_t tag = _upb_FastDecoder_LoadTag(*ptr); - intptr_t table = decode_totable(layout); - *ptr = _upb_FastDecoder_TagDispatch(d, *ptr, msg, table, 0, tag); - return true; +bool upb_MessageDef_FindByNameWithSize(const upb_MessageDef* m, + const char* name, size_t len, + const upb_FieldDef** out_f, + const upb_OneofDef** out_o) { + upb_value val; + + if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) { + return false; } -#endif - return false; + + const upb_FieldDef* f = _upb_DefType_Unpack(val, 
UPB_DEFTYPE_FIELD); + const upb_OneofDef* o = _upb_DefType_Unpack(val, UPB_DEFTYPE_ONEOF); + if (out_f) *out_f = f; + if (out_o) *out_o = o; + return f || o; /* False if this was a JSON name. */ } -static const char* upb_Decoder_SkipField(upb_Decoder* d, const char* ptr, - uint32_t tag) { - int field_number = tag >> 3; - int wire_type = tag & 7; - switch (wire_type) { - case kUpb_WireType_Varint: { - uint64_t val; - return _upb_Decoder_DecodeVarint(d, ptr, &val); - } - case kUpb_WireType_64Bit: - return ptr + 8; - case kUpb_WireType_32Bit: - return ptr + 4; - case kUpb_WireType_Delimited: { - uint32_t size; - ptr = upb_Decoder_DecodeSize(d, ptr, &size); - return ptr + size; - } - case kUpb_WireType_StartGroup: - return _upb_Decoder_DecodeUnknownGroup(d, ptr, field_number); - default: - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); +const upb_FieldDef* upb_MessageDef_FindByJsonNameWithSize( + const upb_MessageDef* m, const char* name, size_t size) { + upb_value val; + const upb_FieldDef* f; + + if (!upb_strtable_lookup2(&m->ntof, name, size, &val)) { + return NULL; } + + f = _upb_DefType_Unpack(val, UPB_DEFTYPE_FIELD); + if (!f) f = _upb_DefType_Unpack(val, UPB_DEFTYPE_FIELD_JSONNAME); + + return f; } -enum { - kStartItemTag = ((kUpb_MsgSet_Item << 3) | kUpb_WireType_StartGroup), - kEndItemTag = ((kUpb_MsgSet_Item << 3) | kUpb_WireType_EndGroup), - kTypeIdTag = ((kUpb_MsgSet_TypeId << 3) | kUpb_WireType_Varint), - kMessageTag = ((kUpb_MsgSet_Message << 3) | kUpb_WireType_Delimited), -}; +int upb_MessageDef_ExtensionRangeCount(const upb_MessageDef* m) { + return m->ext_range_count; +} -static void upb_Decoder_AddKnownMessageSetItem( - upb_Decoder* d, upb_Message* msg, const upb_MiniTableExtension* item_mt, - const char* data, uint32_t size) { - upb_Message_Extension* ext = - _upb_Message_GetOrCreateExtension(msg, item_mt, &d->arena); - if (UPB_UNLIKELY(!ext)) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - } - upb_Message* submsg = 
_upb_Decoder_NewSubMessage( - d, &ext->ext->sub, &ext->ext->field, (upb_TaggedMessagePtr*)&ext->data); - upb_DecodeStatus status = upb_Decode(data, size, submsg, item_mt->sub.submsg, - d->extreg, d->options, &d->arena); - if (status != kUpb_DecodeStatus_Ok) _upb_Decoder_ErrorJmp(d, status); +int upb_MessageDef_ReservedRangeCount(const upb_MessageDef* m) { + return m->res_range_count; } -static void upb_Decoder_AddUnknownMessageSetItem(upb_Decoder* d, - upb_Message* msg, - uint32_t type_id, - const char* message_data, - uint32_t message_size) { - char buf[60]; - char* ptr = buf; - ptr = upb_Decoder_EncodeVarint32(kStartItemTag, ptr); - ptr = upb_Decoder_EncodeVarint32(kTypeIdTag, ptr); - ptr = upb_Decoder_EncodeVarint32(type_id, ptr); - ptr = upb_Decoder_EncodeVarint32(kMessageTag, ptr); - ptr = upb_Decoder_EncodeVarint32(message_size, ptr); - char* split = ptr; +int upb_MessageDef_ReservedNameCount(const upb_MessageDef* m) { + return m->res_name_count; +} - ptr = upb_Decoder_EncodeVarint32(kEndItemTag, ptr); - char* end = ptr; +int upb_MessageDef_FieldCount(const upb_MessageDef* m) { + return m->field_count; +} - if (!_upb_Message_AddUnknown(msg, buf, split - buf, &d->arena) || - !_upb_Message_AddUnknown(msg, message_data, message_size, &d->arena) || - !_upb_Message_AddUnknown(msg, split, end - split, &d->arena)) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - } +int upb_MessageDef_OneofCount(const upb_MessageDef* m) { + return m->oneof_count; } -static void upb_Decoder_AddMessageSetItem(upb_Decoder* d, upb_Message* msg, - const upb_MiniTable* t, - uint32_t type_id, const char* data, - uint32_t size) { - const upb_MiniTableExtension* item_mt = - upb_ExtensionRegistry_Lookup(d->extreg, t, type_id); - if (item_mt) { - upb_Decoder_AddKnownMessageSetItem(d, msg, item_mt, data, size); - } else { - upb_Decoder_AddUnknownMessageSetItem(d, msg, type_id, data, size); - } +int upb_MessageDef_RealOneofCount(const upb_MessageDef* m) { + return 
m->real_oneof_count; } -static const char* upb_Decoder_DecodeMessageSetItem( - upb_Decoder* d, const char* ptr, upb_Message* msg, - const upb_MiniTable* layout) { - uint32_t type_id = 0; - upb_StringView preserved = {NULL, 0}; - typedef enum { - kUpb_HaveId = 1 << 0, - kUpb_HavePayload = 1 << 1, - } StateMask; - StateMask state_mask = 0; - while (!_upb_Decoder_IsDone(d, &ptr)) { - uint32_t tag; - ptr = _upb_Decoder_DecodeTag(d, ptr, &tag); - switch (tag) { - case kEndItemTag: - return ptr; - case kTypeIdTag: { - uint64_t tmp; - ptr = _upb_Decoder_DecodeVarint(d, ptr, &tmp); - if (state_mask & kUpb_HaveId) break; // Ignore dup. - state_mask |= kUpb_HaveId; - type_id = tmp; - if (state_mask & kUpb_HavePayload) { - upb_Decoder_AddMessageSetItem(d, msg, layout, type_id, preserved.data, - preserved.size); - } - break; - } - case kMessageTag: { - uint32_t size; - ptr = upb_Decoder_DecodeSize(d, ptr, &size); - const char* data = ptr; - ptr += size; - if (state_mask & kUpb_HavePayload) break; // Ignore dup. - state_mask |= kUpb_HavePayload; - if (state_mask & kUpb_HaveId) { - upb_Decoder_AddMessageSetItem(d, msg, layout, type_id, data, size); - } else { - // Out of order, we must preserve the payload. - preserved.data = data; - preserved.size = size; - } - break; - } - default: - // We do not preserve unexpected fields inside a message set item. 
- ptr = upb_Decoder_SkipField(d, ptr, tag); - break; - } - } - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); +int upb_MessageDef_NestedMessageCount(const upb_MessageDef* m) { + return m->nested_msg_count; } -static const upb_MiniTableField* _upb_Decoder_FindField(upb_Decoder* d, - const upb_MiniTable* t, - uint32_t field_number, - int* last_field_index) { - static upb_MiniTableField none = { - 0, 0, 0, 0, kUpb_FakeFieldType_FieldNotFound, 0}; - if (t == NULL) return &none; +int upb_MessageDef_NestedEnumCount(const upb_MessageDef* m) { + return m->nested_enum_count; +} - size_t idx = ((size_t)field_number) - 1; // 0 wraps to SIZE_MAX - if (idx < t->dense_below) { - /* Fastest case: index into dense fields. */ - goto found; - } +int upb_MessageDef_NestedExtensionCount(const upb_MessageDef* m) { + return m->nested_ext_count; +} - if (t->dense_below < t->field_count) { - /* Linear search non-dense fields. Resume scanning from last_field_index - * since fields are usually in order. */ - size_t last = *last_field_index; - for (idx = last; idx < t->field_count; idx++) { - if (t->fields[idx].number == field_number) { - goto found; - } - } +const upb_MiniTable* upb_MessageDef_MiniTable(const upb_MessageDef* m) { + return m->layout; +} - for (idx = t->dense_below; idx < last; idx++) { - if (t->fields[idx].number == field_number) { - goto found; - } - } - } +const upb_ExtensionRange* upb_MessageDef_ExtensionRange(const upb_MessageDef* m, + int i) { + UPB_ASSERT(0 <= i && i < m->ext_range_count); + return _upb_ExtensionRange_At(m->ext_ranges, i); +} - if (d->extreg) { - switch (t->ext) { - case kUpb_ExtMode_Extendable: { - const upb_MiniTableExtension* ext = - upb_ExtensionRegistry_Lookup(d->extreg, t, field_number); - if (ext) return &ext->field; - break; - } - case kUpb_ExtMode_IsMessageSet: - if (field_number == kUpb_MsgSet_Item) { - static upb_MiniTableField item = { - 0, 0, 0, 0, kUpb_FakeFieldType_MessageSetItem, 0}; - return &item; - } - break; - } - } +const 
upb_MessageReservedRange* upb_MessageDef_ReservedRange( + const upb_MessageDef* m, int i) { + UPB_ASSERT(0 <= i && i < m->res_range_count); + return _upb_MessageReservedRange_At(m->res_ranges, i); +} - return &none; /* Unknown field. */ +upb_StringView upb_MessageDef_ReservedName(const upb_MessageDef* m, int i) { + UPB_ASSERT(0 <= i && i < m->res_name_count); + return m->res_names[i]; +} -found: - UPB_ASSERT(t->fields[idx].number == field_number); - *last_field_index = idx; - return &t->fields[idx]; +const upb_FieldDef* upb_MessageDef_Field(const upb_MessageDef* m, int i) { + UPB_ASSERT(0 <= i && i < m->field_count); + return _upb_FieldDef_At(m->fields, i); } -int _upb_Decoder_GetVarintOp(const upb_MiniTableField* field) { - static const int8_t kVarintOps[] = { - [kUpb_FakeFieldType_FieldNotFound] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Double] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Float] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Int64] = kUpb_DecodeOp_Scalar8Byte, - [kUpb_FieldType_UInt64] = kUpb_DecodeOp_Scalar8Byte, - [kUpb_FieldType_Int32] = kUpb_DecodeOp_Scalar4Byte, - [kUpb_FieldType_Fixed64] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Fixed32] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Bool] = kUpb_DecodeOp_Scalar1Byte, - [kUpb_FieldType_String] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Group] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Message] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Bytes] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_UInt32] = kUpb_DecodeOp_Scalar4Byte, - [kUpb_FieldType_Enum] = kUpb_DecodeOp_Enum, - [kUpb_FieldType_SFixed32] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_SFixed64] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_SInt32] = kUpb_DecodeOp_Scalar4Byte, - [kUpb_FieldType_SInt64] = kUpb_DecodeOp_Scalar8Byte, - [kUpb_FakeFieldType_MessageSetItem] = kUpb_DecodeOp_UnknownField, - }; +const upb_OneofDef* upb_MessageDef_Oneof(const upb_MessageDef* m, int i) { + UPB_ASSERT(0 <= i 
&& i < m->oneof_count); + return _upb_OneofDef_At(m->oneofs, i); +} - return kVarintOps[field->UPB_PRIVATE(descriptortype)]; +const upb_MessageDef* upb_MessageDef_NestedMessage(const upb_MessageDef* m, + int i) { + UPB_ASSERT(0 <= i && i < m->nested_msg_count); + return &m->nested_msgs[i]; } -UPB_FORCEINLINE -static void _upb_Decoder_CheckUnlinked(upb_Decoder* d, const upb_MiniTable* mt, - const upb_MiniTableField* field, - int* op) { - // If sub-message is not linked, treat as unknown. - if (field->mode & kUpb_LabelFlags_IsExtension) return; - const upb_MiniTableSub* sub = &mt->subs[field->UPB_PRIVATE(submsg_index)]; - if ((d->options & kUpb_DecodeOption_ExperimentalAllowUnlinked) || - sub->submsg != &_kUpb_MiniTable_Empty) { - return; - } -#ifndef NDEBUG - const upb_MiniTableField* oneof = upb_MiniTable_GetOneof(mt, field); - if (oneof) { - // All other members of the oneof must be message fields that are also - // unlinked. - do { - assert(upb_MiniTableField_CType(oneof) == kUpb_CType_Message); - const upb_MiniTableSub* oneof_sub = - &mt->subs[oneof->UPB_PRIVATE(submsg_index)]; - assert(!oneof_sub); - } while (upb_MiniTable_NextOneofField(mt, &oneof)); - } -#endif // NDEBUG - *op = kUpb_DecodeOp_UnknownField; +const upb_EnumDef* upb_MessageDef_NestedEnum(const upb_MessageDef* m, int i) { + UPB_ASSERT(0 <= i && i < m->nested_enum_count); + return _upb_EnumDef_At(m->nested_enums, i); } -int _upb_Decoder_GetDelimitedOp(upb_Decoder* d, const upb_MiniTable* mt, - const upb_MiniTableField* field) { - enum { kRepeatedBase = 19 }; +const upb_FieldDef* upb_MessageDef_NestedExtension(const upb_MessageDef* m, + int i) { + UPB_ASSERT(0 <= i && i < m->nested_ext_count); + return _upb_FieldDef_At(m->nested_exts, i); +} - static const int8_t kDelimitedOps[] = { - /* For non-repeated field type. */ - [kUpb_FakeFieldType_FieldNotFound] = - kUpb_DecodeOp_UnknownField, // Field not found. 
- [kUpb_FieldType_Double] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Float] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Int64] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_UInt64] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Int32] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Fixed64] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Fixed32] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Bool] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_String] = kUpb_DecodeOp_String, - [kUpb_FieldType_Group] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Message] = kUpb_DecodeOp_SubMessage, - [kUpb_FieldType_Bytes] = kUpb_DecodeOp_Bytes, - [kUpb_FieldType_UInt32] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Enum] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_SFixed32] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_SFixed64] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_SInt32] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_SInt64] = kUpb_DecodeOp_UnknownField, - [kUpb_FakeFieldType_MessageSetItem] = kUpb_DecodeOp_UnknownField, - // For repeated field type. 
*/ - [kRepeatedBase + kUpb_FieldType_Double] = OP_FIXPCK_LG2(3), - [kRepeatedBase + kUpb_FieldType_Float] = OP_FIXPCK_LG2(2), - [kRepeatedBase + kUpb_FieldType_Int64] = OP_VARPCK_LG2(3), - [kRepeatedBase + kUpb_FieldType_UInt64] = OP_VARPCK_LG2(3), - [kRepeatedBase + kUpb_FieldType_Int32] = OP_VARPCK_LG2(2), - [kRepeatedBase + kUpb_FieldType_Fixed64] = OP_FIXPCK_LG2(3), - [kRepeatedBase + kUpb_FieldType_Fixed32] = OP_FIXPCK_LG2(2), - [kRepeatedBase + kUpb_FieldType_Bool] = OP_VARPCK_LG2(0), - [kRepeatedBase + kUpb_FieldType_String] = kUpb_DecodeOp_String, - [kRepeatedBase + kUpb_FieldType_Group] = kUpb_DecodeOp_SubMessage, - [kRepeatedBase + kUpb_FieldType_Message] = kUpb_DecodeOp_SubMessage, - [kRepeatedBase + kUpb_FieldType_Bytes] = kUpb_DecodeOp_Bytes, - [kRepeatedBase + kUpb_FieldType_UInt32] = OP_VARPCK_LG2(2), - [kRepeatedBase + kUpb_FieldType_Enum] = kUpb_DecodeOp_PackedEnum, - [kRepeatedBase + kUpb_FieldType_SFixed32] = OP_FIXPCK_LG2(2), - [kRepeatedBase + kUpb_FieldType_SFixed64] = OP_FIXPCK_LG2(3), - [kRepeatedBase + kUpb_FieldType_SInt32] = OP_VARPCK_LG2(2), - [kRepeatedBase + kUpb_FieldType_SInt64] = OP_VARPCK_LG2(3), - // Omitting kUpb_FakeFieldType_MessageSetItem, because we never emit a - // repeated msgset type - }; +upb_WellKnown upb_MessageDef_WellKnownType(const upb_MessageDef* m) { + return m->well_known_type; +} - int ndx = field->UPB_PRIVATE(descriptortype); - if (upb_FieldMode_Get(field) == kUpb_FieldMode_Array) ndx += kRepeatedBase; - int op = kDelimitedOps[ndx]; +bool _upb_MessageDef_InMessageSet(const upb_MessageDef* m) { + return m->in_message_set; +} - if (op == kUpb_DecodeOp_SubMessage) { - _upb_Decoder_CheckUnlinked(d, mt, field, &op); - } +const upb_FieldDef* upb_MessageDef_FindFieldByName(const upb_MessageDef* m, + const char* name) { + return upb_MessageDef_FindFieldByNameWithSize(m, name, strlen(name)); +} - return op; +const upb_OneofDef* upb_MessageDef_FindOneofByName(const upb_MessageDef* m, + const char* name) { + return 
upb_MessageDef_FindOneofByNameWithSize(m, name, strlen(name)); } -UPB_FORCEINLINE -static const char* _upb_Decoder_DecodeWireValue(upb_Decoder* d, const char* ptr, - const upb_MiniTable* mt, - const upb_MiniTableField* field, - int wire_type, wireval* val, - int* op) { - static const unsigned kFixed32OkMask = (1 << kUpb_FieldType_Float) | - (1 << kUpb_FieldType_Fixed32) | - (1 << kUpb_FieldType_SFixed32); +bool upb_MessageDef_IsMapEntry(const upb_MessageDef* m) { + return UPB_DESC(MessageOptions_map_entry)(m->opts); +} - static const unsigned kFixed64OkMask = (1 << kUpb_FieldType_Double) | - (1 << kUpb_FieldType_Fixed64) | - (1 << kUpb_FieldType_SFixed64); +bool upb_MessageDef_IsMessageSet(const upb_MessageDef* m) { + return UPB_DESC(MessageOptions_message_set_wire_format)(m->opts); +} - switch (wire_type) { - case kUpb_WireType_Varint: - ptr = _upb_Decoder_DecodeVarint(d, ptr, &val->uint64_val); - *op = _upb_Decoder_GetVarintOp(field); - _upb_Decoder_Munge(field->UPB_PRIVATE(descriptortype), val); - return ptr; - case kUpb_WireType_32Bit: - *op = kUpb_DecodeOp_Scalar4Byte; - if (((1 << field->UPB_PRIVATE(descriptortype)) & kFixed32OkMask) == 0) { - *op = kUpb_DecodeOp_UnknownField; - } - return upb_WireReader_ReadFixed32(ptr, &val->uint32_val); - case kUpb_WireType_64Bit: - *op = kUpb_DecodeOp_Scalar8Byte; - if (((1 << field->UPB_PRIVATE(descriptortype)) & kFixed64OkMask) == 0) { - *op = kUpb_DecodeOp_UnknownField; - } - return upb_WireReader_ReadFixed64(ptr, &val->uint64_val); - case kUpb_WireType_Delimited: - ptr = upb_Decoder_DecodeSize(d, ptr, &val->size); - *op = _upb_Decoder_GetDelimitedOp(d, mt, field); - return ptr; - case kUpb_WireType_StartGroup: - val->uint32_val = field->number; - if (field->UPB_PRIVATE(descriptortype) == kUpb_FieldType_Group) { - *op = kUpb_DecodeOp_SubMessage; - _upb_Decoder_CheckUnlinked(d, mt, field, op); - } else if (field->UPB_PRIVATE(descriptortype) == - kUpb_FakeFieldType_MessageSetItem) { - *op = kUpb_DecodeOp_MessageSetItem; 
- } else { - *op = kUpb_DecodeOp_UnknownField; - } - return ptr; - default: - break; - } - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); +static upb_MiniTable* _upb_MessageDef_MakeMiniTable(upb_DefBuilder* ctx, + const upb_MessageDef* m) { + upb_StringView desc; + // Note: this will assign layout_index for fields, so upb_FieldDef_MiniTable() + // is safe to call only after this call. + bool ok = upb_MessageDef_MiniDescriptorEncode(m, ctx->tmp_arena, &desc); + if (!ok) _upb_DefBuilder_OomErr(ctx); + + void** scratch_data = _upb_DefPool_ScratchData(ctx->symtab); + size_t* scratch_size = _upb_DefPool_ScratchSize(ctx->symtab); + upb_MiniTable* ret = upb_MiniTable_BuildWithBuf( + desc.data, desc.size, ctx->platform, ctx->arena, scratch_data, + scratch_size, ctx->status); + if (!ret) _upb_DefBuilder_FailJmp(ctx); + + return ret; } -UPB_FORCEINLINE -static const char* _upb_Decoder_DecodeKnownField( - upb_Decoder* d, const char* ptr, upb_Message* msg, - const upb_MiniTable* layout, const upb_MiniTableField* field, int op, - wireval* val) { - const upb_MiniTableSub* subs = layout->subs; - uint8_t mode = field->mode; +void _upb_MessageDef_Resolve(upb_DefBuilder* ctx, upb_MessageDef* m) { + for (int i = 0; i < m->field_count; i++) { + upb_FieldDef* f = (upb_FieldDef*)upb_MessageDef_Field(m, i); + _upb_FieldDef_Resolve(ctx, m->full_name, f); + } - if (UPB_UNLIKELY(mode & kUpb_LabelFlags_IsExtension)) { - const upb_MiniTableExtension* ext_layout = - (const upb_MiniTableExtension*)field; - upb_Message_Extension* ext = - _upb_Message_GetOrCreateExtension(msg, ext_layout, &d->arena); - if (UPB_UNLIKELY(!ext)) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + m->in_message_set = false; + for (int i = 0; i < upb_MessageDef_NestedExtensionCount(m); i++) { + upb_FieldDef* ext = (upb_FieldDef*)upb_MessageDef_NestedExtension(m, i); + _upb_FieldDef_Resolve(ctx, m->full_name, ext); + if (upb_FieldDef_Type(ext) == kUpb_FieldType_Message && + upb_FieldDef_Label(ext) == 
kUpb_Label_Optional && + upb_FieldDef_MessageSubDef(ext) == m && + UPB_DESC(MessageOptions_message_set_wire_format)( + upb_MessageDef_Options(upb_FieldDef_ContainingType(ext)))) { + m->in_message_set = true; } - d->unknown_msg = msg; - msg = &ext->data; - subs = &ext->ext->sub; } - switch (mode & kUpb_FieldMode_Mask) { - case kUpb_FieldMode_Array: - return _upb_Decoder_DecodeToArray(d, ptr, msg, subs, field, val, op); - case kUpb_FieldMode_Map: - return _upb_Decoder_DecodeToMap(d, ptr, msg, subs, field, val); - case kUpb_FieldMode_Scalar: - return _upb_Decoder_DecodeToSubMessage(d, ptr, msg, subs, field, val, op); - default: - UPB_UNREACHABLE(); + for (int i = 0; i < upb_MessageDef_NestedMessageCount(m); i++) { + upb_MessageDef* n = (upb_MessageDef*)upb_MessageDef_NestedMessage(m, i); + _upb_MessageDef_Resolve(ctx, n); } } -static const char* _upb_Decoder_ReverseSkipVarint(const char* ptr, - uint32_t val) { - uint32_t seen = 0; - do { - ptr--; - seen <<= 7; - seen |= *ptr & 0x7f; - } while (seen != val); - return ptr; -} - -static const char* _upb_Decoder_DecodeUnknownField(upb_Decoder* d, - const char* ptr, - upb_Message* msg, - int field_number, - int wire_type, wireval val) { - if (field_number == 0) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); +void _upb_MessageDef_InsertField(upb_DefBuilder* ctx, upb_MessageDef* m, + const upb_FieldDef* f) { + const int32_t field_number = upb_FieldDef_Number(f); - // Since unknown fields are the uncommon case, we do a little extra work here - // to walk backwards through the buffer to find the field start. This frees - // up a register in the fast paths (when the field is known), which leads to - // significant speedups in benchmarks. 
- const char* start = ptr; + if (field_number <= 0 || field_number > kUpb_MaxFieldNumber) { + _upb_DefBuilder_Errf(ctx, "invalid field number (%u)", field_number); + } - if (wire_type == kUpb_WireType_Delimited) ptr += val.size; - if (msg) { - switch (wire_type) { - case kUpb_WireType_Varint: - case kUpb_WireType_Delimited: - start--; - while (start[-1] & 0x80) start--; - break; - case kUpb_WireType_32Bit: - start -= 4; - break; - case kUpb_WireType_64Bit: - start -= 8; - break; - default: - break; - } + const char* json_name = upb_FieldDef_JsonName(f); + const char* shortname = upb_FieldDef_Name(f); + const size_t shortnamelen = strlen(shortname); - assert(start == d->debug_valstart); - uint32_t tag = ((uint32_t)field_number << 3) | wire_type; - start = _upb_Decoder_ReverseSkipVarint(start, tag); - assert(start == d->debug_tagstart); + upb_value v = upb_value_constptr(f); - if (wire_type == kUpb_WireType_StartGroup) { - d->unknown = start; - d->unknown_msg = msg; - ptr = _upb_Decoder_DecodeUnknownGroup(d, ptr, field_number); - start = d->unknown; - d->unknown = NULL; - } - if (!_upb_Message_AddUnknown(msg, start, ptr - start, &d->arena)) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - } - } else if (wire_type == kUpb_WireType_StartGroup) { - ptr = _upb_Decoder_DecodeUnknownGroup(d, ptr, field_number); + upb_value existing_v; + if (upb_strtable_lookup(&m->ntof, shortname, &existing_v)) { + _upb_DefBuilder_Errf(ctx, "duplicate field name (%s)", shortname); } - return ptr; -} -UPB_NOINLINE -static const char* _upb_Decoder_DecodeMessage(upb_Decoder* d, const char* ptr, - upb_Message* msg, - const upb_MiniTable* layout) { - int last_field_index = 0; + const upb_value field_v = _upb_DefType_Pack(f, UPB_DEFTYPE_FIELD); + bool ok = + _upb_MessageDef_Insert(m, shortname, shortnamelen, field_v, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); -#if UPB_FASTTABLE - // The first time we want to skip fast dispatch, because we may have just been - // invoked 
by the fast parser to handle a case that it bailed on. - if (!_upb_Decoder_IsDone(d, &ptr)) goto nofast; -#endif + if (strcmp(shortname, json_name) != 0) { + if (upb_strtable_lookup(&m->ntof, json_name, &v)) { + _upb_DefBuilder_Errf(ctx, "duplicate json_name (%s)", json_name); + } - while (!_upb_Decoder_IsDone(d, &ptr)) { - uint32_t tag; - const upb_MiniTableField* field; - int field_number; - int wire_type; - wireval val; - int op; + const size_t json_size = strlen(json_name); + const upb_value json_v = _upb_DefType_Pack(f, UPB_DEFTYPE_FIELD_JSONNAME); + ok = _upb_MessageDef_Insert(m, json_name, json_size, json_v, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); + } - if (_upb_Decoder_TryFastDispatch(d, &ptr, msg, layout)) break; + if (upb_inttable_lookup(&m->itof, field_number, NULL)) { + _upb_DefBuilder_Errf(ctx, "duplicate field number (%u)", field_number); + } -#if UPB_FASTTABLE - nofast: -#endif + ok = upb_inttable_insert(&m->itof, field_number, v, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); +} -#ifndef NDEBUG - d->debug_tagstart = ptr; -#endif +void _upb_MessageDef_CreateMiniTable(upb_DefBuilder* ctx, upb_MessageDef* m) { + if (ctx->layout == NULL) { + m->layout = _upb_MessageDef_MakeMiniTable(ctx, m); + } else { + UPB_ASSERT(ctx->msg_count < ctx->layout->msg_count); + m->layout = ctx->layout->msgs[ctx->msg_count++]; + UPB_ASSERT(m->field_count == m->layout->field_count); - UPB_ASSERT(ptr < d->input.limit_ptr); - ptr = _upb_Decoder_DecodeTag(d, ptr, &tag); - field_number = tag >> 3; - wire_type = tag & 7; + // We don't need the result of this call, but it will assign layout_index + // for all the fields in O(n lg n) time. 
+ _upb_FieldDefs_Sorted(m->fields, m->field_count, ctx->tmp_arena); + } -#ifndef NDEBUG - d->debug_valstart = ptr; -#endif + for (int i = 0; i < m->nested_msg_count; i++) { + upb_MessageDef* nested = + (upb_MessageDef*)upb_MessageDef_NestedMessage(m, i); + _upb_MessageDef_CreateMiniTable(ctx, nested); + } +} - if (wire_type == kUpb_WireType_EndGroup) { - d->end_group = field_number; - return ptr; - } +void _upb_MessageDef_LinkMiniTable(upb_DefBuilder* ctx, + const upb_MessageDef* m) { + for (int i = 0; i < upb_MessageDef_NestedExtensionCount(m); i++) { + const upb_FieldDef* ext = upb_MessageDef_NestedExtension(m, i); + _upb_FieldDef_BuildMiniTableExtension(ctx, ext); + } - field = _upb_Decoder_FindField(d, layout, field_number, &last_field_index); - ptr = _upb_Decoder_DecodeWireValue(d, ptr, layout, field, wire_type, &val, - &op); + for (int i = 0; i < m->nested_msg_count; i++) { + _upb_MessageDef_LinkMiniTable(ctx, upb_MessageDef_NestedMessage(m, i)); + } - if (op >= 0) { - ptr = _upb_Decoder_DecodeKnownField(d, ptr, msg, layout, field, op, &val); - } else { - switch (op) { - case kUpb_DecodeOp_UnknownField: - ptr = _upb_Decoder_DecodeUnknownField(d, ptr, msg, field_number, - wire_type, val); - break; - case kUpb_DecodeOp_MessageSetItem: - ptr = upb_Decoder_DecodeMessageSetItem(d, ptr, msg, layout); - break; + if (ctx->layout) return; + + for (int i = 0; i < m->field_count; i++) { + const upb_FieldDef* f = upb_MessageDef_Field(m, i); + const upb_MessageDef* sub_m = upb_FieldDef_MessageSubDef(f); + const upb_EnumDef* sub_e = upb_FieldDef_EnumSubDef(f); + const int layout_index = _upb_FieldDef_LayoutIndex(f); + upb_MiniTable* mt = (upb_MiniTable*)upb_MessageDef_MiniTable(m); + + UPB_ASSERT(layout_index < m->field_count); + upb_MiniTableField* mt_f = + (upb_MiniTableField*)&m->layout->fields[layout_index]; + if (sub_m) { + if (!mt->subs) { + _upb_DefBuilder_Errf(ctx, "unexpected submsg for (%s)", m->full_name); + } + UPB_ASSERT(mt_f); + UPB_ASSERT(sub_m->layout); + 
if (UPB_UNLIKELY(!upb_MiniTable_SetSubMessage(mt, mt_f, sub_m->layout))) { + _upb_DefBuilder_Errf(ctx, "invalid submsg for (%s)", m->full_name); + } + } else if (_upb_FieldDef_IsClosedEnum(f)) { + const upb_MiniTableEnum* mt_e = _upb_EnumDef_MiniTable(sub_e); + if (UPB_UNLIKELY(!upb_MiniTable_SetSubEnum(mt, mt_f, mt_e))) { + _upb_DefBuilder_Errf(ctx, "invalid subenum for (%s)", m->full_name); } } } - return UPB_UNLIKELY(layout && layout->required_count) - ? _upb_Decoder_CheckRequired(d, ptr, msg, layout) - : ptr; -} - -const char* _upb_FastDecoder_DecodeGeneric(struct upb_Decoder* d, - const char* ptr, upb_Message* msg, - intptr_t table, uint64_t hasbits, - uint64_t data) { - (void)data; - *(uint32_t*)msg |= hasbits; - return _upb_Decoder_DecodeMessage(d, ptr, msg, decode_totablep(table)); +#ifndef NDEBUG + for (int i = 0; i < m->field_count; i++) { + const upb_FieldDef* f = upb_MessageDef_Field(m, i); + const int layout_index = _upb_FieldDef_LayoutIndex(f); + UPB_ASSERT(layout_index < m->layout->field_count); + const upb_MiniTableField* mt_f = &m->layout->fields[layout_index]; + UPB_ASSERT(upb_FieldDef_Type(f) == upb_MiniTableField_Type(mt_f)); + UPB_ASSERT(upb_FieldDef_CType(f) == upb_MiniTableField_CType(mt_f)); + UPB_ASSERT(upb_FieldDef_HasPresence(f) == + upb_MiniTableField_HasPresence(mt_f)); + } +#endif } -static upb_DecodeStatus _upb_Decoder_DecodeTop(struct upb_Decoder* d, - const char* buf, void* msg, - const upb_MiniTable* l) { - if (!_upb_Decoder_TryFastDispatch(d, &buf, msg, l)) { - _upb_Decoder_DecodeMessage(d, buf, msg, l); +static uint64_t _upb_MessageDef_Modifiers(const upb_MessageDef* m) { + uint64_t out = 0; + if (upb_FileDef_Syntax(m->file) == kUpb_Syntax_Proto3) { + out |= kUpb_MessageModifier_ValidateUtf8; + out |= kUpb_MessageModifier_DefaultIsPacked; } - if (d->end_group != DECODE_NOGROUP) return kUpb_DecodeStatus_Malformed; - if (d->missing_required) return kUpb_DecodeStatus_MissingRequired; - return kUpb_DecodeStatus_Ok; + if 
(m->ext_range_count) { + out |= kUpb_MessageModifier_IsExtendable; + } + return out; } -UPB_NOINLINE -const char* _upb_Decoder_IsDoneFallback(upb_EpsCopyInputStream* e, - const char* ptr, int overrun) { - return _upb_EpsCopyInputStream_IsDoneFallbackInline( - e, ptr, overrun, _upb_Decoder_BufferFlipCallback); -} +static bool _upb_MessageDef_EncodeMap(upb_DescState* s, const upb_MessageDef* m, + upb_Arena* a) { + if (m->field_count != 2) return false; -static upb_DecodeStatus upb_Decoder_Decode(upb_Decoder* const decoder, - const char* const buf, - void* const msg, - const upb_MiniTable* const l, - upb_Arena* const arena) { - if (UPB_SETJMP(decoder->err) == 0) { - decoder->status = _upb_Decoder_DecodeTop(decoder, buf, msg, l); - } else { - UPB_ASSERT(decoder->status != kUpb_DecodeStatus_Ok); - } + const upb_FieldDef* key_field = upb_MessageDef_Field(m, 0); + const upb_FieldDef* val_field = upb_MessageDef_Field(m, 1); + if (key_field == NULL || val_field == NULL) return false; - _upb_MemBlock* blocks = - upb_Atomic_Load(&decoder->arena.blocks, memory_order_relaxed); - arena->head = decoder->arena.head; - upb_Atomic_Store(&arena->blocks, blocks, memory_order_relaxed); - return decoder->status; + UPB_ASSERT(_upb_FieldDef_LayoutIndex(key_field) == 0); + UPB_ASSERT(_upb_FieldDef_LayoutIndex(val_field) == 1); + + s->ptr = upb_MtDataEncoder_EncodeMap( + &s->e, s->ptr, upb_FieldDef_Type(key_field), upb_FieldDef_Type(val_field), + _upb_FieldDef_Modifiers(key_field), _upb_FieldDef_Modifiers(val_field)); + return true; } -upb_DecodeStatus upb_Decode(const char* buf, size_t size, void* msg, - const upb_MiniTable* l, - const upb_ExtensionRegistry* extreg, int options, - upb_Arena* arena) { - upb_Decoder decoder; - unsigned depth = (unsigned)options >> 16; - - upb_EpsCopyInputStream_Init(&decoder.input, &buf, size, - options & kUpb_DecodeOption_AliasString); +static bool _upb_MessageDef_EncodeMessage(upb_DescState* s, + const upb_MessageDef* m, + upb_Arena* a) { + const 
upb_FieldDef** sorted = NULL; + if (!m->is_sorted) { + sorted = _upb_FieldDefs_Sorted(m->fields, m->field_count, a); + if (!sorted) return false; + } - decoder.extreg = extreg; - decoder.unknown = NULL; - decoder.depth = depth ? depth : kUpb_WireFormat_DefaultDepthLimit; - decoder.end_group = DECODE_NOGROUP; - decoder.options = (uint16_t)options; - decoder.missing_required = false; - decoder.status = kUpb_DecodeStatus_Ok; + s->ptr = upb_MtDataEncoder_StartMessage(&s->e, s->ptr, + _upb_MessageDef_Modifiers(m)); - // Violating the encapsulation of the arena for performance reasons. - // This is a temporary arena that we swap into and swap out of when we are - // done. The temporary arena only needs to be able to handle allocation, - // not fuse or free, so it does not need many of the members to be initialized - // (particularly parent_or_count). - _upb_MemBlock* blocks = upb_Atomic_Load(&arena->blocks, memory_order_relaxed); - decoder.arena.head = arena->head; - decoder.arena.block_alloc = arena->block_alloc; - upb_Atomic_Init(&decoder.arena.blocks, blocks); + for (int i = 0; i < m->field_count; i++) { + const upb_FieldDef* f = sorted ? sorted[i] : upb_MessageDef_Field(m, i); + const upb_FieldType type = upb_FieldDef_Type(f); + const int number = upb_FieldDef_Number(f); + const uint64_t modifiers = _upb_FieldDef_Modifiers(f); - return upb_Decoder_Decode(&decoder, buf, msg, l, arena); -} + if (!_upb_DescState_Grow(s, a)) return false; + s->ptr = upb_MtDataEncoder_PutField(&s->e, s->ptr, type, number, modifiers); + } -#undef OP_FIXPCK_LG2 -#undef OP_VARPCK_LG2 + for (int i = 0; i < m->real_oneof_count; i++) { + if (!_upb_DescState_Grow(s, a)) return false; + s->ptr = upb_MtDataEncoder_StartOneof(&s->e, s->ptr); -// Fast decoder: ~3x the speed of decode.c, but requires x86-64/ARM64. -// Also the table size grows by 2x. -// -// Could potentially be ported to other 64-bit archs that pass at least six -// arguments in registers and have 8 unused high bits in pointers. 
-// -// The overall design is to create specialized functions for every possible -// field type (eg. oneof boolean field with a 1 byte tag) and then dispatch -// to the specialized function as quickly as possible. + const upb_OneofDef* o = upb_MessageDef_Oneof(m, i); + const int field_count = upb_OneofDef_FieldCount(o); + for (int j = 0; j < field_count; j++) { + const int number = upb_FieldDef_Number(upb_OneofDef_Field(o, j)); + if (!_upb_DescState_Grow(s, a)) return false; + s->ptr = upb_MtDataEncoder_PutOneofField(&s->e, s->ptr, number); + } + } + return true; +} -// Must be last. +static bool _upb_MessageDef_EncodeMessageSet(upb_DescState* s, + const upb_MessageDef* m, + upb_Arena* a) { + s->ptr = upb_MtDataEncoder_EncodeMessageSet(&s->e, s->ptr); -#if UPB_FASTTABLE + return true; +} -// The standard set of arguments passed to each parsing function. -// Thanks to x86-64 calling conventions, these will stay in registers. -#define UPB_PARSE_PARAMS \ - upb_Decoder *d, const char *ptr, upb_Message *msg, intptr_t table, \ - uint64_t hasbits, uint64_t data +bool upb_MessageDef_MiniDescriptorEncode(const upb_MessageDef* m, upb_Arena* a, + upb_StringView* out) { + upb_DescState s; + _upb_DescState_Init(&s); -#define UPB_PARSE_ARGS d, ptr, msg, table, hasbits, data + if (!_upb_DescState_Grow(&s, a)) return false; -#define RETURN_GENERIC(m) \ - /* Uncomment either of these for debugging purposes. 
*/ \ - /* fprintf(stderr, m); */ \ - /*__builtin_trap(); */ \ - return _upb_FastDecoder_DecodeGeneric(d, ptr, msg, table, hasbits, 0); + if (upb_MessageDef_IsMapEntry(m)) { + if (!_upb_MessageDef_EncodeMap(&s, m, a)) return false; + } else if (UPB_DESC(MessageOptions_message_set_wire_format)(m->opts)) { + if (!_upb_MessageDef_EncodeMessageSet(&s, m, a)) return false; + } else { + if (!_upb_MessageDef_EncodeMessage(&s, m, a)) return false; + } -typedef enum { - CARD_s = 0, /* Singular (optional, non-repeated) */ - CARD_o = 1, /* Oneof */ - CARD_r = 2, /* Repeated */ - CARD_p = 3 /* Packed Repeated */ -} upb_card; + if (!_upb_DescState_Grow(&s, a)) return false; + *s.ptr = '\0'; -UPB_NOINLINE -static const char* fastdecode_isdonefallback(UPB_PARSE_PARAMS) { - int overrun = data; - ptr = _upb_EpsCopyInputStream_IsDoneFallbackInline( - &d->input, ptr, overrun, _upb_Decoder_BufferFlipCallback); - data = _upb_FastDecoder_LoadTag(ptr); - UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); + out->data = s.buf; + out->size = s.ptr - s.buf; + return true; } -UPB_FORCEINLINE -static const char* fastdecode_dispatch(UPB_PARSE_PARAMS) { - int overrun; - switch (upb_EpsCopyInputStream_IsDoneStatus(&d->input, ptr, &overrun)) { - case kUpb_IsDoneStatus_Done: - *(uint32_t*)msg |= hasbits; // Sync hasbits. - const upb_MiniTable* l = decode_totablep(table); - return UPB_UNLIKELY(l->required_count) - ? 
_upb_Decoder_CheckRequired(d, ptr, msg, l) - : ptr; - case kUpb_IsDoneStatus_NotDone: - break; - case kUpb_IsDoneStatus_NeedFallback: - data = overrun; - UPB_MUSTTAIL return fastdecode_isdonefallback(UPB_PARSE_ARGS); +static upb_StringView* _upb_ReservedNames_New(upb_DefBuilder* ctx, int n, + const upb_StringView* protos) { + upb_StringView* sv = _upb_DefBuilder_Alloc(ctx, sizeof(upb_StringView) * n); + for (int i = 0; i < n; i++) { + sv[i].data = + upb_strdup2(protos[i].data, protos[i].size, _upb_DefBuilder_Arena(ctx)); + sv[i].size = protos[i].size; } - - // Read two bytes of tag data (for a one-byte tag, the high byte is junk). - data = _upb_FastDecoder_LoadTag(ptr); - UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); + return sv; } -UPB_FORCEINLINE -static bool fastdecode_checktag(uint16_t data, int tagbytes) { - if (tagbytes == 1) { - return (data & 0xff) == 0; - } else { - return data == 0; - } -} +static void create_msgdef(upb_DefBuilder* ctx, const char* prefix, + const UPB_DESC(DescriptorProto) * msg_proto, + const upb_MessageDef* containing_type, + upb_MessageDef* m) { + const UPB_DESC(OneofDescriptorProto)* const* oneofs; + const UPB_DESC(FieldDescriptorProto)* const* fields; + const UPB_DESC(DescriptorProto_ExtensionRange)* const* ext_ranges; + const UPB_DESC(DescriptorProto_ReservedRange)* const* res_ranges; + const upb_StringView* res_names; + size_t n_oneof, n_field, n_enum, n_ext, n_msg; + size_t n_ext_range, n_res_range, n_res_name; + upb_StringView name; -UPB_FORCEINLINE -static const char* fastdecode_longsize(const char* ptr, int* size) { - int i; - UPB_ASSERT(*size & 0x80); - *size &= 0xff; - for (i = 0; i < 3; i++) { - ptr++; - size_t byte = (uint8_t)ptr[-1]; - *size += (byte - 1) << (7 + 7 * i); - if (UPB_LIKELY((byte & 0x80) == 0)) return ptr; - } - ptr++; - size_t byte = (uint8_t)ptr[-1]; - // len is limited by 2gb not 4gb, hence 8 and not 16 as normally expected - // for a 32 bit varint. 
- if (UPB_UNLIKELY(byte >= 8)) return NULL; - *size += (byte - 1) << 28; - return ptr; -} + // Must happen before _upb_DefBuilder_Add() + m->file = _upb_DefBuilder_File(ctx); -UPB_FORCEINLINE -static const char* fastdecode_delimited( - upb_Decoder* d, const char* ptr, - upb_EpsCopyInputStream_ParseDelimitedFunc* func, void* ctx) { - ptr++; + m->containing_type = containing_type; + m->is_sorted = true; - // Sign-extend so varint greater than one byte becomes negative, causing - // fast delimited parse to fail. - int len = (int8_t)ptr[-1]; + name = UPB_DESC(DescriptorProto_name)(msg_proto); - if (!upb_EpsCopyInputStream_TryParseDelimitedFast(&d->input, &ptr, len, func, - ctx)) { - // Slow case: Sub-message is >=128 bytes and/or exceeds the current buffer. - // If it exceeds the buffer limit, limit/limit_ptr will change during - // sub-message parsing, so we need to preserve delta, not limit. - if (UPB_UNLIKELY(len & 0x80)) { - // Size varint >1 byte (length >= 128). - ptr = fastdecode_longsize(ptr, &len); - if (!ptr) { - // Corrupt wire format: size exceeded INT_MAX. - return NULL; - } - } - if (!upb_EpsCopyInputStream_CheckSize(&d->input, ptr, len)) { - // Corrupt wire format: invalid limit. 
- return NULL; - } - int delta = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, len); - ptr = func(&d->input, ptr, ctx); - upb_EpsCopyInputStream_PopLimit(&d->input, ptr, delta); - } - return ptr; -} + m->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); + _upb_DefBuilder_Add(ctx, m->full_name, _upb_DefType_Pack(m, UPB_DEFTYPE_MSG)); -/* singular, oneof, repeated field handling ***********************************/ + oneofs = UPB_DESC(DescriptorProto_oneof_decl)(msg_proto, &n_oneof); + fields = UPB_DESC(DescriptorProto_field)(msg_proto, &n_field); + ext_ranges = + UPB_DESC(DescriptorProto_extension_range)(msg_proto, &n_ext_range); + res_ranges = + UPB_DESC(DescriptorProto_reserved_range)(msg_proto, &n_res_range); + res_names = UPB_DESC(DescriptorProto_reserved_name)(msg_proto, &n_res_name); -typedef struct { - upb_Array* arr; - void* end; -} fastdecode_arr; + bool ok = upb_inttable_init(&m->itof, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); -typedef enum { - FD_NEXT_ATLIMIT, - FD_NEXT_SAMEFIELD, - FD_NEXT_OTHERFIELD -} fastdecode_next; + ok = upb_strtable_init(&m->ntof, n_oneof + n_field, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); -typedef struct { - void* dst; - fastdecode_next next; - uint32_t tag; -} fastdecode_nextret; + UPB_DEF_SET_OPTIONS(m->opts, DescriptorProto, MessageOptions, msg_proto); -UPB_FORCEINLINE -static void* fastdecode_resizearr(upb_Decoder* d, void* dst, - fastdecode_arr* farr, int valbytes) { - if (UPB_UNLIKELY(dst == farr->end)) { - size_t old_size = farr->arr->capacity; - size_t old_bytes = old_size * valbytes; - size_t new_size = old_size * 2; - size_t new_bytes = new_size * valbytes; - char* old_ptr = _upb_array_ptr(farr->arr); - char* new_ptr = upb_Arena_Realloc(&d->arena, old_ptr, old_bytes, new_bytes); - uint8_t elem_size_lg2 = __builtin_ctz(valbytes); - farr->arr->capacity = new_size; - farr->arr->data = _upb_array_tagptr(new_ptr, elem_size_lg2); - dst = (void*)(new_ptr + (old_size * valbytes)); - 
farr->end = (void*)(new_ptr + (new_size * valbytes)); + m->oneof_count = n_oneof; + m->oneofs = _upb_OneofDefs_New(ctx, n_oneof, oneofs, m); + + m->field_count = n_field; + m->fields = + _upb_FieldDefs_New(ctx, n_field, fields, m->full_name, m, &m->is_sorted); + + // Message Sets may not contain fields. + if (UPB_UNLIKELY(UPB_DESC(MessageOptions_message_set_wire_format)(m->opts))) { + if (UPB_UNLIKELY(n_field > 0)) { + _upb_DefBuilder_Errf(ctx, "invalid message set (%s)", m->full_name); + } } - return dst; + + m->ext_range_count = n_ext_range; + m->ext_ranges = _upb_ExtensionRanges_New(ctx, n_ext_range, ext_ranges, m); + + m->res_range_count = n_res_range; + m->res_ranges = + _upb_MessageReservedRanges_New(ctx, n_res_range, res_ranges, m); + + m->res_name_count = n_res_name; + m->res_names = _upb_ReservedNames_New(ctx, n_res_name, res_names); + + const size_t synthetic_count = _upb_OneofDefs_Finalize(ctx, m); + m->real_oneof_count = m->oneof_count - synthetic_count; + + assign_msg_wellknowntype(m); + upb_inttable_compact(&m->itof, ctx->arena); + + const UPB_DESC(EnumDescriptorProto)* const* enums = + UPB_DESC(DescriptorProto_enum_type)(msg_proto, &n_enum); + m->nested_enum_count = n_enum; + m->nested_enums = _upb_EnumDefs_New(ctx, n_enum, enums, m); + + const UPB_DESC(FieldDescriptorProto)* const* exts = + UPB_DESC(DescriptorProto_extension)(msg_proto, &n_ext); + m->nested_ext_count = n_ext; + m->nested_exts = _upb_Extensions_New(ctx, n_ext, exts, m->full_name, m); + + const UPB_DESC(DescriptorProto)* const* msgs = + UPB_DESC(DescriptorProto_nested_type)(msg_proto, &n_msg); + m->nested_msg_count = n_msg; + m->nested_msgs = _upb_MessageDefs_New(ctx, n_msg, msgs, m); } -UPB_FORCEINLINE -static bool fastdecode_tagmatch(uint32_t tag, uint64_t data, int tagbytes) { - if (tagbytes == 1) { - return (uint8_t)tag == (uint8_t)data; - } else { - return (uint16_t)tag == (uint16_t)data; +// Allocate and initialize an array of |n| message defs. 
+upb_MessageDef* _upb_MessageDefs_New( + upb_DefBuilder* ctx, int n, const UPB_DESC(DescriptorProto) * const* protos, + const upb_MessageDef* containing_type) { + _upb_DefType_CheckPadding(sizeof(upb_MessageDef)); + + const char* name = containing_type ? containing_type->full_name + : _upb_FileDef_RawPackage(ctx->file); + + upb_MessageDef* m = _upb_DefBuilder_Alloc(ctx, sizeof(upb_MessageDef) * n); + for (int i = 0; i < n; i++) { + create_msgdef(ctx, name, protos[i], containing_type, &m[i]); } + return m; } -UPB_FORCEINLINE -static void fastdecode_commitarr(void* dst, fastdecode_arr* farr, - int valbytes) { - farr->arr->size = - (size_t)((char*)dst - (char*)_upb_array_ptr(farr->arr)) / valbytes; -} -UPB_FORCEINLINE -static fastdecode_nextret fastdecode_nextrepeated(upb_Decoder* d, void* dst, - const char** ptr, - fastdecode_arr* farr, - uint64_t data, int tagbytes, - int valbytes) { - fastdecode_nextret ret; - dst = (char*)dst + valbytes; +// Must be last. - if (UPB_LIKELY(!_upb_Decoder_IsDone(d, ptr))) { - ret.tag = _upb_FastDecoder_LoadTag(*ptr); - if (fastdecode_tagmatch(ret.tag, data, tagbytes)) { - ret.next = FD_NEXT_SAMEFIELD; - } else { - fastdecode_commitarr(dst, farr, valbytes); - ret.next = FD_NEXT_OTHERFIELD; - } - } else { - fastdecode_commitarr(dst, farr, valbytes); - ret.next = FD_NEXT_ATLIMIT; - } +struct upb_MessageReservedRange { + int32_t start; + int32_t end; +}; - ret.dst = dst; - return ret; +upb_MessageReservedRange* _upb_MessageReservedRange_At( + const upb_MessageReservedRange* r, int i) { + return (upb_MessageReservedRange*)&r[i]; } -UPB_FORCEINLINE -static void* fastdecode_fieldmem(upb_Message* msg, uint64_t data) { - size_t ofs = data >> 48; - return (char*)msg + ofs; +int32_t upb_MessageReservedRange_Start(const upb_MessageReservedRange* r) { + return r->start; +} +int32_t upb_MessageReservedRange_End(const upb_MessageReservedRange* r) { + return r->end; } -UPB_FORCEINLINE -static void* fastdecode_getfield(upb_Decoder* d, const char* 
ptr, - upb_Message* msg, uint64_t* data, - uint64_t* hasbits, fastdecode_arr* farr, - int valbytes, upb_card card) { - switch (card) { - case CARD_s: { - uint8_t hasbit_index = *data >> 24; - // Set hasbit and return pointer to scalar field. - *hasbits |= 1ull << hasbit_index; - return fastdecode_fieldmem(msg, *data); - } - case CARD_o: { - uint16_t case_ofs = *data >> 32; - uint32_t* oneof_case = UPB_PTR_AT(msg, case_ofs, uint32_t); - uint8_t field_number = *data >> 24; - *oneof_case = field_number; - return fastdecode_fieldmem(msg, *data); - } - case CARD_r: { - // Get pointer to upb_Array and allocate/expand if necessary. - uint8_t elem_size_lg2 = __builtin_ctz(valbytes); - upb_Array** arr_p = fastdecode_fieldmem(msg, *data); - char* begin; - *(uint32_t*)msg |= *hasbits; - *hasbits = 0; - if (UPB_LIKELY(!*arr_p)) { - farr->arr = _upb_Array_New(&d->arena, 8, elem_size_lg2); - *arr_p = farr->arr; - } else { - farr->arr = *arr_p; - } - begin = _upb_array_ptr(farr->arr); - farr->end = begin + (farr->arr->capacity * valbytes); - *data = _upb_FastDecoder_LoadTag(ptr); - return begin + (farr->arr->size * valbytes); +upb_MessageReservedRange* _upb_MessageReservedRanges_New( + upb_DefBuilder* ctx, int n, + const UPB_DESC(DescriptorProto_ReservedRange) * const* protos, + const upb_MessageDef* m) { + upb_MessageReservedRange* r = + _upb_DefBuilder_Alloc(ctx, sizeof(upb_MessageReservedRange) * n); + + for (int i = 0; i < n; i++) { + const int32_t start = + UPB_DESC(DescriptorProto_ReservedRange_start)(protos[i]); + const int32_t end = UPB_DESC(DescriptorProto_ReservedRange_end)(protos[i]); + const int32_t max = kUpb_MaxFieldNumber + 1; + + // A full validation would also check that each range is disjoint, and that + // none of the fields overlap with the extension ranges, but we are just + // sanity checking here. 
+ if (start < 1 || end <= start || end > max) { + _upb_DefBuilder_Errf(ctx, + "Reserved range (%d, %d) is invalid, message=%s\n", + (int)start, (int)end, upb_MessageDef_FullName(m)); } - default: - UPB_UNREACHABLE(); + + r[i].start = start; + r[i].end = end; } + + return r; } -UPB_FORCEINLINE -static bool fastdecode_flippacked(uint64_t* data, int tagbytes) { - *data ^= (0x2 ^ 0x0); // Patch data to match packed wiretype. - return fastdecode_checktag(*data, tagbytes); + + +// Must be last. + +struct upb_MethodDef { + const UPB_DESC(MethodOptions) * opts; + upb_ServiceDef* service; + const char* full_name; + const upb_MessageDef* input_type; + const upb_MessageDef* output_type; + int index; + bool client_streaming; + bool server_streaming; +}; + +upb_MethodDef* _upb_MethodDef_At(const upb_MethodDef* m, int i) { + return (upb_MethodDef*)&m[i]; } -#define FASTDECODE_CHECKPACKED(tagbytes, card, func) \ - if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ - if (card == CARD_r && fastdecode_flippacked(&data, tagbytes)) { \ - UPB_MUSTTAIL return func(UPB_PARSE_ARGS); \ - } \ - RETURN_GENERIC("packed check tag mismatch\n"); \ - } +const upb_ServiceDef* upb_MethodDef_Service(const upb_MethodDef* m) { + return m->service; +} -/* varint fields **************************************************************/ +const UPB_DESC(MethodOptions) * upb_MethodDef_Options(const upb_MethodDef* m) { + return m->opts; +} -UPB_FORCEINLINE -static uint64_t fastdecode_munge(uint64_t val, int valbytes, bool zigzag) { - if (valbytes == 1) { - return val != 0; - } else if (zigzag) { - if (valbytes == 4) { - uint32_t n = val; - return (n >> 1) ^ -(int32_t)(n & 1); - } else if (valbytes == 8) { - return (val >> 1) ^ -(int64_t)(val & 1); - } - UPB_UNREACHABLE(); - } - return val; +bool upb_MethodDef_HasOptions(const upb_MethodDef* m) { + return m->opts != (void*)kUpbDefOptDefault; } -UPB_FORCEINLINE -static const char* fastdecode_varint64(const char* ptr, uint64_t* val) { - ptr++; - *val = 
(uint8_t)ptr[-1]; - if (UPB_UNLIKELY(*val & 0x80)) { - int i; - for (i = 0; i < 8; i++) { - ptr++; - uint64_t byte = (uint8_t)ptr[-1]; - *val += (byte - 1) << (7 + 7 * i); - if (UPB_LIKELY((byte & 0x80) == 0)) goto done; - } - ptr++; - uint64_t byte = (uint8_t)ptr[-1]; - if (byte > 1) { - return NULL; - } - *val += (byte - 1) << 63; - } -done: - UPB_ASSUME(ptr != NULL); - return ptr; +const char* upb_MethodDef_FullName(const upb_MethodDef* m) { + return m->full_name; } -#define FASTDECODE_UNPACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, card, zigzag, packed) \ - uint64_t val; \ - void* dst; \ - fastdecode_arr farr; \ - \ - FASTDECODE_CHECKPACKED(tagbytes, card, packed); \ - \ - dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, \ - card); \ - if (card == CARD_r) { \ - if (UPB_UNLIKELY(!dst)) { \ - RETURN_GENERIC("need array resize\n"); \ - } \ - } \ - \ - again: \ - if (card == CARD_r) { \ - dst = fastdecode_resizearr(d, dst, &farr, valbytes); \ - } \ - \ - ptr += tagbytes; \ - ptr = fastdecode_varint64(ptr, &val); \ - if (ptr == NULL) _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ - val = fastdecode_munge(val, valbytes, zigzag); \ - memcpy(dst, &val, valbytes); \ - \ - if (card == CARD_r) { \ - fastdecode_nextret ret = fastdecode_nextrepeated( \ - d, dst, &ptr, &farr, data, tagbytes, valbytes); \ - switch (ret.next) { \ - case FD_NEXT_SAMEFIELD: \ - dst = ret.dst; \ - goto again; \ - case FD_NEXT_OTHERFIELD: \ - data = ret.tag; \ - UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ - case FD_NEXT_ATLIMIT: \ - return ptr; \ - } \ - } \ - \ - UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); +const char* upb_MethodDef_Name(const upb_MethodDef* m) { + return _upb_DefBuilder_FullToShort(m->full_name); +} -typedef struct { - uint8_t valbytes; - bool zigzag; - void* dst; - fastdecode_arr farr; -} fastdecode_varintdata; +int upb_MethodDef_Index(const upb_MethodDef* m) { return m->index; 
} -UPB_FORCEINLINE -static const char* fastdecode_topackedvarint(upb_EpsCopyInputStream* e, - const char* ptr, void* ctx) { - upb_Decoder* d = (upb_Decoder*)e; - fastdecode_varintdata* data = ctx; - void* dst = data->dst; - uint64_t val; +const upb_MessageDef* upb_MethodDef_InputType(const upb_MethodDef* m) { + return m->input_type; +} - while (!_upb_Decoder_IsDone(d, &ptr)) { - dst = fastdecode_resizearr(d, dst, &data->farr, data->valbytes); - ptr = fastdecode_varint64(ptr, &val); - if (ptr == NULL) return NULL; - val = fastdecode_munge(val, data->valbytes, data->zigzag); - memcpy(dst, &val, data->valbytes); - dst = (char*)dst + data->valbytes; - } +const upb_MessageDef* upb_MethodDef_OutputType(const upb_MethodDef* m) { + return m->output_type; +} - fastdecode_commitarr(dst, &data->farr, data->valbytes); - return ptr; +bool upb_MethodDef_ClientStreaming(const upb_MethodDef* m) { + return m->client_streaming; } -#define FASTDECODE_PACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, zigzag, unpacked) \ - fastdecode_varintdata ctx = {valbytes, zigzag}; \ - \ - FASTDECODE_CHECKPACKED(tagbytes, CARD_r, unpacked); \ - \ - ctx.dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &ctx.farr, \ - valbytes, CARD_r); \ - if (UPB_UNLIKELY(!ctx.dst)) { \ - RETURN_GENERIC("need array resize\n"); \ - } \ - \ - ptr += tagbytes; \ - ptr = fastdecode_delimited(d, ptr, &fastdecode_topackedvarint, &ctx); \ - \ - if (UPB_UNLIKELY(ptr == NULL)) { \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ - } \ - \ - UPB_MUSTTAIL return fastdecode_dispatch(d, ptr, msg, table, hasbits, 0); +bool upb_MethodDef_ServerStreaming(const upb_MethodDef* m) { + return m->server_streaming; +} -#define FASTDECODE_VARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, card, zigzag, unpacked, packed) \ - if (card == CARD_p) { \ - FASTDECODE_PACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, zigzag, unpacked); \ - } else { \ - 
FASTDECODE_UNPACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, card, zigzag, packed); \ - } +static void create_method(upb_DefBuilder* ctx, + const UPB_DESC(MethodDescriptorProto) * method_proto, + upb_ServiceDef* s, upb_MethodDef* m) { + upb_StringView name = UPB_DESC(MethodDescriptorProto_name)(method_proto); -#define z_ZZ true -#define b_ZZ false -#define v_ZZ false + m->service = s; + m->full_name = + _upb_DefBuilder_MakeFullName(ctx, upb_ServiceDef_FullName(s), name); + m->client_streaming = + UPB_DESC(MethodDescriptorProto_client_streaming)(method_proto); + m->server_streaming = + UPB_DESC(MethodDescriptorProto_server_streaming)(method_proto); + m->input_type = _upb_DefBuilder_Resolve( + ctx, m->full_name, m->full_name, + UPB_DESC(MethodDescriptorProto_input_type)(method_proto), + UPB_DEFTYPE_MSG); + m->output_type = _upb_DefBuilder_Resolve( + ctx, m->full_name, m->full_name, + UPB_DESC(MethodDescriptorProto_output_type)(method_proto), + UPB_DEFTYPE_MSG); -/* Generate all combinations: - * {s,o,r,p} x {b1,v4,z4,v8,z8} x {1bt,2bt} */ + UPB_DEF_SET_OPTIONS(m->opts, MethodDescriptorProto, MethodOptions, + method_proto); +} -#define F(card, type, valbytes, tagbytes) \ - UPB_NOINLINE \ - const char* upb_p##card##type##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ - FASTDECODE_VARINT(d, ptr, msg, table, hasbits, data, tagbytes, valbytes, \ - CARD_##card, type##_ZZ, \ - upb_pr##type##valbytes##_##tagbytes##bt, \ - upb_pp##type##valbytes##_##tagbytes##bt); \ +// Allocate and initialize an array of |n| method defs belonging to |s|. 
+upb_MethodDef* _upb_MethodDefs_New( + upb_DefBuilder* ctx, int n, + const UPB_DESC(MethodDescriptorProto) * const* protos, upb_ServiceDef* s) { + upb_MethodDef* m = _upb_DefBuilder_Alloc(ctx, sizeof(upb_MethodDef) * n); + for (int i = 0; i < n; i++) { + create_method(ctx, protos[i], s, &m[i]); + m[i].index = i; } + return m; +} -#define TYPES(card, tagbytes) \ - F(card, b, 1, tagbytes) \ - F(card, v, 4, tagbytes) \ - F(card, v, 8, tagbytes) \ - F(card, z, 4, tagbytes) \ - F(card, z, 8, tagbytes) -#define TAGBYTES(card) \ - TYPES(card, 1) \ - TYPES(card, 2) +#include +#include +#include -TAGBYTES(s) -TAGBYTES(o) -TAGBYTES(r) -TAGBYTES(p) -#undef z_ZZ -#undef b_ZZ -#undef v_ZZ -#undef o_ONEOF -#undef s_ONEOF -#undef r_ONEOF -#undef F -#undef TYPES -#undef TAGBYTES -#undef FASTDECODE_UNPACKEDVARINT -#undef FASTDECODE_PACKEDVARINT -#undef FASTDECODE_VARINT +// Must be last. -/* fixed fields ***************************************************************/ +struct upb_OneofDef { + const UPB_DESC(OneofOptions) * opts; + const upb_MessageDef* parent; + const char* full_name; + int field_count; + bool synthetic; + const upb_FieldDef** fields; + upb_strtable ntof; // lookup a field by name + upb_inttable itof; // lookup a field by number (index) +#if UINTPTR_MAX == 0xffffffff + uint32_t padding; // Increase size to a multiple of 8. 
+#endif +}; -#define FASTDECODE_UNPACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, card, packed) \ - void* dst; \ - fastdecode_arr farr; \ - \ - FASTDECODE_CHECKPACKED(tagbytes, card, packed) \ - \ - dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, \ - card); \ - if (card == CARD_r) { \ - if (UPB_UNLIKELY(!dst)) { \ - RETURN_GENERIC("couldn't allocate array in arena\n"); \ - } \ - } \ - \ - again: \ - if (card == CARD_r) { \ - dst = fastdecode_resizearr(d, dst, &farr, valbytes); \ - } \ - \ - ptr += tagbytes; \ - memcpy(dst, ptr, valbytes); \ - ptr += valbytes; \ - \ - if (card == CARD_r) { \ - fastdecode_nextret ret = fastdecode_nextrepeated( \ - d, dst, &ptr, &farr, data, tagbytes, valbytes); \ - switch (ret.next) { \ - case FD_NEXT_SAMEFIELD: \ - dst = ret.dst; \ - goto again; \ - case FD_NEXT_OTHERFIELD: \ - data = ret.tag; \ - UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ - case FD_NEXT_ATLIMIT: \ - return ptr; \ - } \ - } \ - \ - UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); +upb_OneofDef* _upb_OneofDef_At(const upb_OneofDef* o, int i) { + return (upb_OneofDef*)&o[i]; +} -#define FASTDECODE_PACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, unpacked) \ - FASTDECODE_CHECKPACKED(tagbytes, CARD_r, unpacked) \ - \ - ptr += tagbytes; \ - int size = (uint8_t)ptr[0]; \ - ptr++; \ - if (size & 0x80) { \ - ptr = fastdecode_longsize(ptr, &size); \ - } \ - \ - if (UPB_UNLIKELY(!upb_EpsCopyInputStream_CheckDataSizeAvailable( \ - &d->input, ptr, size) || \ - (size % valbytes) != 0)) { \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ - } \ - \ - upb_Array** arr_p = fastdecode_fieldmem(msg, data); \ - upb_Array* arr = *arr_p; \ - uint8_t elem_size_lg2 = __builtin_ctz(valbytes); \ - int elems = size / valbytes; \ - \ - if (UPB_LIKELY(!arr)) { \ - *arr_p = arr = _upb_Array_New(&d->arena, elems, elem_size_lg2); \ - if (!arr) { \ - _upb_FastDecoder_ErrorJmp(d, 
kUpb_DecodeStatus_Malformed); \ - } \ - } else { \ - _upb_Array_ResizeUninitialized(arr, elems, &d->arena); \ - } \ - \ - char* dst = _upb_array_ptr(arr); \ - memcpy(dst, ptr, size); \ - arr->size = elems; \ - \ - ptr += size; \ - UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); +const UPB_DESC(OneofOptions) * upb_OneofDef_Options(const upb_OneofDef* o) { + return o->opts; +} -#define FASTDECODE_FIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, card, unpacked, packed) \ - if (card == CARD_p) { \ - FASTDECODE_PACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, unpacked); \ - } else { \ - FASTDECODE_UNPACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, card, packed); \ - } +bool upb_OneofDef_HasOptions(const upb_OneofDef* o) { + return o->opts != (void*)kUpbDefOptDefault; +} -/* Generate all combinations: - * {s,o,r,p} x {f4,f8} x {1bt,2bt} */ +const char* upb_OneofDef_FullName(const upb_OneofDef* o) { + return o->full_name; +} -#define F(card, valbytes, tagbytes) \ - UPB_NOINLINE \ - const char* upb_p##card##f##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ - FASTDECODE_FIXED(d, ptr, msg, table, hasbits, data, tagbytes, valbytes, \ - CARD_##card, upb_ppf##valbytes##_##tagbytes##bt, \ - upb_prf##valbytes##_##tagbytes##bt); \ - } +const char* upb_OneofDef_Name(const upb_OneofDef* o) { + return _upb_DefBuilder_FullToShort(o->full_name); +} -#define TYPES(card, tagbytes) \ - F(card, 4, tagbytes) \ - F(card, 8, tagbytes) +const upb_MessageDef* upb_OneofDef_ContainingType(const upb_OneofDef* o) { + return o->parent; +} -#define TAGBYTES(card) \ - TYPES(card, 1) \ - TYPES(card, 2) +int upb_OneofDef_FieldCount(const upb_OneofDef* o) { return o->field_count; } -TAGBYTES(s) -TAGBYTES(o) -TAGBYTES(r) -TAGBYTES(p) +const upb_FieldDef* upb_OneofDef_Field(const upb_OneofDef* o, int i) { + UPB_ASSERT(i < o->field_count); + return o->fields[i]; +} -#undef F -#undef TYPES -#undef TAGBYTES -#undef FASTDECODE_UNPACKEDFIXED 
-#undef FASTDECODE_PACKEDFIXED +int upb_OneofDef_numfields(const upb_OneofDef* o) { return o->field_count; } -/* string fields **************************************************************/ +uint32_t upb_OneofDef_Index(const upb_OneofDef* o) { + // Compute index in our parent's array. + return o - upb_MessageDef_Oneof(o->parent, 0); +} -typedef const char* fastdecode_copystr_func(struct upb_Decoder* d, - const char* ptr, upb_Message* msg, - const upb_MiniTable* table, - uint64_t hasbits, - upb_StringView* dst); +bool upb_OneofDef_IsSynthetic(const upb_OneofDef* o) { return o->synthetic; } -UPB_NOINLINE -static const char* fastdecode_verifyutf8(upb_Decoder* d, const char* ptr, - upb_Message* msg, intptr_t table, - uint64_t hasbits, uint64_t data) { - upb_StringView* dst = (upb_StringView*)data; - if (!_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) { - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); +const upb_FieldDef* upb_OneofDef_LookupNameWithSize(const upb_OneofDef* o, + const char* name, + size_t size) { + upb_value val; + return upb_strtable_lookup2(&o->ntof, name, size, &val) + ? upb_value_getptr(val) + : NULL; +} + +const upb_FieldDef* upb_OneofDef_LookupName(const upb_OneofDef* o, + const char* name) { + return upb_OneofDef_LookupNameWithSize(o, name, strlen(name)); +} + +const upb_FieldDef* upb_OneofDef_LookupNumber(const upb_OneofDef* o, + uint32_t num) { + upb_value val; + return upb_inttable_lookup(&o->itof, num, &val) ? upb_value_getptr(val) + : NULL; +} + +void _upb_OneofDef_Insert(upb_DefBuilder* ctx, upb_OneofDef* o, + const upb_FieldDef* f, const char* name, + size_t size) { + o->field_count++; + if (_upb_FieldDef_IsProto3Optional(f)) o->synthetic = true; + + const int number = upb_FieldDef_Number(f); + const upb_value v = upb_value_constptr(f); + + // TODO(salo): This lookup is unfortunate because we also perform it when + // inserting into the message's table. 
Unfortunately that step occurs after + // this one and moving things around could be tricky so let's leave it for + // a future refactoring. + const bool number_exists = upb_inttable_lookup(&o->itof, number, NULL); + if (UPB_UNLIKELY(number_exists)) { + _upb_DefBuilder_Errf(ctx, "oneof fields have the same number (%d)", number); + } + + // TODO(salo): More redundant work happening here. + const bool name_exists = upb_strtable_lookup2(&o->ntof, name, size, NULL); + if (UPB_UNLIKELY(name_exists)) { + _upb_DefBuilder_Errf(ctx, "oneof fields have the same name (%.*s)", + (int)size, name); + } + + const bool ok = upb_inttable_insert(&o->itof, number, v, ctx->arena) && + upb_strtable_insert(&o->ntof, name, size, v, ctx->arena); + if (UPB_UNLIKELY(!ok)) { + _upb_DefBuilder_OomErr(ctx); } - UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); } -#define FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, validate_utf8) \ - int size = (uint8_t)ptr[0]; /* Could plumb through hasbits. */ \ - ptr++; \ - if (size & 0x80) { \ - ptr = fastdecode_longsize(ptr, &size); \ - } \ - \ - if (UPB_UNLIKELY(!upb_EpsCopyInputStream_CheckSize(&d->input, ptr, size))) { \ - dst->size = 0; \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ - } \ - \ - const char* s_ptr = ptr; \ - ptr = upb_EpsCopyInputStream_ReadString(&d->input, &s_ptr, size, &d->arena); \ - if (!ptr) _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); \ - dst->data = s_ptr; \ - dst->size = size; \ - \ - if (validate_utf8) { \ - data = (uint64_t)dst; \ - UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ - } else { \ - UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \ +// Returns the synthetic count. 
+size_t _upb_OneofDefs_Finalize(upb_DefBuilder* ctx, upb_MessageDef* m) { + int synthetic_count = 0; + + for (int i = 0; i < upb_MessageDef_OneofCount(m); i++) { + upb_OneofDef* o = (upb_OneofDef*)upb_MessageDef_Oneof(m, i); + + if (o->synthetic && o->field_count != 1) { + _upb_DefBuilder_Errf(ctx, + "Synthetic oneofs must have one field, not %d: %s", + o->field_count, upb_OneofDef_Name(o)); + } + + if (o->synthetic) { + synthetic_count++; + } else if (synthetic_count != 0) { + _upb_DefBuilder_Errf( + ctx, "Synthetic oneofs must be after all other oneofs: %s", + upb_OneofDef_Name(o)); + } + + o->fields = + _upb_DefBuilder_Alloc(ctx, sizeof(upb_FieldDef*) * o->field_count); + o->field_count = 0; } -UPB_NOINLINE -static const char* fastdecode_longstring_utf8(struct upb_Decoder* d, - const char* ptr, upb_Message* msg, - intptr_t table, uint64_t hasbits, - uint64_t data) { - upb_StringView* dst = (upb_StringView*)data; - FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, true); + for (int i = 0; i < upb_MessageDef_FieldCount(m); i++) { + const upb_FieldDef* f = upb_MessageDef_Field(m, i); + upb_OneofDef* o = (upb_OneofDef*)upb_FieldDef_ContainingOneof(f); + if (o) { + o->fields[o->field_count++] = f; + } + } + + return synthetic_count; } -UPB_NOINLINE -static const char* fastdecode_longstring_noutf8( - struct upb_Decoder* d, const char* ptr, upb_Message* msg, intptr_t table, - uint64_t hasbits, uint64_t data) { - upb_StringView* dst = (upb_StringView*)data; - FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, false); +static void create_oneofdef(upb_DefBuilder* ctx, upb_MessageDef* m, + const UPB_DESC(OneofDescriptorProto) * oneof_proto, + const upb_OneofDef* _o) { + upb_OneofDef* o = (upb_OneofDef*)_o; + upb_StringView name = UPB_DESC(OneofDescriptorProto_name)(oneof_proto); + + o->parent = m; + o->full_name = + _upb_DefBuilder_MakeFullName(ctx, upb_MessageDef_FullName(m), name); + o->field_count = 0; + o->synthetic = false; + + UPB_DEF_SET_OPTIONS(o->opts, 
OneofDescriptorProto, OneofOptions, oneof_proto); + + if (upb_MessageDef_FindByNameWithSize(m, name.data, name.size, NULL, NULL)) { + _upb_DefBuilder_Errf(ctx, "duplicate oneof name (%s)", o->full_name); + } + + upb_value v = _upb_DefType_Pack(o, UPB_DEFTYPE_ONEOF); + bool ok = _upb_MessageDef_Insert(m, name.data, name.size, v, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); + + ok = upb_inttable_init(&o->itof, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); + + ok = upb_strtable_init(&o->ntof, 4, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); } -UPB_FORCEINLINE -static void fastdecode_docopy(upb_Decoder* d, const char* ptr, uint32_t size, - int copy, char* data, upb_StringView* dst) { - d->arena.head.ptr += copy; - dst->data = data; - UPB_UNPOISON_MEMORY_REGION(data, copy); - memcpy(data, ptr, copy); - UPB_POISON_MEMORY_REGION(data + size, copy - size); +// Allocate and initialize an array of |n| oneof defs. +upb_OneofDef* _upb_OneofDefs_New( + upb_DefBuilder* ctx, int n, + const UPB_DESC(OneofDescriptorProto) * const* protos, upb_MessageDef* m) { + _upb_DefType_CheckPadding(sizeof(upb_OneofDef)); + + upb_OneofDef* o = _upb_DefBuilder_Alloc(ctx, sizeof(upb_OneofDef) * n); + for (int i = 0; i < n; i++) { + create_oneofdef(ctx, m, protos[i], &o[i]); + } + return o; } -#define FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes, \ - card, validate_utf8) \ - upb_StringView* dst; \ - fastdecode_arr farr; \ - int64_t size; \ - size_t arena_has; \ - size_t common_has; \ - char* buf; \ - \ - UPB_ASSERT(!upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, 0)); \ - UPB_ASSERT(fastdecode_checktag(data, tagbytes)); \ - \ - dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ - sizeof(upb_StringView), card); \ - \ - again: \ - if (card == CARD_r) { \ - dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_StringView)); \ - } \ - \ - size = (uint8_t)ptr[tagbytes]; \ - ptr += tagbytes + 1; \ - dst->size = size; \ - \ - buf = 
d->arena.head.ptr; \ - arena_has = _upb_ArenaHas(&d->arena); \ - common_has = UPB_MIN(arena_has, \ - upb_EpsCopyInputStream_BytesAvailable(&d->input, ptr)); \ - \ - if (UPB_LIKELY(size <= 15 - tagbytes)) { \ - if (arena_has < 16) goto longstr; \ - d->arena.head.ptr += 16; \ - memcpy(buf, ptr - tagbytes - 1, 16); \ - dst->data = buf + tagbytes + 1; \ - } else if (UPB_LIKELY(size <= 32)) { \ - if (UPB_UNLIKELY(common_has < 32)) goto longstr; \ - fastdecode_docopy(d, ptr, size, 32, buf, dst); \ - } else if (UPB_LIKELY(size <= 64)) { \ - if (UPB_UNLIKELY(common_has < 64)) goto longstr; \ - fastdecode_docopy(d, ptr, size, 64, buf, dst); \ - } else if (UPB_LIKELY(size < 128)) { \ - if (UPB_UNLIKELY(common_has < 128)) goto longstr; \ - fastdecode_docopy(d, ptr, size, 128, buf, dst); \ - } else { \ - goto longstr; \ - } \ - \ - ptr += size; \ - \ - if (card == CARD_r) { \ - if (validate_utf8 && \ - !_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) { \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); \ - } \ - fastdecode_nextret ret = fastdecode_nextrepeated( \ - d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_StringView)); \ - switch (ret.next) { \ - case FD_NEXT_SAMEFIELD: \ - dst = ret.dst; \ - goto again; \ - case FD_NEXT_OTHERFIELD: \ - data = ret.tag; \ - UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ - case FD_NEXT_ATLIMIT: \ - return ptr; \ - } \ - } \ - \ - if (card != CARD_r && validate_utf8) { \ - data = (uint64_t)dst; \ - UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ - } \ - \ - UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \ - \ - longstr: \ - if (card == CARD_r) { \ - fastdecode_commitarr(dst + 1, &farr, sizeof(upb_StringView)); \ - } \ - ptr--; \ - if (validate_utf8) { \ - UPB_MUSTTAIL return fastdecode_longstring_utf8(d, ptr, msg, table, \ - hasbits, (uint64_t)dst); \ - } else { \ - UPB_MUSTTAIL return fastdecode_longstring_noutf8(d, ptr, msg, table, \ - hasbits, (uint64_t)dst); \ - } -#define 
FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, card, \ - copyfunc, validate_utf8) \ - upb_StringView* dst; \ - fastdecode_arr farr; \ - int64_t size; \ - \ - if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ - RETURN_GENERIC("string field tag mismatch\n"); \ - } \ - \ - if (UPB_UNLIKELY( \ - !upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, 0))) { \ - UPB_MUSTTAIL return copyfunc(UPB_PARSE_ARGS); \ - } \ - \ - dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ - sizeof(upb_StringView), card); \ - \ - again: \ - if (card == CARD_r) { \ - dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_StringView)); \ - } \ - \ - size = (int8_t)ptr[tagbytes]; \ - ptr += tagbytes + 1; \ - \ - if (UPB_UNLIKELY( \ - !upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, size))) { \ - ptr--; \ - if (validate_utf8) { \ - return fastdecode_longstring_utf8(d, ptr, msg, table, hasbits, \ - (uint64_t)dst); \ - } else { \ - return fastdecode_longstring_noutf8(d, ptr, msg, table, hasbits, \ - (uint64_t)dst); \ - } \ - } \ - \ - dst->data = ptr; \ - dst->size = size; \ - ptr = upb_EpsCopyInputStream_ReadStringAliased(&d->input, &dst->data, \ - dst->size); \ - \ - if (card == CARD_r) { \ - if (validate_utf8 && \ - !_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) { \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); \ - } \ - fastdecode_nextret ret = fastdecode_nextrepeated( \ - d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_StringView)); \ - switch (ret.next) { \ - case FD_NEXT_SAMEFIELD: \ - dst = ret.dst; \ - goto again; \ - case FD_NEXT_OTHERFIELD: \ - data = ret.tag; \ - UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ - case FD_NEXT_ATLIMIT: \ - return ptr; \ - } \ - } \ - \ - if (card != CARD_r && validate_utf8) { \ - data = (uint64_t)dst; \ - UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ - } \ - \ - UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); -/* Generate all combinations: 
- * {p,c} x {s,o,r} x {s, b} x {1bt,2bt} */ +// Must be last. -#define s_VALIDATE true -#define b_VALIDATE false +struct upb_ServiceDef { + const UPB_DESC(ServiceOptions) * opts; + const upb_FileDef* file; + const char* full_name; + upb_MethodDef* methods; + int method_count; + int index; +}; -#define F(card, tagbytes, type) \ - UPB_NOINLINE \ - const char* upb_c##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ - FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes, \ - CARD_##card, type##_VALIDATE); \ - } \ - const char* upb_p##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ - FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, \ - CARD_##card, upb_c##card##type##_##tagbytes##bt, \ - type##_VALIDATE); \ - } +upb_ServiceDef* _upb_ServiceDef_At(const upb_ServiceDef* s, int index) { + return (upb_ServiceDef*)&s[index]; +} -#define UTF8(card, tagbytes) \ - F(card, tagbytes, s) \ - F(card, tagbytes, b) +const UPB_DESC(ServiceOptions) * + upb_ServiceDef_Options(const upb_ServiceDef* s) { + return s->opts; +} -#define TAGBYTES(card) \ - UTF8(card, 1) \ - UTF8(card, 2) +bool upb_ServiceDef_HasOptions(const upb_ServiceDef* s) { + return s->opts != (void*)kUpbDefOptDefault; +} -TAGBYTES(s) -TAGBYTES(o) -TAGBYTES(r) +const char* upb_ServiceDef_FullName(const upb_ServiceDef* s) { + return s->full_name; +} -#undef s_VALIDATE -#undef b_VALIDATE -#undef F -#undef TAGBYTES -#undef FASTDECODE_LONGSTRING -#undef FASTDECODE_COPYSTRING -#undef FASTDECODE_STRING +const char* upb_ServiceDef_Name(const upb_ServiceDef* s) { + return _upb_DefBuilder_FullToShort(s->full_name); +} -/* message fields *************************************************************/ +int upb_ServiceDef_Index(const upb_ServiceDef* s) { return s->index; } -UPB_INLINE -upb_Message* decode_newmsg_ceil(upb_Decoder* d, const upb_MiniTable* l, - int msg_ceil_bytes) { - size_t size = l->size + sizeof(upb_Message_Internal); - char* msg_data; - if (UPB_LIKELY(msg_ceil_bytes > 0 && - 
_upb_ArenaHas(&d->arena) >= msg_ceil_bytes)) { - UPB_ASSERT(size <= (size_t)msg_ceil_bytes); - msg_data = d->arena.head.ptr; - d->arena.head.ptr += size; - UPB_UNPOISON_MEMORY_REGION(msg_data, msg_ceil_bytes); - memset(msg_data, 0, msg_ceil_bytes); - UPB_POISON_MEMORY_REGION(msg_data + size, msg_ceil_bytes - size); - } else { - msg_data = (char*)upb_Arena_Malloc(&d->arena, size); - memset(msg_data, 0, size); - } - return msg_data + sizeof(upb_Message_Internal); +const upb_FileDef* upb_ServiceDef_File(const upb_ServiceDef* s) { + return s->file; } -typedef struct { - intptr_t table; - upb_Message* msg; -} fastdecode_submsgdata; +int upb_ServiceDef_MethodCount(const upb_ServiceDef* s) { + return s->method_count; +} -UPB_FORCEINLINE -static const char* fastdecode_tosubmsg(upb_EpsCopyInputStream* e, - const char* ptr, void* ctx) { - upb_Decoder* d = (upb_Decoder*)e; - fastdecode_submsgdata* submsg = ctx; - ptr = fastdecode_dispatch(d, ptr, submsg->msg, submsg->table, 0, 0); - UPB_ASSUME(ptr != NULL); - return ptr; +const upb_MethodDef* upb_ServiceDef_Method(const upb_ServiceDef* s, int i) { + return (i < 0 || i >= s->method_count) ? 
NULL + : _upb_MethodDef_At(s->methods, i); } -#define FASTDECODE_SUBMSG(d, ptr, msg, table, hasbits, data, tagbytes, \ - msg_ceil_bytes, card) \ - \ - if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ - RETURN_GENERIC("submessage field tag mismatch\n"); \ - } \ - \ - if (--d->depth == 0) { \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_MaxDepthExceeded); \ - } \ - \ - upb_Message** dst; \ - uint32_t submsg_idx = (data >> 16) & 0xff; \ - const upb_MiniTable* tablep = decode_totablep(table); \ - const upb_MiniTable* subtablep = tablep->subs[submsg_idx].submsg; \ - fastdecode_submsgdata submsg = {decode_totable(subtablep)}; \ - fastdecode_arr farr; \ - \ - if (subtablep->table_mask == (uint8_t)-1) { \ - RETURN_GENERIC("submessage doesn't have fast tables."); \ - } \ - \ - dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ - sizeof(upb_Message*), card); \ - \ - if (card == CARD_s) { \ - *(uint32_t*)msg |= hasbits; \ - hasbits = 0; \ - } \ - \ - again: \ - if (card == CARD_r) { \ - dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_Message*)); \ - } \ - \ - submsg.msg = *dst; \ - \ - if (card == CARD_r || UPB_LIKELY(!submsg.msg)) { \ - *dst = submsg.msg = decode_newmsg_ceil(d, subtablep, msg_ceil_bytes); \ - } \ - \ - ptr += tagbytes; \ - ptr = fastdecode_delimited(d, ptr, fastdecode_tosubmsg, &submsg); \ - \ - if (UPB_UNLIKELY(ptr == NULL || d->end_group != DECODE_NOGROUP)) { \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ - } \ - \ - if (card == CARD_r) { \ - fastdecode_nextret ret = fastdecode_nextrepeated( \ - d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_Message*)); \ - switch (ret.next) { \ - case FD_NEXT_SAMEFIELD: \ - dst = ret.dst; \ - goto again; \ - case FD_NEXT_OTHERFIELD: \ - d->depth++; \ - data = ret.tag; \ - UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ - case FD_NEXT_ATLIMIT: \ - d->depth++; \ - return ptr; \ - } \ - } \ - \ - d->depth++; \ - UPB_MUSTTAIL return 
fastdecode_dispatch(UPB_PARSE_ARGS); - -#define F(card, tagbytes, size_ceil, ceil_arg) \ - const char* upb_p##card##m_##tagbytes##bt_max##size_ceil##b( \ - UPB_PARSE_PARAMS) { \ - FASTDECODE_SUBMSG(d, ptr, msg, table, hasbits, data, tagbytes, ceil_arg, \ - CARD_##card); \ +const upb_MethodDef* upb_ServiceDef_FindMethodByName(const upb_ServiceDef* s, + const char* name) { + for (int i = 0; i < s->method_count; i++) { + const upb_MethodDef* m = _upb_MethodDef_At(s->methods, i); + if (strcmp(name, upb_MethodDef_Name(m)) == 0) { + return m; + } } + return NULL; +} -#define SIZES(card, tagbytes) \ - F(card, tagbytes, 64, 64) \ - F(card, tagbytes, 128, 128) \ - F(card, tagbytes, 192, 192) \ - F(card, tagbytes, 256, 256) \ - F(card, tagbytes, max, -1) +static void create_service(upb_DefBuilder* ctx, + const UPB_DESC(ServiceDescriptorProto) * svc_proto, + upb_ServiceDef* s) { + upb_StringView name; + size_t n; -#define TAGBYTES(card) \ - SIZES(card, 1) \ - SIZES(card, 2) + // Must happen before _upb_DefBuilder_Add() + s->file = _upb_DefBuilder_File(ctx); -TAGBYTES(s) -TAGBYTES(o) -TAGBYTES(r) + name = UPB_DESC(ServiceDescriptorProto_name)(svc_proto); + const char* package = _upb_FileDef_RawPackage(s->file); + s->full_name = _upb_DefBuilder_MakeFullName(ctx, package, name); + _upb_DefBuilder_Add(ctx, s->full_name, + _upb_DefType_Pack(s, UPB_DEFTYPE_SERVICE)); -#undef TAGBYTES -#undef SIZES -#undef F -#undef FASTDECODE_SUBMSG + const UPB_DESC(MethodDescriptorProto)* const* methods = + UPB_DESC(ServiceDescriptorProto_method)(svc_proto, &n); + s->method_count = n; + s->methods = _upb_MethodDefs_New(ctx, n, methods, s); -#endif /* UPB_FASTTABLE */ + UPB_DEF_SET_OPTIONS(s->opts, ServiceDescriptorProto, ServiceOptions, + svc_proto); +} -// We encode backwards, to avoid pre-computing lengths (one-pass encode). 
+upb_ServiceDef* _upb_ServiceDefs_New( + upb_DefBuilder* ctx, int n, + const UPB_DESC(ServiceDescriptorProto) * const* protos) { + _upb_DefType_CheckPadding(sizeof(upb_ServiceDef)); + + upb_ServiceDef* s = _upb_DefBuilder_Alloc(ctx, sizeof(upb_ServiceDef) * n); + for (int i = 0; i < n; i++) { + create_service(ctx, protos[i], &s[i]); + s[i].index = i; + } + return s; +} #include @@ -12497,2170 +11715,2962 @@ TAGBYTES(r) // Must be last. -#define UPB_PB_VARINT_MAX_LEN 10 - -UPB_NOINLINE -static size_t encode_varint64(uint64_t val, char* buf) { - size_t i = 0; - do { - uint8_t byte = val & 0x7fU; - val >>= 7; - if (val) byte |= 0x80U; - buf[i++] = byte; - } while (val); - return i; -} +// A few fake field types for our tables. +enum { + kUpb_FakeFieldType_FieldNotFound = 0, + kUpb_FakeFieldType_MessageSetItem = 19, +}; -static uint32_t encode_zz32(int32_t n) { - return ((uint32_t)n << 1) ^ (n >> 31); -} -static uint64_t encode_zz64(int64_t n) { - return ((uint64_t)n << 1) ^ (n >> 63); -} +// DecodeOp: an action to be performed for a wire-type/field-type combination. +enum { + // Special ops: we don't write data to regular fields for these. + kUpb_DecodeOp_UnknownField = -1, + kUpb_DecodeOp_MessageSetItem = -2, -typedef struct { - upb_EncodeStatus status; - jmp_buf err; - upb_Arena* arena; - char *buf, *ptr, *limit; - int options; - int depth; - _upb_mapsorter sorter; -} upb_encstate; + // Scalar-only ops. + kUpb_DecodeOp_Scalar1Byte = 0, + kUpb_DecodeOp_Scalar4Byte = 2, + kUpb_DecodeOp_Scalar8Byte = 3, + kUpb_DecodeOp_Enum = 1, -static size_t upb_roundup_pow2(size_t bytes) { - size_t ret = 128; - while (ret < bytes) { - ret *= 2; - } - return ret; -} + // Scalar/repeated ops. + kUpb_DecodeOp_String = 4, + kUpb_DecodeOp_Bytes = 5, + kUpb_DecodeOp_SubMessage = 6, -UPB_NORETURN static void encode_err(upb_encstate* e, upb_EncodeStatus s) { - UPB_ASSERT(s != kUpb_EncodeStatus_Ok); - e->status = s; - UPB_LONGJMP(e->err, 1); -} + // Repeated-only ops (also see macros below). 
+ kUpb_DecodeOp_PackedEnum = 13, +}; -UPB_NOINLINE -static void encode_growbuffer(upb_encstate* e, size_t bytes) { - size_t old_size = e->limit - e->buf; - size_t new_size = upb_roundup_pow2(bytes + (e->limit - e->ptr)); - char* new_buf = upb_Arena_Realloc(e->arena, e->buf, old_size, new_size); +// For packed fields it is helpful to be able to recover the lg2 of the data +// size from the op. +#define OP_FIXPCK_LG2(n) (n + 5) /* n in [2, 3] => op in [7, 8] */ +#define OP_VARPCK_LG2(n) (n + 9) /* n in [0, 2, 3] => op in [9, 11, 12] */ - if (!new_buf) encode_err(e, kUpb_EncodeStatus_OutOfMemory); +typedef union { + bool bool_val; + uint32_t uint32_val; + uint64_t uint64_val; + uint32_t size; +} wireval; - // We want previous data at the end, realloc() put it at the beginning. - // TODO(salo): This is somewhat inefficient since we are copying twice. - // Maybe create a realloc() that copies to the end of the new buffer? - if (old_size > 0) { - memmove(new_buf + new_size - old_size, e->buf, old_size); - } +static const char* _upb_Decoder_DecodeMessage(upb_Decoder* d, const char* ptr, + upb_Message* msg, + const upb_MiniTable* layout); - e->ptr = new_buf + new_size - (e->limit - e->ptr); - e->limit = new_buf + new_size; - e->buf = new_buf; +UPB_NORETURN static void* _upb_Decoder_ErrorJmp(upb_Decoder* d, + upb_DecodeStatus status) { + assert(status != kUpb_DecodeStatus_Ok); + d->status = status; + UPB_LONGJMP(d->err, 1); +} - e->ptr -= bytes; +const char* _upb_FastDecoder_ErrorJmp(upb_Decoder* d, int status) { + assert(status != kUpb_DecodeStatus_Ok); + d->status = status; + UPB_LONGJMP(d->err, 1); + return NULL; } -/* Call to ensure that at least "bytes" bytes are available for writing at - * e->ptr. Returns false if the bytes could not be allocated. 
*/ -UPB_FORCEINLINE -static void encode_reserve(upb_encstate* e, size_t bytes) { - if ((size_t)(e->ptr - e->buf) < bytes) { - encode_growbuffer(e, bytes); - return; +static void _upb_Decoder_VerifyUtf8(upb_Decoder* d, const char* buf, int len) { + if (!_upb_Decoder_VerifyUtf8Inline(buf, len)) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); } - - e->ptr -= bytes; } -/* Writes the given bytes to the buffer, handling reserve/advance. */ -static void encode_bytes(upb_encstate* e, const void* data, size_t len) { - if (len == 0) return; /* memcpy() with zero size is UB */ - encode_reserve(e, len); - memcpy(e->ptr, data, len); +static bool _upb_Decoder_Reserve(upb_Decoder* d, upb_Array* arr, size_t elem) { + bool need_realloc = arr->capacity - arr->size < elem; + if (need_realloc && !_upb_array_realloc(arr, arr->size + elem, &d->arena)) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + } + return need_realloc; } -static void encode_fixed64(upb_encstate* e, uint64_t val) { - val = _upb_BigEndian_Swap64(val); - encode_bytes(e, &val, sizeof(uint64_t)); -} - -static void encode_fixed32(upb_encstate* e, uint32_t val) { - val = _upb_BigEndian_Swap32(val); - encode_bytes(e, &val, sizeof(uint32_t)); -} +typedef struct { + const char* ptr; + uint64_t val; +} _upb_DecodeLongVarintReturn; UPB_NOINLINE -static void encode_longvarint(upb_encstate* e, uint64_t val) { - size_t len; - char* start; - - encode_reserve(e, UPB_PB_VARINT_MAX_LEN); - len = encode_varint64(val, e->ptr); - start = e->ptr + UPB_PB_VARINT_MAX_LEN - len; - memmove(start, e->ptr, len); - e->ptr = start; +static _upb_DecodeLongVarintReturn _upb_Decoder_DecodeLongVarint( + const char* ptr, uint64_t val) { + _upb_DecodeLongVarintReturn ret = {NULL, 0}; + uint64_t byte; + int i; + for (i = 1; i < 10; i++) { + byte = (uint8_t)ptr[i]; + val += (byte - 1) << (i * 7); + if (!(byte & 0x80)) { + ret.ptr = ptr + i + 1; + ret.val = val; + return ret; + } + } + return ret; } UPB_FORCEINLINE -static void 
encode_varint(upb_encstate* e, uint64_t val) { - if (val < 128 && e->ptr != e->buf) { - --e->ptr; - *e->ptr = val; +static const char* _upb_Decoder_DecodeVarint(upb_Decoder* d, const char* ptr, + uint64_t* val) { + uint64_t byte = (uint8_t)*ptr; + if (UPB_LIKELY((byte & 0x80) == 0)) { + *val = byte; + return ptr + 1; } else { - encode_longvarint(e, val); + _upb_DecodeLongVarintReturn res = _upb_Decoder_DecodeLongVarint(ptr, byte); + if (!res.ptr) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); + *val = res.val; + return res.ptr; } } -static void encode_double(upb_encstate* e, double d) { - uint64_t u64; - UPB_ASSERT(sizeof(double) == sizeof(uint64_t)); - memcpy(&u64, &d, sizeof(uint64_t)); - encode_fixed64(e, u64); -} - -static void encode_float(upb_encstate* e, float d) { - uint32_t u32; - UPB_ASSERT(sizeof(float) == sizeof(uint32_t)); - memcpy(&u32, &d, sizeof(uint32_t)); - encode_fixed32(e, u32); -} - -static void encode_tag(upb_encstate* e, uint32_t field_number, - uint8_t wire_type) { - encode_varint(e, (field_number << 3) | wire_type); -} - -static void encode_fixedarray(upb_encstate* e, const upb_Array* arr, - size_t elem_size, uint32_t tag) { - size_t bytes = arr->size * elem_size; - const char* data = _upb_array_constptr(arr); - const char* ptr = data + bytes - elem_size; - - if (tag || !_upb_IsLittleEndian()) { - while (true) { - if (elem_size == 4) { - uint32_t val; - memcpy(&val, ptr, sizeof(val)); - val = _upb_BigEndian_Swap32(val); - encode_bytes(e, &val, elem_size); - } else { - UPB_ASSERT(elem_size == 8); - uint64_t val; - memcpy(&val, ptr, sizeof(val)); - val = _upb_BigEndian_Swap64(val); - encode_bytes(e, &val, elem_size); - } - - if (tag) encode_varint(e, tag); - if (ptr == data) break; - ptr -= elem_size; - } +UPB_FORCEINLINE +static const char* _upb_Decoder_DecodeTag(upb_Decoder* d, const char* ptr, + uint32_t* val) { + uint64_t byte = (uint8_t)*ptr; + if (UPB_LIKELY((byte & 0x80) == 0)) { + *val = byte; + return ptr + 1; } else { - 
encode_bytes(e, data, bytes); + const char* start = ptr; + _upb_DecodeLongVarintReturn res = _upb_Decoder_DecodeLongVarint(ptr, byte); + if (!res.ptr || res.ptr - start > 5 || res.val > UINT32_MAX) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); + } + *val = res.val; + return res.ptr; } } -static void encode_message(upb_encstate* e, const upb_Message* msg, - const upb_MiniTable* m, size_t* size); - -static void encode_TaggedMessagePtr(upb_encstate* e, - upb_TaggedMessagePtr tagged, - const upb_MiniTable* m, size_t* size) { - if (upb_TaggedMessagePtr_IsEmpty(tagged)) { - m = &_kUpb_MiniTable_Empty; +UPB_FORCEINLINE +static const char* upb_Decoder_DecodeSize(upb_Decoder* d, const char* ptr, + uint32_t* size) { + uint64_t size64; + ptr = _upb_Decoder_DecodeVarint(d, ptr, &size64); + if (size64 >= INT32_MAX || + !upb_EpsCopyInputStream_CheckSize(&d->input, ptr, (int)size64)) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); } - encode_message(e, _upb_TaggedMessagePtr_GetMessage(tagged), m, size); + *size = size64; + return ptr; } -static void encode_scalar(upb_encstate* e, const void* _field_mem, - const upb_MiniTableSub* subs, - const upb_MiniTableField* f) { - const char* field_mem = _field_mem; - int wire_type; - -#define CASE(ctype, type, wtype, encodeval) \ - { \ - ctype val = *(ctype*)field_mem; \ - encode_##type(e, encodeval); \ - wire_type = wtype; \ - break; \ +static void _upb_Decoder_MungeInt32(wireval* val) { + if (!_upb_IsLittleEndian()) { + /* The next stage will memcpy(dst, &val, 4) */ + val->uint32_val = val->uint64_val; } +} - switch (f->UPB_PRIVATE(descriptortype)) { - case kUpb_FieldType_Double: - CASE(double, double, kUpb_WireType_64Bit, val); - case kUpb_FieldType_Float: - CASE(float, float, kUpb_WireType_32Bit, val); - case kUpb_FieldType_Int64: - case kUpb_FieldType_UInt64: - CASE(uint64_t, varint, kUpb_WireType_Varint, val); - case kUpb_FieldType_UInt32: - CASE(uint32_t, varint, kUpb_WireType_Varint, val); - case 
kUpb_FieldType_Int32: - case kUpb_FieldType_Enum: - CASE(int32_t, varint, kUpb_WireType_Varint, (int64_t)val); - case kUpb_FieldType_SFixed64: - case kUpb_FieldType_Fixed64: - CASE(uint64_t, fixed64, kUpb_WireType_64Bit, val); - case kUpb_FieldType_Fixed32: - case kUpb_FieldType_SFixed32: - CASE(uint32_t, fixed32, kUpb_WireType_32Bit, val); +static void _upb_Decoder_Munge(int type, wireval* val) { + switch (type) { case kUpb_FieldType_Bool: - CASE(bool, varint, kUpb_WireType_Varint, val); - case kUpb_FieldType_SInt32: - CASE(int32_t, varint, kUpb_WireType_Varint, encode_zz32(val)); - case kUpb_FieldType_SInt64: - CASE(int64_t, varint, kUpb_WireType_Varint, encode_zz64(val)); - case kUpb_FieldType_String: - case kUpb_FieldType_Bytes: { - upb_StringView view = *(upb_StringView*)field_mem; - encode_bytes(e, view.data, view.size); - encode_varint(e, view.size); - wire_type = kUpb_WireType_Delimited; + val->bool_val = val->uint64_val != 0; break; - } - case kUpb_FieldType_Group: { - size_t size; - upb_TaggedMessagePtr submsg = *(upb_TaggedMessagePtr*)field_mem; - const upb_MiniTable* subm = subs[f->UPB_PRIVATE(submsg_index)].submsg; - if (submsg == 0) { - return; - } - if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded); - encode_tag(e, f->number, kUpb_WireType_EndGroup); - encode_TaggedMessagePtr(e, submsg, subm, &size); - wire_type = kUpb_WireType_StartGroup; - e->depth++; + case kUpb_FieldType_SInt32: { + uint32_t n = val->uint64_val; + val->uint32_val = (n >> 1) ^ -(int32_t)(n & 1); break; } - case kUpb_FieldType_Message: { - size_t size; - upb_TaggedMessagePtr submsg = *(upb_TaggedMessagePtr*)field_mem; - const upb_MiniTable* subm = subs[f->UPB_PRIVATE(submsg_index)].submsg; - if (submsg == 0) { - return; - } - if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded); - encode_TaggedMessagePtr(e, submsg, subm, &size); - encode_varint(e, size); - wire_type = kUpb_WireType_Delimited; - e->depth++; + case kUpb_FieldType_SInt64: { + 
uint64_t n = val->uint64_val; + val->uint64_val = (n >> 1) ^ -(int64_t)(n & 1); break; } - default: - UPB_UNREACHABLE(); + case kUpb_FieldType_Int32: + case kUpb_FieldType_UInt32: + case kUpb_FieldType_Enum: + _upb_Decoder_MungeInt32(val); + break; } -#undef CASE - - encode_tag(e, f->number, wire_type); } -static void encode_array(upb_encstate* e, const upb_Message* msg, - const upb_MiniTableSub* subs, - const upb_MiniTableField* f) { - const upb_Array* arr = *UPB_PTR_AT(msg, f->offset, upb_Array*); - bool packed = f->mode & kUpb_LabelFlags_IsPacked; - size_t pre_len = e->limit - e->ptr; +static upb_Message* _upb_Decoder_NewSubMessage(upb_Decoder* d, + const upb_MiniTableSub* subs, + const upb_MiniTableField* field, + upb_TaggedMessagePtr* target) { + const upb_MiniTable* subl = subs[field->UPB_PRIVATE(submsg_index)].submsg; + UPB_ASSERT(subl); + upb_Message* msg = _upb_Message_New(subl, &d->arena); + if (!msg) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - if (arr == NULL || arr->size == 0) { - return; - } + // Extensions should not be unlinked. A message extension should not be + // registered until its sub-message type is available to be linked. + bool is_empty = subl == &_kUpb_MiniTable_Empty; + bool is_extension = field->mode & kUpb_LabelFlags_IsExtension; + UPB_ASSERT(!(is_empty && is_extension)); -#define VARINT_CASE(ctype, encode) \ - { \ - const ctype* start = _upb_array_constptr(arr); \ - const ctype* ptr = start + arr->size; \ - uint32_t tag = packed ? 0 : (f->number << 3) | kUpb_WireType_Varint; \ - do { \ - ptr--; \ - encode_varint(e, encode); \ - if (tag) encode_varint(e, tag); \ - } while (ptr != start); \ - } \ - break; + if (is_empty && !(d->options & kUpb_DecodeOption_ExperimentalAllowUnlinked)) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_UnlinkedSubMessage); + } -#define TAG(wire_type) (packed ? 
0 : (f->number << 3 | wire_type)) + upb_TaggedMessagePtr tagged = _upb_TaggedMessagePtr_Pack(msg, is_empty); + memcpy(target, &tagged, sizeof(tagged)); + return msg; +} - switch (f->UPB_PRIVATE(descriptortype)) { - case kUpb_FieldType_Double: - encode_fixedarray(e, arr, sizeof(double), TAG(kUpb_WireType_64Bit)); - break; - case kUpb_FieldType_Float: - encode_fixedarray(e, arr, sizeof(float), TAG(kUpb_WireType_32Bit)); - break; - case kUpb_FieldType_SFixed64: - case kUpb_FieldType_Fixed64: - encode_fixedarray(e, arr, sizeof(uint64_t), TAG(kUpb_WireType_64Bit)); - break; - case kUpb_FieldType_Fixed32: - case kUpb_FieldType_SFixed32: - encode_fixedarray(e, arr, sizeof(uint32_t), TAG(kUpb_WireType_32Bit)); - break; - case kUpb_FieldType_Int64: - case kUpb_FieldType_UInt64: - VARINT_CASE(uint64_t, *ptr); - case kUpb_FieldType_UInt32: - VARINT_CASE(uint32_t, *ptr); - case kUpb_FieldType_Int32: - case kUpb_FieldType_Enum: - VARINT_CASE(int32_t, (int64_t)*ptr); - case kUpb_FieldType_Bool: - VARINT_CASE(bool, *ptr); - case kUpb_FieldType_SInt32: - VARINT_CASE(int32_t, encode_zz32(*ptr)); - case kUpb_FieldType_SInt64: - VARINT_CASE(int64_t, encode_zz64(*ptr)); - case kUpb_FieldType_String: - case kUpb_FieldType_Bytes: { - const upb_StringView* start = _upb_array_constptr(arr); - const upb_StringView* ptr = start + arr->size; - do { - ptr--; - encode_bytes(e, ptr->data, ptr->size); - encode_varint(e, ptr->size); - encode_tag(e, f->number, kUpb_WireType_Delimited); - } while (ptr != start); - return; - } - case kUpb_FieldType_Group: { - const upb_TaggedMessagePtr* start = _upb_array_constptr(arr); - const upb_TaggedMessagePtr* ptr = start + arr->size; - const upb_MiniTable* subm = subs[f->UPB_PRIVATE(submsg_index)].submsg; - if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded); - do { - size_t size; - ptr--; - encode_tag(e, f->number, kUpb_WireType_EndGroup); - encode_TaggedMessagePtr(e, *ptr, subm, &size); - encode_tag(e, f->number, 
kUpb_WireType_StartGroup); - } while (ptr != start); - e->depth++; - return; - } - case kUpb_FieldType_Message: { - const upb_TaggedMessagePtr* start = _upb_array_constptr(arr); - const upb_TaggedMessagePtr* ptr = start + arr->size; - const upb_MiniTable* subm = subs[f->UPB_PRIVATE(submsg_index)].submsg; - if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded); - do { - size_t size; - ptr--; - encode_TaggedMessagePtr(e, *ptr, subm, &size); - encode_varint(e, size); - encode_tag(e, f->number, kUpb_WireType_Delimited); - } while (ptr != start); - e->depth++; - return; - } - } -#undef VARINT_CASE - - if (packed) { - encode_varint(e, e->limit - e->ptr - pre_len); - encode_tag(e, f->number, kUpb_WireType_Delimited); +static upb_Message* _upb_Decoder_ReuseSubMessage( + upb_Decoder* d, const upb_MiniTableSub* subs, + const upb_MiniTableField* field, upb_TaggedMessagePtr* target) { + upb_TaggedMessagePtr tagged = *target; + const upb_MiniTable* subl = subs[field->UPB_PRIVATE(submsg_index)].submsg; + UPB_ASSERT(subl); + if (!upb_TaggedMessagePtr_IsEmpty(tagged) || subl == &_kUpb_MiniTable_Empty) { + return _upb_TaggedMessagePtr_GetMessage(tagged); } -} -static void encode_mapentry(upb_encstate* e, uint32_t number, - const upb_MiniTable* layout, - const upb_MapEntry* ent) { - const upb_MiniTableField* key_field = &layout->fields[0]; - const upb_MiniTableField* val_field = &layout->fields[1]; - size_t pre_len = e->limit - e->ptr; + // We found an empty message from a previous parse that was performed before + // this field was linked. But it is linked now, so we want to allocate a new + // message of the correct type and promote data into it before continuing. 
+ upb_Message* existing = _upb_TaggedMessagePtr_GetEmptyMessage(tagged); + upb_Message* promoted = _upb_Decoder_NewSubMessage(d, subs, field, target); size_t size; - encode_scalar(e, &ent->data.v, layout->subs, val_field); - encode_scalar(e, &ent->data.k, layout->subs, key_field); - size = (e->limit - e->ptr) - pre_len; - encode_varint(e, size); - encode_tag(e, number, kUpb_WireType_Delimited); + const char* unknown = upb_Message_GetUnknown(existing, &size); + upb_DecodeStatus status = upb_Decode(unknown, size, promoted, subl, d->extreg, + d->options, &d->arena); + if (status != kUpb_DecodeStatus_Ok) _upb_Decoder_ErrorJmp(d, status); + return promoted; } -static void encode_map(upb_encstate* e, const upb_Message* msg, - const upb_MiniTableSub* subs, - const upb_MiniTableField* f) { - const upb_Map* map = *UPB_PTR_AT(msg, f->offset, const upb_Map*); - const upb_MiniTable* layout = subs[f->UPB_PRIVATE(submsg_index)].submsg; - UPB_ASSERT(layout->field_count == 2); - - if (map == NULL) return; - - if (e->options & kUpb_EncodeOption_Deterministic) { - _upb_sortedmap sorted; - _upb_mapsorter_pushmap(&e->sorter, - layout->fields[0].UPB_PRIVATE(descriptortype), map, - &sorted); - upb_MapEntry ent; - while (_upb_sortedmap_next(&e->sorter, map, &sorted, &ent)) { - encode_mapentry(e, f->number, layout, &ent); - } - _upb_mapsorter_popmap(&e->sorter, &sorted); - } else { - intptr_t iter = UPB_STRTABLE_BEGIN; - upb_StringView key; - upb_value val; - while (upb_strtable_next2(&map->table, &key, &val, &iter)) { - upb_MapEntry ent; - _upb_map_fromkey(key, &ent.data.k, map->key_size); - _upb_map_fromvalue(val, &ent.data.v, map->val_size); - encode_mapentry(e, f->number, layout, &ent); - } - } +static const char* _upb_Decoder_ReadString(upb_Decoder* d, const char* ptr, + int size, upb_StringView* str) { + const char* str_ptr = ptr; + ptr = upb_EpsCopyInputStream_ReadString(&d->input, &str_ptr, size, &d->arena); + if (!ptr) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + 
str->data = str_ptr; + str->size = size; + return ptr; } -static bool encode_shouldencode(upb_encstate* e, const upb_Message* msg, - const upb_MiniTableSub* subs, - const upb_MiniTableField* f) { - if (f->presence == 0) { - /* Proto3 presence or map/array. */ - const void* mem = UPB_PTR_AT(msg, f->offset, void); - switch (_upb_MiniTableField_GetRep(f)) { - case kUpb_FieldRep_1Byte: { - char ch; - memcpy(&ch, mem, 1); - return ch != 0; - } - case kUpb_FieldRep_4Byte: { - uint32_t u32; - memcpy(&u32, mem, 4); - return u32 != 0; - } - case kUpb_FieldRep_8Byte: { - uint64_t u64; - memcpy(&u64, mem, 8); - return u64 != 0; - } - case kUpb_FieldRep_StringView: { - const upb_StringView* str = (const upb_StringView*)mem; - return str->size != 0; - } - default: - UPB_UNREACHABLE(); - } - } else if (f->presence > 0) { - /* Proto2 presence: hasbit. */ - return _upb_hasbit_field(msg, f); - } else { - /* Field is in a oneof. */ - return _upb_getoneofcase_field(msg, f) == f->number; +UPB_FORCEINLINE +static const char* _upb_Decoder_RecurseSubMessage(upb_Decoder* d, + const char* ptr, + upb_Message* submsg, + const upb_MiniTable* subl, + uint32_t expected_end_group) { + if (--d->depth < 0) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_MaxDepthExceeded); } -} - -static void encode_field(upb_encstate* e, const upb_Message* msg, - const upb_MiniTableSub* subs, - const upb_MiniTableField* field) { - switch (upb_FieldMode_Get(field)) { - case kUpb_FieldMode_Array: - encode_array(e, msg, subs, field); - break; - case kUpb_FieldMode_Map: - encode_map(e, msg, subs, field); - break; - case kUpb_FieldMode_Scalar: - encode_scalar(e, UPB_PTR_AT(msg, field->offset, void), subs, field); - break; - default: - UPB_UNREACHABLE(); + ptr = _upb_Decoder_DecodeMessage(d, ptr, submsg, subl); + d->depth++; + if (d->end_group != expected_end_group) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); } + return ptr; } -static void encode_msgset_item(upb_encstate* e, - const upb_Message_Extension* 
ext) { - size_t size; - encode_tag(e, kUpb_MsgSet_Item, kUpb_WireType_EndGroup); - encode_message(e, ext->data.ptr, ext->ext->sub.submsg, &size); - encode_varint(e, size); - encode_tag(e, kUpb_MsgSet_Message, kUpb_WireType_Delimited); - encode_varint(e, ext->ext->field.number); - encode_tag(e, kUpb_MsgSet_TypeId, kUpb_WireType_Varint); - encode_tag(e, kUpb_MsgSet_Item, kUpb_WireType_StartGroup); +UPB_FORCEINLINE +static const char* _upb_Decoder_DecodeSubMessage( + upb_Decoder* d, const char* ptr, upb_Message* submsg, + const upb_MiniTableSub* subs, const upb_MiniTableField* field, int size) { + int saved_delta = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, size); + const upb_MiniTable* subl = subs[field->UPB_PRIVATE(submsg_index)].submsg; + UPB_ASSERT(subl); + ptr = _upb_Decoder_RecurseSubMessage(d, ptr, submsg, subl, DECODE_NOGROUP); + upb_EpsCopyInputStream_PopLimit(&d->input, ptr, saved_delta); + return ptr; } -static void encode_ext(upb_encstate* e, const upb_Message_Extension* ext, - bool is_message_set) { - if (UPB_UNLIKELY(is_message_set)) { - encode_msgset_item(e, ext); - } else { - encode_field(e, &ext->data, &ext->ext->sub, &ext->ext->field); +UPB_FORCEINLINE +static const char* _upb_Decoder_DecodeGroup(upb_Decoder* d, const char* ptr, + upb_Message* submsg, + const upb_MiniTable* subl, + uint32_t number) { + if (_upb_Decoder_IsDone(d, &ptr)) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); } + ptr = _upb_Decoder_RecurseSubMessage(d, ptr, submsg, subl, number); + d->end_group = DECODE_NOGROUP; + return ptr; } -static void encode_message(upb_encstate* e, const upb_Message* msg, - const upb_MiniTable* m, size_t* size) { - size_t pre_len = e->limit - e->ptr; +UPB_FORCEINLINE +static const char* _upb_Decoder_DecodeUnknownGroup(upb_Decoder* d, + const char* ptr, + uint32_t number) { + return _upb_Decoder_DecodeGroup(d, ptr, NULL, NULL, number); +} - if ((e->options & kUpb_EncodeOption_CheckRequired) && m->required_count) { - uint64_t msg_head; - 
memcpy(&msg_head, msg, 8); - msg_head = _upb_BigEndian_Swap64(msg_head); - if (upb_MiniTable_requiredmask(m) & ~msg_head) { - encode_err(e, kUpb_EncodeStatus_MissingRequired); - } - } - - if ((e->options & kUpb_EncodeOption_SkipUnknown) == 0) { - size_t unknown_size; - const char* unknown = upb_Message_GetUnknown(msg, &unknown_size); +UPB_FORCEINLINE +static const char* _upb_Decoder_DecodeKnownGroup( + upb_Decoder* d, const char* ptr, upb_Message* submsg, + const upb_MiniTableSub* subs, const upb_MiniTableField* field) { + const upb_MiniTable* subl = subs[field->UPB_PRIVATE(submsg_index)].submsg; + UPB_ASSERT(subl); + return _upb_Decoder_DecodeGroup(d, ptr, submsg, subl, field->number); +} - if (unknown) { - encode_bytes(e, unknown, unknown_size); - } - } +static char* upb_Decoder_EncodeVarint32(uint32_t val, char* ptr) { + do { + uint8_t byte = val & 0x7fU; + val >>= 7; + if (val) byte |= 0x80U; + *(ptr++) = byte; + } while (val); + return ptr; +} - if (m->ext != kUpb_ExtMode_NonExtendable) { - /* Encode all extensions together. Unlike C++, we do not attempt to keep - * these in field number order relative to normal fields or even to each - * other. 
*/ - size_t ext_count; - const upb_Message_Extension* ext = _upb_Message_Getexts(msg, &ext_count); - if (ext_count) { - if (e->options & kUpb_EncodeOption_Deterministic) { - _upb_sortedmap sorted; - _upb_mapsorter_pushexts(&e->sorter, ext, ext_count, &sorted); - while (_upb_sortedmap_nextext(&e->sorter, &sorted, &ext)) { - encode_ext(e, ext, m->ext == kUpb_ExtMode_IsMessageSet); - } - _upb_mapsorter_popmap(&e->sorter, &sorted); - } else { - const upb_Message_Extension* end = ext + ext_count; - for (; ext != end; ext++) { - encode_ext(e, ext, m->ext == kUpb_ExtMode_IsMessageSet); - } - } - } - } +static void _upb_Decoder_AddUnknownVarints(upb_Decoder* d, upb_Message* msg, + uint32_t val1, uint32_t val2) { + char buf[20]; + char* end = buf; + end = upb_Decoder_EncodeVarint32(val1, end); + end = upb_Decoder_EncodeVarint32(val2, end); - if (m->field_count) { - const upb_MiniTableField* f = &m->fields[m->field_count]; - const upb_MiniTableField* first = &m->fields[0]; - while (f != first) { - f--; - if (encode_shouldencode(e, msg, m->subs, f)) { - encode_field(e, msg, m->subs, f); - } - } + if (!_upb_Message_AddUnknown(msg, buf, end - buf, &d->arena)) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); } - - *size = (e->limit - e->ptr) - pre_len; } -static upb_EncodeStatus upb_Encoder_Encode(upb_encstate* const encoder, - const void* const msg, - const upb_MiniTable* const l, - char** const buf, - size_t* const size) { - // Unfortunately we must continue to perform hackery here because there are - // code paths which blindly copy the returned pointer without bothering to - // check for errors until much later (b/235839510). So we still set *buf to - // NULL on error and we still set it to non-NULL on a successful empty result. 
- if (UPB_SETJMP(encoder->err) == 0) { - encode_message(encoder, msg, l, size); - *size = encoder->limit - encoder->ptr; - if (*size == 0) { - static char ch; - *buf = &ch; - } else { - UPB_ASSERT(encoder->ptr); - *buf = encoder->ptr; - } - } else { - UPB_ASSERT(encoder->status != kUpb_EncodeStatus_Ok); - *buf = NULL; - *size = 0; - } +UPB_NOINLINE +static bool _upb_Decoder_CheckEnumSlow(upb_Decoder* d, const char* ptr, + upb_Message* msg, + const upb_MiniTableEnum* e, + const upb_MiniTableField* field, + uint32_t v) { + if (_upb_MiniTable_CheckEnumValueSlow(e, v)) return true; - _upb_mapsorter_destroy(&encoder->sorter); - return encoder->status; + // Unrecognized enum goes into unknown fields. + // For packed fields the tag could be arbitrarily far in the past, so we + // just re-encode the tag and value here. + uint32_t tag = ((uint32_t)field->number << 3) | kUpb_WireType_Varint; + upb_Message* unknown_msg = + field->mode & kUpb_LabelFlags_IsExtension ? d->unknown_msg : msg; + _upb_Decoder_AddUnknownVarints(d, unknown_msg, tag, v); + return false; } -upb_EncodeStatus upb_Encode(const void* msg, const upb_MiniTable* l, - int options, upb_Arena* arena, char** buf, - size_t* size) { - upb_encstate e; - unsigned depth = (unsigned)options >> 16; - - e.status = kUpb_EncodeStatus_Ok; - e.arena = arena; - e.buf = NULL; - e.limit = NULL; - e.ptr = NULL; - e.depth = depth ? 
depth : kUpb_WireFormat_DefaultDepthLimit; - e.options = options; - _upb_mapsorter_init(&e.sorter); +UPB_FORCEINLINE +static bool _upb_Decoder_CheckEnum(upb_Decoder* d, const char* ptr, + upb_Message* msg, const upb_MiniTableEnum* e, + const upb_MiniTableField* field, + wireval* val) { + uint32_t v = val->uint32_val; - return upb_Encoder_Encode(&e, msg, l, buf, size); + _kUpb_FastEnumCheck_Status status = _upb_MiniTable_CheckEnumValueFast(e, v); + if (UPB_LIKELY(status == _kUpb_FastEnumCheck_ValueIsInEnum)) return true; + return _upb_Decoder_CheckEnumSlow(d, ptr, msg, e, field, v); } +UPB_NOINLINE +static const char* _upb_Decoder_DecodeEnumArray(upb_Decoder* d, const char* ptr, + upb_Message* msg, + upb_Array* arr, + const upb_MiniTableSub* subs, + const upb_MiniTableField* field, + wireval* val) { + const upb_MiniTableEnum* e = subs[field->UPB_PRIVATE(submsg_index)].subenum; + if (!_upb_Decoder_CheckEnum(d, ptr, msg, e, field, val)) return ptr; + void* mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->size * 4, void); + arr->size++; + memcpy(mem, val, 4); + return ptr; +} +UPB_FORCEINLINE +static const char* _upb_Decoder_DecodeFixedPacked( + upb_Decoder* d, const char* ptr, upb_Array* arr, wireval* val, + const upb_MiniTableField* field, int lg2) { + int mask = (1 << lg2) - 1; + size_t count = val->size >> lg2; + if ((val->size & mask) != 0) { + // Length isn't a round multiple of elem size. + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); + } + _upb_Decoder_Reserve(d, arr, count); + void* mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << lg2, void); + arr->size += count; + // Note: if/when the decoder supports multi-buffer input, we will need to + // handle buffer seams here. 
+ if (_upb_IsLittleEndian()) { + ptr = upb_EpsCopyInputStream_Copy(&d->input, ptr, mem, val->size); + } else { + int delta = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, val->size); + char* dst = mem; + while (!_upb_Decoder_IsDone(d, &ptr)) { + if (lg2 == 2) { + ptr = upb_WireReader_ReadFixed32(ptr, dst); + dst += 4; + } else { + UPB_ASSERT(lg2 == 3); + ptr = upb_WireReader_ReadFixed64(ptr, dst); + dst += 8; + } + } + upb_EpsCopyInputStream_PopLimit(&d->input, ptr, delta); + } -// Must be last. + return ptr; +} -UPB_NOINLINE _upb_WireReader_ReadLongVarintRet -_upb_WireReader_ReadLongVarint(const char* ptr, uint64_t val) { - _upb_WireReader_ReadLongVarintRet ret = {NULL, 0}; - uint64_t byte; - int i; - for (i = 1; i < 10; i++) { - byte = (uint8_t)ptr[i]; - val += (byte - 1) << (i * 7); - if (!(byte & 0x80)) { - ret.ptr = ptr + i + 1; - ret.val = val; - return ret; +UPB_FORCEINLINE +static const char* _upb_Decoder_DecodeVarintPacked( + upb_Decoder* d, const char* ptr, upb_Array* arr, wireval* val, + const upb_MiniTableField* field, int lg2) { + int scale = 1 << lg2; + int saved_limit = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, val->size); + char* out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << lg2, void); + while (!_upb_Decoder_IsDone(d, &ptr)) { + wireval elem; + ptr = _upb_Decoder_DecodeVarint(d, ptr, &elem.uint64_val); + _upb_Decoder_Munge(field->UPB_PRIVATE(descriptortype), &elem); + if (_upb_Decoder_Reserve(d, arr, 1)) { + out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << lg2, void); } + arr->size++; + memcpy(out, &elem, scale); + out += scale; } - return ret; + upb_EpsCopyInputStream_PopLimit(&d->input, ptr, saved_limit); + return ptr; } -const char* _upb_WireReader_SkipGroup(const char* ptr, uint32_t tag, - int depth_limit, - upb_EpsCopyInputStream* stream) { - if (--depth_limit == 0) return NULL; - uint32_t end_group_tag = (tag & ~7ULL) | kUpb_WireType_EndGroup; - while (!upb_EpsCopyInputStream_IsDone(stream, &ptr)) { - uint32_t tag; - ptr = 
upb_WireReader_ReadTag(ptr, &tag); - if (!ptr) return NULL; - if (tag == end_group_tag) return ptr; - ptr = _upb_WireReader_SkipValue(ptr, tag, depth_limit, stream); - if (!ptr) return NULL; +UPB_NOINLINE +static const char* _upb_Decoder_DecodeEnumPacked( + upb_Decoder* d, const char* ptr, upb_Message* msg, upb_Array* arr, + const upb_MiniTableSub* subs, const upb_MiniTableField* field, + wireval* val) { + const upb_MiniTableEnum* e = subs[field->UPB_PRIVATE(submsg_index)].subenum; + int saved_limit = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, val->size); + char* out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size * 4, void); + while (!_upb_Decoder_IsDone(d, &ptr)) { + wireval elem; + ptr = _upb_Decoder_DecodeVarint(d, ptr, &elem.uint64_val); + _upb_Decoder_MungeInt32(&elem); + if (!_upb_Decoder_CheckEnum(d, ptr, msg, e, field, &elem)) { + continue; + } + if (_upb_Decoder_Reserve(d, arr, 1)) { + out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size * 4, void); + } + arr->size++; + memcpy(out, &elem, 4); + out += 4; } + upb_EpsCopyInputStream_PopLimit(&d->input, ptr, saved_limit); return ptr; } +upb_Array* _upb_Decoder_CreateArray(upb_Decoder* d, + const upb_MiniTableField* field) { + /* Maps descriptor type -> elem_size_lg2. */ + static const uint8_t kElemSizeLg2[] = { + [0] = -1, // invalid descriptor type + [kUpb_FieldType_Double] = 3, + [kUpb_FieldType_Float] = 2, + [kUpb_FieldType_Int64] = 3, + [kUpb_FieldType_UInt64] = 3, + [kUpb_FieldType_Int32] = 2, + [kUpb_FieldType_Fixed64] = 3, + [kUpb_FieldType_Fixed32] = 2, + [kUpb_FieldType_Bool] = 0, + [kUpb_FieldType_String] = UPB_SIZE(3, 4), + [kUpb_FieldType_Group] = UPB_SIZE(2, 3), + [kUpb_FieldType_Message] = UPB_SIZE(2, 3), + [kUpb_FieldType_Bytes] = UPB_SIZE(3, 4), + [kUpb_FieldType_UInt32] = 2, + [kUpb_FieldType_Enum] = 2, + [kUpb_FieldType_SFixed32] = 2, + [kUpb_FieldType_SFixed64] = 3, + [kUpb_FieldType_SInt32] = 2, + [kUpb_FieldType_SInt64] = 3, + }; - -// Must be last. 
- -typedef struct { - uint64_t present_values_mask; - uint32_t last_written_value; -} upb_MtDataEncoderInternal_EnumState; - -typedef struct { - uint64_t msg_modifiers; - uint32_t last_field_num; - enum { - kUpb_OneofState_NotStarted, - kUpb_OneofState_StartedOneof, - kUpb_OneofState_EmittedOneofField, - } oneof_state; -} upb_MtDataEncoderInternal_MsgState; - -typedef struct { - char* buf_start; // Only for checking kUpb_MtDataEncoder_MinSize. - union { - upb_MtDataEncoderInternal_EnumState enum_state; - upb_MtDataEncoderInternal_MsgState msg_state; - } state; -} upb_MtDataEncoderInternal; - -static upb_MtDataEncoderInternal* upb_MtDataEncoder_GetInternal( - upb_MtDataEncoder* e, char* buf_start) { - UPB_ASSERT(sizeof(upb_MtDataEncoderInternal) <= sizeof(e->internal)); - upb_MtDataEncoderInternal* ret = (upb_MtDataEncoderInternal*)e->internal; - ret->buf_start = buf_start; + size_t lg2 = kElemSizeLg2[field->UPB_PRIVATE(descriptortype)]; + upb_Array* ret = _upb_Array_New(&d->arena, 4, lg2); + if (!ret) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); return ret; } -static char* upb_MtDataEncoder_PutRaw(upb_MtDataEncoder* e, char* ptr, - char ch) { - upb_MtDataEncoderInternal* in = (upb_MtDataEncoderInternal*)e->internal; - UPB_ASSERT(ptr - in->buf_start < kUpb_MtDataEncoder_MinSize); - if (ptr == e->end) return NULL; - *ptr++ = ch; - return ptr; -} - -static char* upb_MtDataEncoder_Put(upb_MtDataEncoder* e, char* ptr, char ch) { - return upb_MtDataEncoder_PutRaw(e, ptr, _upb_ToBase92(ch)); -} +static const char* _upb_Decoder_DecodeToArray(upb_Decoder* d, const char* ptr, + upb_Message* msg, + const upb_MiniTableSub* subs, + const upb_MiniTableField* field, + wireval* val, int op) { + upb_Array** arrp = UPB_PTR_AT(msg, field->offset, void); + upb_Array* arr = *arrp; + void* mem; -static char* upb_MtDataEncoder_PutBase92Varint(upb_MtDataEncoder* e, char* ptr, - uint32_t val, int min, int max) { - int shift = upb_Log2Ceiling(_upb_FromBase92(max) - 
_upb_FromBase92(min) + 1); - UPB_ASSERT(shift <= 6); - uint32_t mask = (1 << shift) - 1; - do { - uint32_t bits = val & mask; - ptr = upb_MtDataEncoder_Put(e, ptr, bits + _upb_FromBase92(min)); - if (!ptr) return NULL; - val >>= shift; - } while (val); - return ptr; -} + if (arr) { + _upb_Decoder_Reserve(d, arr, 1); + } else { + arr = _upb_Decoder_CreateArray(d, field); + *arrp = arr; + } -char* upb_MtDataEncoder_PutModifier(upb_MtDataEncoder* e, char* ptr, - uint64_t mod) { - if (mod) { - ptr = upb_MtDataEncoder_PutBase92Varint(e, ptr, mod, - kUpb_EncodedValue_MinModifier, - kUpb_EncodedValue_MaxModifier); + switch (op) { + case kUpb_DecodeOp_Scalar1Byte: + case kUpb_DecodeOp_Scalar4Byte: + case kUpb_DecodeOp_Scalar8Byte: + /* Append scalar value. */ + mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << op, void); + arr->size++; + memcpy(mem, val, 1 << op); + return ptr; + case kUpb_DecodeOp_String: + _upb_Decoder_VerifyUtf8(d, ptr, val->size); + /* Fallthrough. */ + case kUpb_DecodeOp_Bytes: { + /* Append bytes. */ + upb_StringView* str = (upb_StringView*)_upb_array_ptr(arr) + arr->size; + arr->size++; + return _upb_Decoder_ReadString(d, ptr, val->size, str); + } + case kUpb_DecodeOp_SubMessage: { + /* Append submessage / group. 
*/ + upb_TaggedMessagePtr* target = UPB_PTR_AT( + _upb_array_ptr(arr), arr->size * sizeof(void*), upb_TaggedMessagePtr); + upb_Message* submsg = _upb_Decoder_NewSubMessage(d, subs, field, target); + arr->size++; + if (UPB_UNLIKELY(field->UPB_PRIVATE(descriptortype) == + kUpb_FieldType_Group)) { + return _upb_Decoder_DecodeKnownGroup(d, ptr, submsg, subs, field); + } else { + return _upb_Decoder_DecodeSubMessage(d, ptr, submsg, subs, field, + val->size); + } + } + case OP_FIXPCK_LG2(2): + case OP_FIXPCK_LG2(3): + return _upb_Decoder_DecodeFixedPacked(d, ptr, arr, val, field, + op - OP_FIXPCK_LG2(0)); + case OP_VARPCK_LG2(0): + case OP_VARPCK_LG2(2): + case OP_VARPCK_LG2(3): + return _upb_Decoder_DecodeVarintPacked(d, ptr, arr, val, field, + op - OP_VARPCK_LG2(0)); + case kUpb_DecodeOp_Enum: + return _upb_Decoder_DecodeEnumArray(d, ptr, msg, arr, subs, field, val); + case kUpb_DecodeOp_PackedEnum: + return _upb_Decoder_DecodeEnumPacked(d, ptr, msg, arr, subs, field, val); + default: + UPB_UNREACHABLE(); } - return ptr; } -char* upb_MtDataEncoder_EncodeExtension(upb_MtDataEncoder* e, char* ptr, - upb_FieldType type, uint32_t field_num, - uint64_t field_mod) { - upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); - in->state.msg_state.msg_modifiers = 0; - in->state.msg_state.last_field_num = 0; - in->state.msg_state.oneof_state = kUpb_OneofState_NotStarted; - - ptr = upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_ExtensionV1); - if (!ptr) return NULL; +upb_Map* _upb_Decoder_CreateMap(upb_Decoder* d, const upb_MiniTable* entry) { + /* Maps descriptor type -> upb map size. 
*/ + static const uint8_t kSizeInMap[] = { + [0] = -1, // invalid descriptor type */ + [kUpb_FieldType_Double] = 8, + [kUpb_FieldType_Float] = 4, + [kUpb_FieldType_Int64] = 8, + [kUpb_FieldType_UInt64] = 8, + [kUpb_FieldType_Int32] = 4, + [kUpb_FieldType_Fixed64] = 8, + [kUpb_FieldType_Fixed32] = 4, + [kUpb_FieldType_Bool] = 1, + [kUpb_FieldType_String] = UPB_MAPTYPE_STRING, + [kUpb_FieldType_Group] = sizeof(void*), + [kUpb_FieldType_Message] = sizeof(void*), + [kUpb_FieldType_Bytes] = UPB_MAPTYPE_STRING, + [kUpb_FieldType_UInt32] = 4, + [kUpb_FieldType_Enum] = 4, + [kUpb_FieldType_SFixed32] = 4, + [kUpb_FieldType_SFixed64] = 8, + [kUpb_FieldType_SInt32] = 4, + [kUpb_FieldType_SInt64] = 8, + }; - return upb_MtDataEncoder_PutField(e, ptr, type, field_num, field_mod); + const upb_MiniTableField* key_field = &entry->fields[0]; + const upb_MiniTableField* val_field = &entry->fields[1]; + char key_size = kSizeInMap[key_field->UPB_PRIVATE(descriptortype)]; + char val_size = kSizeInMap[val_field->UPB_PRIVATE(descriptortype)]; + UPB_ASSERT(key_field->offset == offsetof(upb_MapEntryData, k)); + UPB_ASSERT(val_field->offset == offsetof(upb_MapEntryData, v)); + upb_Map* ret = _upb_Map_New(&d->arena, key_size, val_size); + if (!ret) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + return ret; } -char* upb_MtDataEncoder_EncodeMap(upb_MtDataEncoder* e, char* ptr, - upb_FieldType key_type, - upb_FieldType value_type, uint64_t key_mod, - uint64_t value_mod) { - upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); - in->state.msg_state.msg_modifiers = 0; - in->state.msg_state.last_field_num = 0; - in->state.msg_state.oneof_state = kUpb_OneofState_NotStarted; - - ptr = upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_MapV1); - if (!ptr) return NULL; - - ptr = upb_MtDataEncoder_PutField(e, ptr, key_type, 1, key_mod); - if (!ptr) return NULL; - - return upb_MtDataEncoder_PutField(e, ptr, value_type, 2, value_mod); -} +static const char* 
_upb_Decoder_DecodeToMap(upb_Decoder* d, const char* ptr, + upb_Message* msg, + const upb_MiniTableSub* subs, + const upb_MiniTableField* field, + wireval* val) { + upb_Map** map_p = UPB_PTR_AT(msg, field->offset, upb_Map*); + upb_Map* map = *map_p; + upb_MapEntry ent; + UPB_ASSERT(upb_MiniTableField_Type(field) == kUpb_FieldType_Message); + const upb_MiniTable* entry = subs[field->UPB_PRIVATE(submsg_index)].submsg; -char* upb_MtDataEncoder_EncodeMessageSet(upb_MtDataEncoder* e, char* ptr) { - (void)upb_MtDataEncoder_GetInternal(e, ptr); - return upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_MessageSetV1); -} + UPB_ASSERT(entry); + UPB_ASSERT(entry->field_count == 2); + UPB_ASSERT(!upb_IsRepeatedOrMap(&entry->fields[0])); + UPB_ASSERT(!upb_IsRepeatedOrMap(&entry->fields[1])); -char* upb_MtDataEncoder_StartMessage(upb_MtDataEncoder* e, char* ptr, - uint64_t msg_mod) { - upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); - in->state.msg_state.msg_modifiers = msg_mod; - in->state.msg_state.last_field_num = 0; - in->state.msg_state.oneof_state = kUpb_OneofState_NotStarted; + if (!map) { + map = _upb_Decoder_CreateMap(d, entry); + *map_p = map; + } - ptr = upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_MessageV1); - if (!ptr) return NULL; + // Parse map entry. + memset(&ent, 0, sizeof(ent)); - return upb_MtDataEncoder_PutModifier(e, ptr, msg_mod); -} + if (entry->fields[1].UPB_PRIVATE(descriptortype) == kUpb_FieldType_Message || + entry->fields[1].UPB_PRIVATE(descriptortype) == kUpb_FieldType_Group) { + // Create proactively to handle the case where it doesn't appear. 
+ upb_TaggedMessagePtr msg; + _upb_Decoder_NewSubMessage(d, entry->subs, &entry->fields[1], &msg); + ent.data.v.val = upb_value_uintptr(msg); + } -static char* _upb_MtDataEncoder_MaybePutFieldSkip(upb_MtDataEncoder* e, - char* ptr, - uint32_t field_num) { - upb_MtDataEncoderInternal* in = (upb_MtDataEncoderInternal*)e->internal; - if (field_num <= in->state.msg_state.last_field_num) return NULL; - if (in->state.msg_state.last_field_num + 1 != field_num) { - // Put skip. - UPB_ASSERT(field_num > in->state.msg_state.last_field_num); - uint32_t skip = field_num - in->state.msg_state.last_field_num; - ptr = upb_MtDataEncoder_PutBase92Varint( - e, ptr, skip, kUpb_EncodedValue_MinSkip, kUpb_EncodedValue_MaxSkip); - if (!ptr) return NULL; + ptr = + _upb_Decoder_DecodeSubMessage(d, ptr, &ent.data, subs, field, val->size); + // check if ent had any unknown fields + size_t size; + upb_Message_GetUnknown(&ent.data, &size); + if (size != 0) { + char* buf; + size_t size; + uint32_t tag = ((uint32_t)field->number << 3) | kUpb_WireType_Delimited; + upb_EncodeStatus status = + upb_Encode(&ent.data, entry, 0, &d->arena, &buf, &size); + if (status != kUpb_EncodeStatus_Ok) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + } + _upb_Decoder_AddUnknownVarints(d, msg, tag, size); + if (!_upb_Message_AddUnknown(msg, buf, size, &d->arena)) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + } + } else { + if (_upb_Map_Insert(map, &ent.data.k, map->key_size, &ent.data.v, + map->val_size, + &d->arena) == kUpb_MapInsertStatus_OutOfMemory) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + } } - in->state.msg_state.last_field_num = field_num; return ptr; } -static char* _upb_MtDataEncoder_PutFieldType(upb_MtDataEncoder* e, char* ptr, - upb_FieldType type, - uint64_t field_mod) { - static const char kUpb_TypeToEncoded[] = { - [kUpb_FieldType_Double] = kUpb_EncodedType_Double, - [kUpb_FieldType_Float] = kUpb_EncodedType_Float, - [kUpb_FieldType_Int64] = 
kUpb_EncodedType_Int64, - [kUpb_FieldType_UInt64] = kUpb_EncodedType_UInt64, - [kUpb_FieldType_Int32] = kUpb_EncodedType_Int32, - [kUpb_FieldType_Fixed64] = kUpb_EncodedType_Fixed64, - [kUpb_FieldType_Fixed32] = kUpb_EncodedType_Fixed32, - [kUpb_FieldType_Bool] = kUpb_EncodedType_Bool, - [kUpb_FieldType_String] = kUpb_EncodedType_String, - [kUpb_FieldType_Group] = kUpb_EncodedType_Group, - [kUpb_FieldType_Message] = kUpb_EncodedType_Message, - [kUpb_FieldType_Bytes] = kUpb_EncodedType_Bytes, - [kUpb_FieldType_UInt32] = kUpb_EncodedType_UInt32, - [kUpb_FieldType_Enum] = kUpb_EncodedType_OpenEnum, - [kUpb_FieldType_SFixed32] = kUpb_EncodedType_SFixed32, - [kUpb_FieldType_SFixed64] = kUpb_EncodedType_SFixed64, - [kUpb_FieldType_SInt32] = kUpb_EncodedType_SInt32, - [kUpb_FieldType_SInt64] = kUpb_EncodedType_SInt64, - }; +static const char* _upb_Decoder_DecodeToSubMessage( + upb_Decoder* d, const char* ptr, upb_Message* msg, + const upb_MiniTableSub* subs, const upb_MiniTableField* field, wireval* val, + int op) { + void* mem = UPB_PTR_AT(msg, field->offset, void); + int type = field->UPB_PRIVATE(descriptortype); - int encoded_type = kUpb_TypeToEncoded[type]; + if (UPB_UNLIKELY(op == kUpb_DecodeOp_Enum) && + !_upb_Decoder_CheckEnum(d, ptr, msg, + subs[field->UPB_PRIVATE(submsg_index)].subenum, + field, val)) { + return ptr; + } - if (field_mod & kUpb_FieldModifier_IsClosedEnum) { - UPB_ASSERT(type == kUpb_FieldType_Enum); - encoded_type = kUpb_EncodedType_ClosedEnum; + /* Set presence if necessary. */ + if (field->presence > 0) { + _upb_sethas_field(msg, field); + } else if (field->presence < 0) { + /* Oneof case */ + uint32_t* oneof_case = _upb_oneofcase_field(msg, field); + if (op == kUpb_DecodeOp_SubMessage && *oneof_case != field->number) { + memset(mem, 0, sizeof(void*)); + } + *oneof_case = field->number; } - if (field_mod & kUpb_FieldModifier_IsRepeated) { - // Repeated fields shift the type number up (unlike other modifiers which - // are bit flags). 
- encoded_type += kUpb_EncodedType_RepeatedBase; + /* Store into message. */ + switch (op) { + case kUpb_DecodeOp_SubMessage: { + upb_TaggedMessagePtr* submsgp = mem; + upb_Message* submsg; + if (*submsgp) { + submsg = _upb_Decoder_ReuseSubMessage(d, subs, field, submsgp); + } else { + submsg = _upb_Decoder_NewSubMessage(d, subs, field, submsgp); + } + if (UPB_UNLIKELY(type == kUpb_FieldType_Group)) { + ptr = _upb_Decoder_DecodeKnownGroup(d, ptr, submsg, subs, field); + } else { + ptr = _upb_Decoder_DecodeSubMessage(d, ptr, submsg, subs, field, + val->size); + } + break; + } + case kUpb_DecodeOp_String: + _upb_Decoder_VerifyUtf8(d, ptr, val->size); + /* Fallthrough. */ + case kUpb_DecodeOp_Bytes: + return _upb_Decoder_ReadString(d, ptr, val->size, mem); + case kUpb_DecodeOp_Scalar8Byte: + memcpy(mem, val, 8); + break; + case kUpb_DecodeOp_Enum: + case kUpb_DecodeOp_Scalar4Byte: + memcpy(mem, val, 4); + break; + case kUpb_DecodeOp_Scalar1Byte: + memcpy(mem, val, 1); + break; + default: + UPB_UNREACHABLE(); } - return upb_MtDataEncoder_Put(e, ptr, encoded_type); + return ptr; } -static char* _upb_MtDataEncoder_MaybePutModifiers(upb_MtDataEncoder* e, - char* ptr, upb_FieldType type, - uint64_t field_mod) { - upb_MtDataEncoderInternal* in = (upb_MtDataEncoderInternal*)e->internal; - uint32_t encoded_modifiers = 0; - if ((field_mod & kUpb_FieldModifier_IsRepeated) && - upb_FieldType_IsPackable(type)) { - bool field_is_packed = field_mod & kUpb_FieldModifier_IsPacked; - bool default_is_packed = in->state.msg_state.msg_modifiers & - kUpb_MessageModifier_DefaultIsPacked; - if (field_is_packed != default_is_packed) { - encoded_modifiers |= kUpb_EncodedFieldModifier_FlipPacked; - } +UPB_NOINLINE +const char* _upb_Decoder_CheckRequired(upb_Decoder* d, const char* ptr, + const upb_Message* msg, + const upb_MiniTable* l) { + assert(l->required_count); + if (UPB_LIKELY((d->options & kUpb_DecodeOption_CheckRequired) == 0)) { + return ptr; } - - if (field_mod & 
kUpb_FieldModifier_IsProto3Singular) { - encoded_modifiers |= kUpb_EncodedFieldModifier_IsProto3Singular; + uint64_t msg_head; + memcpy(&msg_head, msg, 8); + msg_head = _upb_BigEndian_Swap64(msg_head); + if (upb_MiniTable_requiredmask(l) & ~msg_head) { + d->missing_required = true; } + return ptr; +} - if (field_mod & kUpb_FieldModifier_IsRequired) { - encoded_modifiers |= kUpb_EncodedFieldModifier_IsRequired; +UPB_FORCEINLINE +static bool _upb_Decoder_TryFastDispatch(upb_Decoder* d, const char** ptr, + upb_Message* msg, + const upb_MiniTable* layout) { +#if UPB_FASTTABLE + if (layout && layout->table_mask != (unsigned char)-1) { + uint16_t tag = _upb_FastDecoder_LoadTag(*ptr); + intptr_t table = decode_totable(layout); + *ptr = _upb_FastDecoder_TagDispatch(d, *ptr, msg, table, 0, tag); + return true; } +#endif + return false; +} - return upb_MtDataEncoder_PutModifier(e, ptr, encoded_modifiers); +static const char* upb_Decoder_SkipField(upb_Decoder* d, const char* ptr, + uint32_t tag) { + int field_number = tag >> 3; + int wire_type = tag & 7; + switch (wire_type) { + case kUpb_WireType_Varint: { + uint64_t val; + return _upb_Decoder_DecodeVarint(d, ptr, &val); + } + case kUpb_WireType_64Bit: + return ptr + 8; + case kUpb_WireType_32Bit: + return ptr + 4; + case kUpb_WireType_Delimited: { + uint32_t size; + ptr = upb_Decoder_DecodeSize(d, ptr, &size); + return ptr + size; + } + case kUpb_WireType_StartGroup: + return _upb_Decoder_DecodeUnknownGroup(d, ptr, field_number); + default: + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); + } } -char* upb_MtDataEncoder_PutField(upb_MtDataEncoder* e, char* ptr, - upb_FieldType type, uint32_t field_num, - uint64_t field_mod) { - upb_MtDataEncoder_GetInternal(e, ptr); +enum { + kStartItemTag = ((kUpb_MsgSet_Item << 3) | kUpb_WireType_StartGroup), + kEndItemTag = ((kUpb_MsgSet_Item << 3) | kUpb_WireType_EndGroup), + kTypeIdTag = ((kUpb_MsgSet_TypeId << 3) | kUpb_WireType_Varint), + kMessageTag = ((kUpb_MsgSet_Message << 
3) | kUpb_WireType_Delimited), +}; - ptr = _upb_MtDataEncoder_MaybePutFieldSkip(e, ptr, field_num); - if (!ptr) return NULL; +static void upb_Decoder_AddKnownMessageSetItem( + upb_Decoder* d, upb_Message* msg, const upb_MiniTableExtension* item_mt, + const char* data, uint32_t size) { + upb_Message_Extension* ext = + _upb_Message_GetOrCreateExtension(msg, item_mt, &d->arena); + if (UPB_UNLIKELY(!ext)) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + } + upb_Message* submsg = _upb_Decoder_NewSubMessage( + d, &ext->ext->sub, &ext->ext->field, (upb_TaggedMessagePtr*)&ext->data); + upb_DecodeStatus status = upb_Decode(data, size, submsg, item_mt->sub.submsg, + d->extreg, d->options, &d->arena); + if (status != kUpb_DecodeStatus_Ok) _upb_Decoder_ErrorJmp(d, status); +} - ptr = _upb_MtDataEncoder_PutFieldType(e, ptr, type, field_mod); - if (!ptr) return NULL; +static void upb_Decoder_AddUnknownMessageSetItem(upb_Decoder* d, + upb_Message* msg, + uint32_t type_id, + const char* message_data, + uint32_t message_size) { + char buf[60]; + char* ptr = buf; + ptr = upb_Decoder_EncodeVarint32(kStartItemTag, ptr); + ptr = upb_Decoder_EncodeVarint32(kTypeIdTag, ptr); + ptr = upb_Decoder_EncodeVarint32(type_id, ptr); + ptr = upb_Decoder_EncodeVarint32(kMessageTag, ptr); + ptr = upb_Decoder_EncodeVarint32(message_size, ptr); + char* split = ptr; - return _upb_MtDataEncoder_MaybePutModifiers(e, ptr, type, field_mod); + ptr = upb_Decoder_EncodeVarint32(kEndItemTag, ptr); + char* end = ptr; + + if (!_upb_Message_AddUnknown(msg, buf, split - buf, &d->arena) || + !_upb_Message_AddUnknown(msg, message_data, message_size, &d->arena) || + !_upb_Message_AddUnknown(msg, split, end - split, &d->arena)) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + } } -char* upb_MtDataEncoder_StartOneof(upb_MtDataEncoder* e, char* ptr) { - upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); - if (in->state.msg_state.oneof_state == kUpb_OneofState_NotStarted) { 
- ptr = upb_MtDataEncoder_Put(e, ptr, _upb_FromBase92(kUpb_EncodedValue_End)); +static void upb_Decoder_AddMessageSetItem(upb_Decoder* d, upb_Message* msg, + const upb_MiniTable* t, + uint32_t type_id, const char* data, + uint32_t size) { + const upb_MiniTableExtension* item_mt = + upb_ExtensionRegistry_Lookup(d->extreg, t, type_id); + if (item_mt) { + upb_Decoder_AddKnownMessageSetItem(d, msg, item_mt, data, size); } else { - ptr = upb_MtDataEncoder_Put( - e, ptr, _upb_FromBase92(kUpb_EncodedValue_OneofSeparator)); + upb_Decoder_AddUnknownMessageSetItem(d, msg, type_id, data, size); } - in->state.msg_state.oneof_state = kUpb_OneofState_StartedOneof; - return ptr; } -char* upb_MtDataEncoder_PutOneofField(upb_MtDataEncoder* e, char* ptr, - uint32_t field_num) { - upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); - if (in->state.msg_state.oneof_state == kUpb_OneofState_EmittedOneofField) { - ptr = upb_MtDataEncoder_Put( - e, ptr, _upb_FromBase92(kUpb_EncodedValue_FieldSeparator)); - if (!ptr) return NULL; +static const char* upb_Decoder_DecodeMessageSetItem( + upb_Decoder* d, const char* ptr, upb_Message* msg, + const upb_MiniTable* layout) { + uint32_t type_id = 0; + upb_StringView preserved = {NULL, 0}; + typedef enum { + kUpb_HaveId = 1 << 0, + kUpb_HavePayload = 1 << 1, + } StateMask; + StateMask state_mask = 0; + while (!_upb_Decoder_IsDone(d, &ptr)) { + uint32_t tag; + ptr = _upb_Decoder_DecodeTag(d, ptr, &tag); + switch (tag) { + case kEndItemTag: + return ptr; + case kTypeIdTag: { + uint64_t tmp; + ptr = _upb_Decoder_DecodeVarint(d, ptr, &tmp); + if (state_mask & kUpb_HaveId) break; // Ignore dup. 
+ state_mask |= kUpb_HaveId; + type_id = tmp; + if (state_mask & kUpb_HavePayload) { + upb_Decoder_AddMessageSetItem(d, msg, layout, type_id, preserved.data, + preserved.size); + } + break; + } + case kMessageTag: { + uint32_t size; + ptr = upb_Decoder_DecodeSize(d, ptr, &size); + const char* data = ptr; + ptr += size; + if (state_mask & kUpb_HavePayload) break; // Ignore dup. + state_mask |= kUpb_HavePayload; + if (state_mask & kUpb_HaveId) { + upb_Decoder_AddMessageSetItem(d, msg, layout, type_id, data, size); + } else { + // Out of order, we must preserve the payload. + preserved.data = data; + preserved.size = size; + } + break; + } + default: + // We do not preserve unexpected fields inside a message set item. + ptr = upb_Decoder_SkipField(d, ptr, tag); + break; + } } - ptr = upb_MtDataEncoder_PutBase92Varint(e, ptr, field_num, _upb_ToBase92(0), - _upb_ToBase92(63)); - in->state.msg_state.oneof_state = kUpb_OneofState_EmittedOneofField; - return ptr; + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); } -char* upb_MtDataEncoder_StartEnum(upb_MtDataEncoder* e, char* ptr) { - upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); - in->state.enum_state.present_values_mask = 0; - in->state.enum_state.last_written_value = 0; +static const upb_MiniTableField* _upb_Decoder_FindField(upb_Decoder* d, + const upb_MiniTable* t, + uint32_t field_number, + int* last_field_index) { + static upb_MiniTableField none = { + 0, 0, 0, 0, kUpb_FakeFieldType_FieldNotFound, 0}; + if (t == NULL) return &none; - return upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_EnumV1); -} + size_t idx = ((size_t)field_number) - 1; // 0 wraps to SIZE_MAX + if (idx < t->dense_below) { + /* Fastest case: index into dense fields. 
*/ + goto found; + } -static char* upb_MtDataEncoder_FlushDenseEnumMask(upb_MtDataEncoder* e, - char* ptr) { - upb_MtDataEncoderInternal* in = (upb_MtDataEncoderInternal*)e->internal; - ptr = upb_MtDataEncoder_Put(e, ptr, in->state.enum_state.present_values_mask); - in->state.enum_state.present_values_mask = 0; - in->state.enum_state.last_written_value += 5; - return ptr; -} + if (t->dense_below < t->field_count) { + /* Linear search non-dense fields. Resume scanning from last_field_index + * since fields are usually in order. */ + size_t last = *last_field_index; + for (idx = last; idx < t->field_count; idx++) { + if (t->fields[idx].number == field_number) { + goto found; + } + } -char* upb_MtDataEncoder_PutEnumValue(upb_MtDataEncoder* e, char* ptr, - uint32_t val) { - // TODO(b/229641772): optimize this encoding. - upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); - UPB_ASSERT(val >= in->state.enum_state.last_written_value); - uint32_t delta = val - in->state.enum_state.last_written_value; - if (delta >= 5 && in->state.enum_state.present_values_mask) { - ptr = upb_MtDataEncoder_FlushDenseEnumMask(e, ptr); - if (!ptr) { - return NULL; + for (idx = t->dense_below; idx < last; idx++) { + if (t->fields[idx].number == field_number) { + goto found; + } } - delta -= 5; } - if (delta >= 5) { - ptr = upb_MtDataEncoder_PutBase92Varint( - e, ptr, delta, kUpb_EncodedValue_MinSkip, kUpb_EncodedValue_MaxSkip); - in->state.enum_state.last_written_value += delta; - delta = 0; + if (d->extreg) { + switch (t->ext) { + case kUpb_ExtMode_Extendable: { + const upb_MiniTableExtension* ext = + upb_ExtensionRegistry_Lookup(d->extreg, t, field_number); + if (ext) return &ext->field; + break; + } + case kUpb_ExtMode_IsMessageSet: + if (field_number == kUpb_MsgSet_Item) { + static upb_MiniTableField item = { + 0, 0, 0, 0, kUpb_FakeFieldType_MessageSetItem, 0}; + return &item; + } + break; + } } - UPB_ASSERT((in->state.enum_state.present_values_mask >> delta) == 0); - 
in->state.enum_state.present_values_mask |= 1ULL << delta; - return ptr; -} + return &none; /* Unknown field. */ -char* upb_MtDataEncoder_EndEnum(upb_MtDataEncoder* e, char* ptr) { - upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); - if (!in->state.enum_state.present_values_mask) return ptr; - return upb_MtDataEncoder_FlushDenseEnumMask(e, ptr); +found: + UPB_ASSERT(t->fields[idx].number == field_number); + *last_field_index = idx; + return &t->fields[idx]; } +int _upb_Decoder_GetVarintOp(const upb_MiniTableField* field) { + static const int8_t kVarintOps[] = { + [kUpb_FakeFieldType_FieldNotFound] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Double] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Float] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Int64] = kUpb_DecodeOp_Scalar8Byte, + [kUpb_FieldType_UInt64] = kUpb_DecodeOp_Scalar8Byte, + [kUpb_FieldType_Int32] = kUpb_DecodeOp_Scalar4Byte, + [kUpb_FieldType_Fixed64] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Fixed32] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Bool] = kUpb_DecodeOp_Scalar1Byte, + [kUpb_FieldType_String] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Group] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Message] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Bytes] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_UInt32] = kUpb_DecodeOp_Scalar4Byte, + [kUpb_FieldType_Enum] = kUpb_DecodeOp_Enum, + [kUpb_FieldType_SFixed32] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_SFixed64] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_SInt32] = kUpb_DecodeOp_Scalar4Byte, + [kUpb_FieldType_SInt64] = kUpb_DecodeOp_Scalar8Byte, + [kUpb_FakeFieldType_MessageSetItem] = kUpb_DecodeOp_UnknownField, + }; -const char _kUpb_ToBase92[] = { - ' ', '!', '#', '$', '%', '&', '(', ')', '*', '+', ',', '-', '.', '/', - '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', - '>', '?', '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', - 'L', 'M', 'N', 'O', 'P', 
'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', - 'Z', '[', ']', '^', '_', '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', - 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', - 'w', 'x', 'y', 'z', '{', '|', '}', '~', -}; - -const int8_t _kUpb_FromBase92[] = { - 0, 1, -1, 2, 3, 4, 5, -1, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, - 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, - 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, - 55, 56, 57, -1, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, - 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, -}; - - - -// Must be last. - -typedef struct { - upb_MdDecoder base; - upb_Arena* arena; - upb_MiniTableEnum* enum_table; - uint32_t enum_value_count; - uint32_t enum_data_count; - uint32_t enum_data_capacity; -} upb_MdEnumDecoder; - -static size_t upb_MiniTableEnum_Size(size_t count) { - return sizeof(upb_MiniTableEnum) + count * sizeof(uint32_t); + return kVarintOps[field->UPB_PRIVATE(descriptortype)]; } -static upb_MiniTableEnum* _upb_MiniTable_AddEnumDataMember(upb_MdEnumDecoder* d, - uint32_t val) { - if (d->enum_data_count == d->enum_data_capacity) { - size_t old_sz = upb_MiniTableEnum_Size(d->enum_data_capacity); - d->enum_data_capacity = UPB_MAX(2, d->enum_data_capacity * 2); - size_t new_sz = upb_MiniTableEnum_Size(d->enum_data_capacity); - d->enum_table = upb_Arena_Realloc(d->arena, d->enum_table, old_sz, new_sz); - upb_MdDecoder_CheckOutOfMemory(&d->base, d->enum_table); +UPB_FORCEINLINE +static void _upb_Decoder_CheckUnlinked(upb_Decoder* d, const upb_MiniTable* mt, + const upb_MiniTableField* field, + int* op) { + // If sub-message is not linked, treat as unknown. 
+ if (field->mode & kUpb_LabelFlags_IsExtension) return; + const upb_MiniTableSub* sub = &mt->subs[field->UPB_PRIVATE(submsg_index)]; + if ((d->options & kUpb_DecodeOption_ExperimentalAllowUnlinked) || + sub->submsg != &_kUpb_MiniTable_Empty) { + return; } - d->enum_table->data[d->enum_data_count++] = val; - return d->enum_table; -} - -static void upb_MiniTableEnum_BuildValue(upb_MdEnumDecoder* d, uint32_t val) { - upb_MiniTableEnum* table = d->enum_table; - d->enum_value_count++; - if (table->value_count || (val > 512 && d->enum_value_count < val / 32)) { - if (table->value_count == 0) { - assert(d->enum_data_count == table->mask_limit / 32); - } - table = _upb_MiniTable_AddEnumDataMember(d, val); - table->value_count++; - } else { - uint32_t new_mask_limit = ((val / 32) + 1) * 32; - while (table->mask_limit < new_mask_limit) { - table = _upb_MiniTable_AddEnumDataMember(d, 0); - table->mask_limit += 32; - } - table->data[val / 32] |= 1ULL << (val % 32); +#ifndef NDEBUG + const upb_MiniTableField* oneof = upb_MiniTable_GetOneof(mt, field); + if (oneof) { + // All other members of the oneof must be message fields that are also + // unlinked. + do { + assert(upb_MiniTableField_CType(oneof) == kUpb_CType_Message); + const upb_MiniTableSub* oneof_sub = + &mt->subs[oneof->UPB_PRIVATE(submsg_index)]; + assert(!oneof_sub); + } while (upb_MiniTable_NextOneofField(mt, &oneof)); } +#endif // NDEBUG + *op = kUpb_DecodeOp_UnknownField; } -static upb_MiniTableEnum* upb_MtDecoder_DoBuildMiniTableEnum( - upb_MdEnumDecoder* d, const char* data, size_t len) { - // If the string is non-empty then it must begin with a version tag. - if (len) { - if (*data != kUpb_EncodedVersion_EnumV1) { - upb_MdDecoder_ErrorJmp(&d->base, "Invalid enum version: %c", *data); - } - data++; - len--; - } - - upb_MdDecoder_CheckOutOfMemory(&d->base, d->enum_table); - - // Guarantee at least 64 bits of mask without checking mask size. 
- d->enum_table->mask_limit = 64; - d->enum_table = _upb_MiniTable_AddEnumDataMember(d, 0); - d->enum_table = _upb_MiniTable_AddEnumDataMember(d, 0); +int _upb_Decoder_GetDelimitedOp(upb_Decoder* d, const upb_MiniTable* mt, + const upb_MiniTableField* field) { + enum { kRepeatedBase = 19 }; - d->enum_table->value_count = 0; + static const int8_t kDelimitedOps[] = { + /* For non-repeated field type. */ + [kUpb_FakeFieldType_FieldNotFound] = + kUpb_DecodeOp_UnknownField, // Field not found. + [kUpb_FieldType_Double] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Float] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Int64] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_UInt64] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Int32] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Fixed64] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Fixed32] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Bool] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_String] = kUpb_DecodeOp_String, + [kUpb_FieldType_Group] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Message] = kUpb_DecodeOp_SubMessage, + [kUpb_FieldType_Bytes] = kUpb_DecodeOp_Bytes, + [kUpb_FieldType_UInt32] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Enum] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_SFixed32] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_SFixed64] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_SInt32] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_SInt64] = kUpb_DecodeOp_UnknownField, + [kUpb_FakeFieldType_MessageSetItem] = kUpb_DecodeOp_UnknownField, + // For repeated field type. 
*/ + [kRepeatedBase + kUpb_FieldType_Double] = OP_FIXPCK_LG2(3), + [kRepeatedBase + kUpb_FieldType_Float] = OP_FIXPCK_LG2(2), + [kRepeatedBase + kUpb_FieldType_Int64] = OP_VARPCK_LG2(3), + [kRepeatedBase + kUpb_FieldType_UInt64] = OP_VARPCK_LG2(3), + [kRepeatedBase + kUpb_FieldType_Int32] = OP_VARPCK_LG2(2), + [kRepeatedBase + kUpb_FieldType_Fixed64] = OP_FIXPCK_LG2(3), + [kRepeatedBase + kUpb_FieldType_Fixed32] = OP_FIXPCK_LG2(2), + [kRepeatedBase + kUpb_FieldType_Bool] = OP_VARPCK_LG2(0), + [kRepeatedBase + kUpb_FieldType_String] = kUpb_DecodeOp_String, + [kRepeatedBase + kUpb_FieldType_Group] = kUpb_DecodeOp_SubMessage, + [kRepeatedBase + kUpb_FieldType_Message] = kUpb_DecodeOp_SubMessage, + [kRepeatedBase + kUpb_FieldType_Bytes] = kUpb_DecodeOp_Bytes, + [kRepeatedBase + kUpb_FieldType_UInt32] = OP_VARPCK_LG2(2), + [kRepeatedBase + kUpb_FieldType_Enum] = kUpb_DecodeOp_PackedEnum, + [kRepeatedBase + kUpb_FieldType_SFixed32] = OP_FIXPCK_LG2(2), + [kRepeatedBase + kUpb_FieldType_SFixed64] = OP_FIXPCK_LG2(3), + [kRepeatedBase + kUpb_FieldType_SInt32] = OP_VARPCK_LG2(2), + [kRepeatedBase + kUpb_FieldType_SInt64] = OP_VARPCK_LG2(3), + // Omitting kUpb_FakeFieldType_MessageSetItem, because we never emit a + // repeated msgset type + }; - const char* ptr = data; - uint32_t base = 0; + int ndx = field->UPB_PRIVATE(descriptortype); + if (upb_FieldMode_Get(field) == kUpb_FieldMode_Array) ndx += kRepeatedBase; + int op = kDelimitedOps[ndx]; - while (ptr < d->base.end) { - char ch = *ptr++; - if (ch <= kUpb_EncodedValue_MaxEnumMask) { - uint32_t mask = _upb_FromBase92(ch); - for (int i = 0; i < 5; i++, base++, mask >>= 1) { - if (mask & 1) upb_MiniTableEnum_BuildValue(d, base); - } - } else if (kUpb_EncodedValue_MinSkip <= ch && - ch <= kUpb_EncodedValue_MaxSkip) { - uint32_t skip; - ptr = upb_MdDecoder_DecodeBase92Varint(&d->base, ptr, ch, - kUpb_EncodedValue_MinSkip, - kUpb_EncodedValue_MaxSkip, &skip); - base += skip; - } else { - upb_MdDecoder_ErrorJmp(&d->base, 
"Unexpected character: %c", ch); - } + if (op == kUpb_DecodeOp_SubMessage) { + _upb_Decoder_CheckUnlinked(d, mt, field, &op); } - return d->enum_table; + return op; } -static upb_MiniTableEnum* upb_MtDecoder_BuildMiniTableEnum( - upb_MdEnumDecoder* const decoder, const char* const data, size_t const len) { - if (UPB_SETJMP(decoder->base.err) != 0) return NULL; - return upb_MtDecoder_DoBuildMiniTableEnum(decoder, data, len); -} +UPB_FORCEINLINE +static const char* _upb_Decoder_DecodeWireValue(upb_Decoder* d, const char* ptr, + const upb_MiniTable* mt, + const upb_MiniTableField* field, + int wire_type, wireval* val, + int* op) { + static const unsigned kFixed32OkMask = (1 << kUpb_FieldType_Float) | + (1 << kUpb_FieldType_Fixed32) | + (1 << kUpb_FieldType_SFixed32); -upb_MiniTableEnum* upb_MiniDescriptor_BuildEnum(const char* data, size_t len, - upb_Arena* arena, - upb_Status* status) { - upb_MdEnumDecoder decoder = { - .base = - { - .end = UPB_PTRADD(data, len), - .status = status, - }, - .arena = arena, - .enum_table = upb_Arena_Malloc(arena, upb_MiniTableEnum_Size(2)), - .enum_value_count = 0, - .enum_data_count = 0, - .enum_data_capacity = 1, - }; + static const unsigned kFixed64OkMask = (1 << kUpb_FieldType_Double) | + (1 << kUpb_FieldType_Fixed64) | + (1 << kUpb_FieldType_SFixed64); - return upb_MtDecoder_BuildMiniTableEnum(&decoder, data, len); + switch (wire_type) { + case kUpb_WireType_Varint: + ptr = _upb_Decoder_DecodeVarint(d, ptr, &val->uint64_val); + *op = _upb_Decoder_GetVarintOp(field); + _upb_Decoder_Munge(field->UPB_PRIVATE(descriptortype), val); + return ptr; + case kUpb_WireType_32Bit: + *op = kUpb_DecodeOp_Scalar4Byte; + if (((1 << field->UPB_PRIVATE(descriptortype)) & kFixed32OkMask) == 0) { + *op = kUpb_DecodeOp_UnknownField; + } + return upb_WireReader_ReadFixed32(ptr, &val->uint32_val); + case kUpb_WireType_64Bit: + *op = kUpb_DecodeOp_Scalar8Byte; + if (((1 << field->UPB_PRIVATE(descriptortype)) & kFixed64OkMask) == 0) { + *op = 
kUpb_DecodeOp_UnknownField; + } + return upb_WireReader_ReadFixed64(ptr, &val->uint64_val); + case kUpb_WireType_Delimited: + ptr = upb_Decoder_DecodeSize(d, ptr, &val->size); + *op = _upb_Decoder_GetDelimitedOp(d, mt, field); + return ptr; + case kUpb_WireType_StartGroup: + val->uint32_val = field->number; + if (field->UPB_PRIVATE(descriptortype) == kUpb_FieldType_Group) { + *op = kUpb_DecodeOp_SubMessage; + _upb_Decoder_CheckUnlinked(d, mt, field, op); + } else if (field->UPB_PRIVATE(descriptortype) == + kUpb_FakeFieldType_MessageSetItem) { + *op = kUpb_DecodeOp_MessageSetItem; + } else { + *op = kUpb_DecodeOp_UnknownField; + } + return ptr; + default: + break; + } + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); } +UPB_FORCEINLINE +static const char* _upb_Decoder_DecodeKnownField( + upb_Decoder* d, const char* ptr, upb_Message* msg, + const upb_MiniTable* layout, const upb_MiniTableField* field, int op, + wireval* val) { + const upb_MiniTableSub* subs = layout->subs; + uint8_t mode = field->mode; -#include -#include - + if (UPB_UNLIKELY(mode & kUpb_LabelFlags_IsExtension)) { + const upb_MiniTableExtension* ext_layout = + (const upb_MiniTableExtension*)field; + upb_Message_Extension* ext = + _upb_Message_GetOrCreateExtension(msg, ext_layout, &d->arena); + if (UPB_UNLIKELY(!ext)) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + } + d->unknown_msg = msg; + msg = &ext->data; + subs = &ext->ext->sub; + } -// Must be last. + switch (mode & kUpb_FieldMode_Mask) { + case kUpb_FieldMode_Array: + return _upb_Decoder_DecodeToArray(d, ptr, msg, subs, field, val, op); + case kUpb_FieldMode_Map: + return _upb_Decoder_DecodeToMap(d, ptr, msg, subs, field, val); + case kUpb_FieldMode_Scalar: + return _upb_Decoder_DecodeToSubMessage(d, ptr, msg, subs, field, val, op); + default: + UPB_UNREACHABLE(); + } +} -// Note: we sort by this number when calculating layout order. -typedef enum { - kUpb_LayoutItemType_OneofCase, // Oneof case. 
- kUpb_LayoutItemType_OneofField, // Oneof field data. - kUpb_LayoutItemType_Field, // Non-oneof field data. +static const char* _upb_Decoder_ReverseSkipVarint(const char* ptr, + uint32_t val) { + uint32_t seen = 0; + do { + ptr--; + seen <<= 7; + seen |= *ptr & 0x7f; + } while (seen != val); + return ptr; +} - kUpb_LayoutItemType_Max = kUpb_LayoutItemType_Field, -} upb_LayoutItemType; +static const char* _upb_Decoder_DecodeUnknownField(upb_Decoder* d, + const char* ptr, + upb_Message* msg, + int field_number, + int wire_type, wireval val) { + if (field_number == 0) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); -#define kUpb_LayoutItem_IndexSentinel ((uint16_t)-1) + // Since unknown fields are the uncommon case, we do a little extra work here + // to walk backwards through the buffer to find the field start. This frees + // up a register in the fast paths (when the field is known), which leads to + // significant speedups in benchmarks. + const char* start = ptr; -typedef struct { - // Index of the corresponding field. When this is a oneof field, the field's - // offset will be the index of the next field in a linked list. 
- uint16_t field_index; - uint16_t offset; - upb_FieldRep rep; - upb_LayoutItemType type; -} upb_LayoutItem; + if (wire_type == kUpb_WireType_Delimited) ptr += val.size; + if (msg) { + switch (wire_type) { + case kUpb_WireType_Varint: + case kUpb_WireType_Delimited: + start--; + while (start[-1] & 0x80) start--; + break; + case kUpb_WireType_32Bit: + start -= 4; + break; + case kUpb_WireType_64Bit: + start -= 8; + break; + default: + break; + } -typedef struct { - upb_LayoutItem* data; - size_t size; - size_t capacity; -} upb_LayoutItemVector; + assert(start == d->debug_valstart); + uint32_t tag = ((uint32_t)field_number << 3) | wire_type; + start = _upb_Decoder_ReverseSkipVarint(start, tag); + assert(start == d->debug_tagstart); -typedef struct { - upb_MdDecoder base; - upb_MiniTable* table; - upb_MiniTableField* fields; - upb_MiniTablePlatform platform; - upb_LayoutItemVector vec; - upb_Arena* arena; -} upb_MtDecoder; + if (wire_type == kUpb_WireType_StartGroup) { + d->unknown = start; + d->unknown_msg = msg; + ptr = _upb_Decoder_DecodeUnknownGroup(d, ptr, field_number); + start = d->unknown; + d->unknown = NULL; + } + if (!_upb_Message_AddUnknown(msg, start, ptr - start, &d->arena)) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + } + } else if (wire_type == kUpb_WireType_StartGroup) { + ptr = _upb_Decoder_DecodeUnknownGroup(d, ptr, field_number); + } + return ptr; +} -// In each field's offset, we temporarily store a presence classifier: -enum PresenceClass { - kNoPresence = 0, - kHasbitPresence = 1, - kRequiredPresence = 2, - kOneofBase = 3, - // Negative values refer to a specific oneof with that number. Positive - // values >= kOneofBase indicate that this field is in a oneof, and specify - // the next field in this oneof's linked list. 
-}; +UPB_NOINLINE +static const char* _upb_Decoder_DecodeMessage(upb_Decoder* d, const char* ptr, + upb_Message* msg, + const upb_MiniTable* layout) { + int last_field_index = 0; -static bool upb_MtDecoder_FieldIsPackable(upb_MiniTableField* field) { - return (field->mode & kUpb_FieldMode_Array) && - upb_FieldType_IsPackable(field->UPB_PRIVATE(descriptortype)); -} +#if UPB_FASTTABLE + // The first time we want to skip fast dispatch, because we may have just been + // invoked by the fast parser to handle a case that it bailed on. + if (!_upb_Decoder_IsDone(d, &ptr)) goto nofast; +#endif -typedef struct { - uint16_t submsg_count; - uint16_t subenum_count; -} upb_SubCounts; + while (!_upb_Decoder_IsDone(d, &ptr)) { + uint32_t tag; + const upb_MiniTableField* field; + int field_number; + int wire_type; + wireval val; + int op; -static void upb_MiniTable_SetTypeAndSub(upb_MiniTableField* field, - upb_FieldType type, - upb_SubCounts* sub_counts, - uint64_t msg_modifiers, - bool is_proto3_enum) { - if (is_proto3_enum) { - UPB_ASSERT(type == kUpb_FieldType_Enum); - type = kUpb_FieldType_Int32; - field->mode |= kUpb_LabelFlags_IsAlternate; - } else if (type == kUpb_FieldType_String && - !(msg_modifiers & kUpb_MessageModifier_ValidateUtf8)) { - type = kUpb_FieldType_Bytes; - field->mode |= kUpb_LabelFlags_IsAlternate; - } + if (_upb_Decoder_TryFastDispatch(d, &ptr, msg, layout)) break; - field->UPB_PRIVATE(descriptortype) = type; +#if UPB_FASTTABLE + nofast: +#endif - if (upb_MtDecoder_FieldIsPackable(field) && - (msg_modifiers & kUpb_MessageModifier_DefaultIsPacked)) { - field->mode |= kUpb_LabelFlags_IsPacked; - } +#ifndef NDEBUG + d->debug_tagstart = ptr; +#endif - if (type == kUpb_FieldType_Message || type == kUpb_FieldType_Group) { - field->UPB_PRIVATE(submsg_index) = sub_counts->submsg_count++; - } else if (type == kUpb_FieldType_Enum) { - // We will need to update this later once we know the total number of - // submsg fields. 
- field->UPB_PRIVATE(submsg_index) = sub_counts->subenum_count++; - } else { - field->UPB_PRIVATE(submsg_index) = kUpb_NoSub; - } -} + UPB_ASSERT(ptr < d->input.limit_ptr); + ptr = _upb_Decoder_DecodeTag(d, ptr, &tag); + field_number = tag >> 3; + wire_type = tag & 7; -static const char kUpb_EncodedToType[] = { - [kUpb_EncodedType_Double] = kUpb_FieldType_Double, - [kUpb_EncodedType_Float] = kUpb_FieldType_Float, - [kUpb_EncodedType_Int64] = kUpb_FieldType_Int64, - [kUpb_EncodedType_UInt64] = kUpb_FieldType_UInt64, - [kUpb_EncodedType_Int32] = kUpb_FieldType_Int32, - [kUpb_EncodedType_Fixed64] = kUpb_FieldType_Fixed64, - [kUpb_EncodedType_Fixed32] = kUpb_FieldType_Fixed32, - [kUpb_EncodedType_Bool] = kUpb_FieldType_Bool, - [kUpb_EncodedType_String] = kUpb_FieldType_String, - [kUpb_EncodedType_Group] = kUpb_FieldType_Group, - [kUpb_EncodedType_Message] = kUpb_FieldType_Message, - [kUpb_EncodedType_Bytes] = kUpb_FieldType_Bytes, - [kUpb_EncodedType_UInt32] = kUpb_FieldType_UInt32, - [kUpb_EncodedType_OpenEnum] = kUpb_FieldType_Enum, - [kUpb_EncodedType_SFixed32] = kUpb_FieldType_SFixed32, - [kUpb_EncodedType_SFixed64] = kUpb_FieldType_SFixed64, - [kUpb_EncodedType_SInt32] = kUpb_FieldType_SInt32, - [kUpb_EncodedType_SInt64] = kUpb_FieldType_SInt64, - [kUpb_EncodedType_ClosedEnum] = kUpb_FieldType_Enum, -}; +#ifndef NDEBUG + d->debug_valstart = ptr; +#endif -static void upb_MiniTable_SetField(upb_MtDecoder* d, uint8_t ch, - upb_MiniTableField* field, - uint64_t msg_modifiers, - upb_SubCounts* sub_counts) { - static const char kUpb_EncodedToFieldRep[] = { - [kUpb_EncodedType_Double] = kUpb_FieldRep_8Byte, - [kUpb_EncodedType_Float] = kUpb_FieldRep_4Byte, - [kUpb_EncodedType_Int64] = kUpb_FieldRep_8Byte, - [kUpb_EncodedType_UInt64] = kUpb_FieldRep_8Byte, - [kUpb_EncodedType_Int32] = kUpb_FieldRep_4Byte, - [kUpb_EncodedType_Fixed64] = kUpb_FieldRep_8Byte, - [kUpb_EncodedType_Fixed32] = kUpb_FieldRep_4Byte, - [kUpb_EncodedType_Bool] = kUpb_FieldRep_1Byte, - 
[kUpb_EncodedType_String] = kUpb_FieldRep_StringView, - [kUpb_EncodedType_Bytes] = kUpb_FieldRep_StringView, - [kUpb_EncodedType_UInt32] = kUpb_FieldRep_4Byte, - [kUpb_EncodedType_OpenEnum] = kUpb_FieldRep_4Byte, - [kUpb_EncodedType_SFixed32] = kUpb_FieldRep_4Byte, - [kUpb_EncodedType_SFixed64] = kUpb_FieldRep_8Byte, - [kUpb_EncodedType_SInt32] = kUpb_FieldRep_4Byte, - [kUpb_EncodedType_SInt64] = kUpb_FieldRep_8Byte, - [kUpb_EncodedType_ClosedEnum] = kUpb_FieldRep_4Byte, - }; + if (wire_type == kUpb_WireType_EndGroup) { + d->end_group = field_number; + return ptr; + } - char pointer_rep = d->platform == kUpb_MiniTablePlatform_32Bit - ? kUpb_FieldRep_4Byte - : kUpb_FieldRep_8Byte; + field = _upb_Decoder_FindField(d, layout, field_number, &last_field_index); + ptr = _upb_Decoder_DecodeWireValue(d, ptr, layout, field, wire_type, &val, + &op); - int8_t type = _upb_FromBase92(ch); - if (ch >= _upb_ToBase92(kUpb_EncodedType_RepeatedBase)) { - type -= kUpb_EncodedType_RepeatedBase; - field->mode = kUpb_FieldMode_Array; - field->mode |= pointer_rep << kUpb_FieldRep_Shift; - field->offset = kNoPresence; - } else { - field->mode = kUpb_FieldMode_Scalar; - field->offset = kHasbitPresence; - if (type == kUpb_EncodedType_Group || type == kUpb_EncodedType_Message) { - field->mode |= pointer_rep << kUpb_FieldRep_Shift; - } else if ((unsigned long)type >= sizeof(kUpb_EncodedToFieldRep)) { - upb_MdDecoder_ErrorJmp(&d->base, "Invalid field type: %d", (int)type); + if (op >= 0) { + ptr = _upb_Decoder_DecodeKnownField(d, ptr, msg, layout, field, op, &val); } else { - field->mode |= kUpb_EncodedToFieldRep[type] << kUpb_FieldRep_Shift; + switch (op) { + case kUpb_DecodeOp_UnknownField: + ptr = _upb_Decoder_DecodeUnknownField(d, ptr, msg, field_number, + wire_type, val); + break; + case kUpb_DecodeOp_MessageSetItem: + ptr = upb_Decoder_DecodeMessageSetItem(d, ptr, msg, layout); + break; + } } } - if ((unsigned long)type >= sizeof(kUpb_EncodedToType)) { - upb_MdDecoder_ErrorJmp(&d->base, 
"Invalid field type: %d", (int)type); - } - upb_MiniTable_SetTypeAndSub(field, kUpb_EncodedToType[type], sub_counts, - msg_modifiers, type == kUpb_EncodedType_OpenEnum); -} -static void upb_MtDecoder_ModifyField(upb_MtDecoder* d, - uint32_t message_modifiers, - uint32_t field_modifiers, - upb_MiniTableField* field) { - if (field_modifiers & kUpb_EncodedFieldModifier_FlipPacked) { - if (!upb_MtDecoder_FieldIsPackable(field)) { - upb_MdDecoder_ErrorJmp(&d->base, - "Cannot flip packed on unpackable field %" PRIu32, - field->number); - } - field->mode ^= kUpb_LabelFlags_IsPacked; - } + return UPB_UNLIKELY(layout && layout->required_count) + ? _upb_Decoder_CheckRequired(d, ptr, msg, layout) + : ptr; +} - bool singular = field_modifiers & kUpb_EncodedFieldModifier_IsProto3Singular; - bool required = field_modifiers & kUpb_EncodedFieldModifier_IsRequired; +const char* _upb_FastDecoder_DecodeGeneric(struct upb_Decoder* d, + const char* ptr, upb_Message* msg, + intptr_t table, uint64_t hasbits, + uint64_t data) { + (void)data; + *(uint32_t*)msg |= hasbits; + return _upb_Decoder_DecodeMessage(d, ptr, msg, decode_totablep(table)); +} - // Validate. 
- if ((singular || required) && field->offset != kHasbitPresence) { - upb_MdDecoder_ErrorJmp(&d->base, - "Invalid modifier(s) for repeated field %" PRIu32, - field->number); - } - if (singular && required) { - upb_MdDecoder_ErrorJmp( - &d->base, "Field %" PRIu32 " cannot be both singular and required", - field->number); +static upb_DecodeStatus _upb_Decoder_DecodeTop(struct upb_Decoder* d, + const char* buf, void* msg, + const upb_MiniTable* l) { + if (!_upb_Decoder_TryFastDispatch(d, &buf, msg, l)) { + _upb_Decoder_DecodeMessage(d, buf, msg, l); } + if (d->end_group != DECODE_NOGROUP) return kUpb_DecodeStatus_Malformed; + if (d->missing_required) return kUpb_DecodeStatus_MissingRequired; + return kUpb_DecodeStatus_Ok; +} - if (singular) field->offset = kNoPresence; - if (required) { - field->offset = kRequiredPresence; - } +UPB_NOINLINE +const char* _upb_Decoder_IsDoneFallback(upb_EpsCopyInputStream* e, + const char* ptr, int overrun) { + return _upb_EpsCopyInputStream_IsDoneFallbackInline( + e, ptr, overrun, _upb_Decoder_BufferFlipCallback); } -static void upb_MtDecoder_PushItem(upb_MtDecoder* d, upb_LayoutItem item) { - if (d->vec.size == d->vec.capacity) { - size_t new_cap = UPB_MAX(8, d->vec.size * 2); - d->vec.data = realloc(d->vec.data, new_cap * sizeof(*d->vec.data)); - upb_MdDecoder_CheckOutOfMemory(&d->base, d->vec.data); - d->vec.capacity = new_cap; +static upb_DecodeStatus upb_Decoder_Decode(upb_Decoder* const decoder, + const char* const buf, + void* const msg, + const upb_MiniTable* const l, + upb_Arena* const arena) { + if (UPB_SETJMP(decoder->err) == 0) { + decoder->status = _upb_Decoder_DecodeTop(decoder, buf, msg, l); + } else { + UPB_ASSERT(decoder->status != kUpb_DecodeStatus_Ok); } - d->vec.data[d->vec.size++] = item; -} -static void upb_MtDecoder_PushOneof(upb_MtDecoder* d, upb_LayoutItem item) { - if (item.field_index == kUpb_LayoutItem_IndexSentinel) { - upb_MdDecoder_ErrorJmp(&d->base, "Empty oneof"); - } - item.field_index -= kOneofBase; + 
_upb_MemBlock* blocks = + upb_Atomic_Load(&decoder->arena.blocks, memory_order_relaxed); + arena->head = decoder->arena.head; + upb_Atomic_Store(&arena->blocks, blocks, memory_order_relaxed); + return decoder->status; +} - // Push oneof data. - item.type = kUpb_LayoutItemType_OneofField; - upb_MtDecoder_PushItem(d, item); +upb_DecodeStatus upb_Decode(const char* buf, size_t size, void* msg, + const upb_MiniTable* l, + const upb_ExtensionRegistry* extreg, int options, + upb_Arena* arena) { + upb_Decoder decoder; + unsigned depth = (unsigned)options >> 16; - // Push oneof case. - item.rep = kUpb_FieldRep_4Byte; // Field Number. - item.type = kUpb_LayoutItemType_OneofCase; - upb_MtDecoder_PushItem(d, item); -} + upb_EpsCopyInputStream_Init(&decoder.input, &buf, size, + options & kUpb_DecodeOption_AliasString); -size_t upb_MtDecoder_SizeOfRep(upb_FieldRep rep, - upb_MiniTablePlatform platform) { - static const uint8_t kRepToSize32[] = { - [kUpb_FieldRep_1Byte] = 1, - [kUpb_FieldRep_4Byte] = 4, - [kUpb_FieldRep_StringView] = 8, - [kUpb_FieldRep_8Byte] = 8, - }; - static const uint8_t kRepToSize64[] = { - [kUpb_FieldRep_1Byte] = 1, - [kUpb_FieldRep_4Byte] = 4, - [kUpb_FieldRep_StringView] = 16, - [kUpb_FieldRep_8Byte] = 8, - }; - UPB_ASSERT(sizeof(upb_StringView) == - UPB_SIZE(kRepToSize32, kRepToSize64)[kUpb_FieldRep_StringView]); - return platform == kUpb_MiniTablePlatform_32Bit ? kRepToSize32[rep] - : kRepToSize64[rep]; -} + decoder.extreg = extreg; + decoder.unknown = NULL; + decoder.depth = depth ? 
depth : kUpb_WireFormat_DefaultDepthLimit; + decoder.end_group = DECODE_NOGROUP; + decoder.options = (uint16_t)options; + decoder.missing_required = false; + decoder.status = kUpb_DecodeStatus_Ok; -size_t upb_MtDecoder_AlignOfRep(upb_FieldRep rep, - upb_MiniTablePlatform platform) { - static const uint8_t kRepToAlign32[] = { - [kUpb_FieldRep_1Byte] = 1, - [kUpb_FieldRep_4Byte] = 4, - [kUpb_FieldRep_StringView] = 4, - [kUpb_FieldRep_8Byte] = 8, - }; - static const uint8_t kRepToAlign64[] = { - [kUpb_FieldRep_1Byte] = 1, - [kUpb_FieldRep_4Byte] = 4, - [kUpb_FieldRep_StringView] = 8, - [kUpb_FieldRep_8Byte] = 8, - }; - UPB_ASSERT(UPB_ALIGN_OF(upb_StringView) == - UPB_SIZE(kRepToAlign32, kRepToAlign64)[kUpb_FieldRep_StringView]); - return platform == kUpb_MiniTablePlatform_32Bit ? kRepToAlign32[rep] - : kRepToAlign64[rep]; + // Violating the encapsulation of the arena for performance reasons. + // This is a temporary arena that we swap into and swap out of when we are + // done. The temporary arena only needs to be able to handle allocation, + // not fuse or free, so it does not need many of the members to be initialized + // (particularly parent_or_count). 
+ _upb_MemBlock* blocks = upb_Atomic_Load(&arena->blocks, memory_order_relaxed); + decoder.arena.head = arena->head; + decoder.arena.block_alloc = arena->block_alloc; + upb_Atomic_Init(&decoder.arena.blocks, blocks); + + return upb_Decoder_Decode(&decoder, buf, msg, l, arena); } -static const char* upb_MtDecoder_DecodeOneofField(upb_MtDecoder* d, - const char* ptr, - char first_ch, - upb_LayoutItem* item) { - uint32_t field_num; - ptr = upb_MdDecoder_DecodeBase92Varint( - &d->base, ptr, first_ch, kUpb_EncodedValue_MinOneofField, - kUpb_EncodedValue_MaxOneofField, &field_num); - upb_MiniTableField* f = - (void*)upb_MiniTable_FindFieldByNumber(d->table, field_num); +#undef OP_FIXPCK_LG2 +#undef OP_VARPCK_LG2 - if (!f) { - upb_MdDecoder_ErrorJmp(&d->base, - "Couldn't add field number %" PRIu32 - " to oneof, no such field number.", - field_num); +// Fast decoder: ~3x the speed of decode.c, but requires x86-64/ARM64. +// Also the table size grows by 2x. +// +// Could potentially be ported to other 64-bit archs that pass at least six +// arguments in registers and have 8 unused high bits in pointers. +// +// The overall design is to create specialized functions for every possible +// field type (eg. oneof boolean field with a 1 byte tag) and then dispatch +// to the specialized function as quickly as possible. + + + +// Must be last. + +#if UPB_FASTTABLE + +// The standard set of arguments passed to each parsing function. +// Thanks to x86-64 calling conventions, these will stay in registers. +#define UPB_PARSE_PARAMS \ + upb_Decoder *d, const char *ptr, upb_Message *msg, intptr_t table, \ + uint64_t hasbits, uint64_t data + +#define UPB_PARSE_ARGS d, ptr, msg, table, hasbits, data + +#define RETURN_GENERIC(m) \ + /* Uncomment either of these for debugging purposes. 
*/ \ + /* fprintf(stderr, m); */ \ + /*__builtin_trap(); */ \ + return _upb_FastDecoder_DecodeGeneric(d, ptr, msg, table, hasbits, 0); + +typedef enum { + CARD_s = 0, /* Singular (optional, non-repeated) */ + CARD_o = 1, /* Oneof */ + CARD_r = 2, /* Repeated */ + CARD_p = 3 /* Packed Repeated */ +} upb_card; + +UPB_NOINLINE +static const char* fastdecode_isdonefallback(UPB_PARSE_PARAMS) { + int overrun = data; + ptr = _upb_EpsCopyInputStream_IsDoneFallbackInline( + &d->input, ptr, overrun, _upb_Decoder_BufferFlipCallback); + data = _upb_FastDecoder_LoadTag(ptr); + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); +} + +UPB_FORCEINLINE +static const char* fastdecode_dispatch(UPB_PARSE_PARAMS) { + int overrun; + switch (upb_EpsCopyInputStream_IsDoneStatus(&d->input, ptr, &overrun)) { + case kUpb_IsDoneStatus_Done: + *(uint32_t*)msg |= hasbits; // Sync hasbits. + const upb_MiniTable* l = decode_totablep(table); + return UPB_UNLIKELY(l->required_count) + ? _upb_Decoder_CheckRequired(d, ptr, msg, l) + : ptr; + case kUpb_IsDoneStatus_NotDone: + break; + case kUpb_IsDoneStatus_NeedFallback: + data = overrun; + UPB_MUSTTAIL return fastdecode_isdonefallback(UPB_PARSE_ARGS); } - if (f->offset != kHasbitPresence) { - upb_MdDecoder_ErrorJmp( - &d->base, - "Cannot add repeated, required, or singular field %" PRIu32 - " to oneof.", - field_num); + + // Read two bytes of tag data (for a one-byte tag, the high byte is junk). + data = _upb_FastDecoder_LoadTag(ptr); + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); +} + +UPB_FORCEINLINE +static bool fastdecode_checktag(uint16_t data, int tagbytes) { + if (tagbytes == 1) { + return (data & 0xff) == 0; + } else { + return data == 0; } +} - // Oneof storage must be large enough to accommodate the largest member. 
- int rep = f->mode >> kUpb_FieldRep_Shift; - if (upb_MtDecoder_SizeOfRep(rep, d->platform) > - upb_MtDecoder_SizeOfRep(item->rep, d->platform)) { - item->rep = rep; +UPB_FORCEINLINE +static const char* fastdecode_longsize(const char* ptr, int* size) { + int i; + UPB_ASSERT(*size & 0x80); + *size &= 0xff; + for (i = 0; i < 3; i++) { + ptr++; + size_t byte = (uint8_t)ptr[-1]; + *size += (byte - 1) << (7 + 7 * i); + if (UPB_LIKELY((byte & 0x80) == 0)) return ptr; } - // Prepend this field to the linked list. - f->offset = item->field_index; - item->field_index = (f - d->fields) + kOneofBase; + ptr++; + size_t byte = (uint8_t)ptr[-1]; + // len is limited by 2gb not 4gb, hence 8 and not 16 as normally expected + // for a 32 bit varint. + if (UPB_UNLIKELY(byte >= 8)) return NULL; + *size += (byte - 1) << 28; return ptr; } -static const char* upb_MtDecoder_DecodeOneofs(upb_MtDecoder* d, - const char* ptr) { - upb_LayoutItem item = {.rep = 0, - .field_index = kUpb_LayoutItem_IndexSentinel}; - while (ptr < d->base.end) { - char ch = *ptr++; - if (ch == kUpb_EncodedValue_FieldSeparator) { - // Field separator, no action needed. - } else if (ch == kUpb_EncodedValue_OneofSeparator) { - // End of oneof. - upb_MtDecoder_PushOneof(d, item); - item.field_index = kUpb_LayoutItem_IndexSentinel; // Move to next oneof. - } else { - ptr = upb_MtDecoder_DecodeOneofField(d, ptr, ch, &item); +UPB_FORCEINLINE +static const char* fastdecode_delimited( + upb_Decoder* d, const char* ptr, + upb_EpsCopyInputStream_ParseDelimitedFunc* func, void* ctx) { + ptr++; + + // Sign-extend so varint greater than one byte becomes negative, causing + // fast delimited parse to fail. + int len = (int8_t)ptr[-1]; + + if (!upb_EpsCopyInputStream_TryParseDelimitedFast(&d->input, &ptr, len, func, + ctx)) { + // Slow case: Sub-message is >=128 bytes and/or exceeds the current buffer. 
+ // If it exceeds the buffer limit, limit/limit_ptr will change during + // sub-message parsing, so we need to preserve delta, not limit. + if (UPB_UNLIKELY(len & 0x80)) { + // Size varint >1 byte (length >= 128). + ptr = fastdecode_longsize(ptr, &len); + if (!ptr) { + // Corrupt wire format: size exceeded INT_MAX. + return NULL; + } + } + if (!upb_EpsCopyInputStream_CheckSize(&d->input, ptr, len)) { + // Corrupt wire format: invalid limit. + return NULL; } + int delta = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, len); + ptr = func(&d->input, ptr, ctx); + upb_EpsCopyInputStream_PopLimit(&d->input, ptr, delta); } - - // Push final oneof. - upb_MtDecoder_PushOneof(d, item); return ptr; } -static const char* upb_MtDecoder_ParseModifier(upb_MtDecoder* d, - const char* ptr, char first_ch, - upb_MiniTableField* last_field, - uint64_t* msg_modifiers) { - uint32_t mod; - ptr = upb_MdDecoder_DecodeBase92Varint(&d->base, ptr, first_ch, - kUpb_EncodedValue_MinModifier, - kUpb_EncodedValue_MaxModifier, &mod); - if (last_field) { - upb_MtDecoder_ModifyField(d, *msg_modifiers, mod, last_field); +/* singular, oneof, repeated field handling ***********************************/ + +typedef struct { + upb_Array* arr; + void* end; +} fastdecode_arr; + +typedef enum { + FD_NEXT_ATLIMIT, + FD_NEXT_SAMEFIELD, + FD_NEXT_OTHERFIELD +} fastdecode_next; + +typedef struct { + void* dst; + fastdecode_next next; + uint32_t tag; +} fastdecode_nextret; + +UPB_FORCEINLINE +static void* fastdecode_resizearr(upb_Decoder* d, void* dst, + fastdecode_arr* farr, int valbytes) { + if (UPB_UNLIKELY(dst == farr->end)) { + size_t old_size = farr->arr->capacity; + size_t old_bytes = old_size * valbytes; + size_t new_size = old_size * 2; + size_t new_bytes = new_size * valbytes; + char* old_ptr = _upb_array_ptr(farr->arr); + char* new_ptr = upb_Arena_Realloc(&d->arena, old_ptr, old_bytes, new_bytes); + uint8_t elem_size_lg2 = __builtin_ctz(valbytes); + farr->arr->capacity = new_size; + farr->arr->data 
= _upb_array_tagptr(new_ptr, elem_size_lg2); + dst = (void*)(new_ptr + (old_size * valbytes)); + farr->end = (void*)(new_ptr + (new_size * valbytes)); + } + return dst; +} + +UPB_FORCEINLINE +static bool fastdecode_tagmatch(uint32_t tag, uint64_t data, int tagbytes) { + if (tagbytes == 1) { + return (uint8_t)tag == (uint8_t)data; + } else { + return (uint16_t)tag == (uint16_t)data; + } +} + +UPB_FORCEINLINE +static void fastdecode_commitarr(void* dst, fastdecode_arr* farr, + int valbytes) { + farr->arr->size = + (size_t)((char*)dst - (char*)_upb_array_ptr(farr->arr)) / valbytes; +} + +UPB_FORCEINLINE +static fastdecode_nextret fastdecode_nextrepeated(upb_Decoder* d, void* dst, + const char** ptr, + fastdecode_arr* farr, + uint64_t data, int tagbytes, + int valbytes) { + fastdecode_nextret ret; + dst = (char*)dst + valbytes; + + if (UPB_LIKELY(!_upb_Decoder_IsDone(d, ptr))) { + ret.tag = _upb_FastDecoder_LoadTag(*ptr); + if (fastdecode_tagmatch(ret.tag, data, tagbytes)) { + ret.next = FD_NEXT_SAMEFIELD; + } else { + fastdecode_commitarr(dst, farr, valbytes); + ret.next = FD_NEXT_OTHERFIELD; + } + } else { + fastdecode_commitarr(dst, farr, valbytes); + ret.next = FD_NEXT_ATLIMIT; + } + + ret.dst = dst; + return ret; +} + +UPB_FORCEINLINE +static void* fastdecode_fieldmem(upb_Message* msg, uint64_t data) { + size_t ofs = data >> 48; + return (char*)msg + ofs; +} + +UPB_FORCEINLINE +static void* fastdecode_getfield(upb_Decoder* d, const char* ptr, + upb_Message* msg, uint64_t* data, + uint64_t* hasbits, fastdecode_arr* farr, + int valbytes, upb_card card) { + switch (card) { + case CARD_s: { + uint8_t hasbit_index = *data >> 24; + // Set hasbit and return pointer to scalar field. 
+ *hasbits |= 1ull << hasbit_index; + return fastdecode_fieldmem(msg, *data); + } + case CARD_o: { + uint16_t case_ofs = *data >> 32; + uint32_t* oneof_case = UPB_PTR_AT(msg, case_ofs, uint32_t); + uint8_t field_number = *data >> 24; + *oneof_case = field_number; + return fastdecode_fieldmem(msg, *data); + } + case CARD_r: { + // Get pointer to upb_Array and allocate/expand if necessary. + uint8_t elem_size_lg2 = __builtin_ctz(valbytes); + upb_Array** arr_p = fastdecode_fieldmem(msg, *data); + char* begin; + *(uint32_t*)msg |= *hasbits; + *hasbits = 0; + if (UPB_LIKELY(!*arr_p)) { + farr->arr = _upb_Array_New(&d->arena, 8, elem_size_lg2); + *arr_p = farr->arr; + } else { + farr->arr = *arr_p; + } + begin = _upb_array_ptr(farr->arr); + farr->end = begin + (farr->arr->capacity * valbytes); + *data = _upb_FastDecoder_LoadTag(ptr); + return begin + (farr->arr->size * valbytes); + } + default: + UPB_UNREACHABLE(); + } +} + +UPB_FORCEINLINE +static bool fastdecode_flippacked(uint64_t* data, int tagbytes) { + *data ^= (0x2 ^ 0x0); // Patch data to match packed wiretype. 
+ return fastdecode_checktag(*data, tagbytes); +} + +#define FASTDECODE_CHECKPACKED(tagbytes, card, func) \ + if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ + if (card == CARD_r && fastdecode_flippacked(&data, tagbytes)) { \ + UPB_MUSTTAIL return func(UPB_PARSE_ARGS); \ + } \ + RETURN_GENERIC("packed check tag mismatch\n"); \ + } + +/* varint fields **************************************************************/ + +UPB_FORCEINLINE +static uint64_t fastdecode_munge(uint64_t val, int valbytes, bool zigzag) { + if (valbytes == 1) { + return val != 0; + } else if (zigzag) { + if (valbytes == 4) { + uint32_t n = val; + return (n >> 1) ^ -(int32_t)(n & 1); + } else if (valbytes == 8) { + return (val >> 1) ^ -(int64_t)(val & 1); + } + UPB_UNREACHABLE(); + } + return val; +} + +UPB_FORCEINLINE +static const char* fastdecode_varint64(const char* ptr, uint64_t* val) { + ptr++; + *val = (uint8_t)ptr[-1]; + if (UPB_UNLIKELY(*val & 0x80)) { + int i; + for (i = 0; i < 8; i++) { + ptr++; + uint64_t byte = (uint8_t)ptr[-1]; + *val += (byte - 1) << (7 + 7 * i); + if (UPB_LIKELY((byte & 0x80) == 0)) goto done; + } + ptr++; + uint64_t byte = (uint8_t)ptr[-1]; + if (byte > 1) { + return NULL; + } + *val += (byte - 1) << 63; + } +done: + UPB_ASSUME(ptr != NULL); + return ptr; +} + +#define FASTDECODE_UNPACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, zigzag, packed) \ + uint64_t val; \ + void* dst; \ + fastdecode_arr farr; \ + \ + FASTDECODE_CHECKPACKED(tagbytes, card, packed); \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, \ + card); \ + if (card == CARD_r) { \ + if (UPB_UNLIKELY(!dst)) { \ + RETURN_GENERIC("need array resize\n"); \ + } \ + } \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, valbytes); \ + } \ + \ + ptr += tagbytes; \ + ptr = fastdecode_varint64(ptr, &val); \ + if (ptr == NULL) _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ + val = 
fastdecode_munge(val, valbytes, zigzag); \ + memcpy(dst, &val, valbytes); \ + \ + if (card == CARD_r) { \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, valbytes); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + data = ret.tag; \ + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + return ptr; \ + } \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); + +typedef struct { + uint8_t valbytes; + bool zigzag; + void* dst; + fastdecode_arr farr; +} fastdecode_varintdata; + +UPB_FORCEINLINE +static const char* fastdecode_topackedvarint(upb_EpsCopyInputStream* e, + const char* ptr, void* ctx) { + upb_Decoder* d = (upb_Decoder*)e; + fastdecode_varintdata* data = ctx; + void* dst = data->dst; + uint64_t val; + + while (!_upb_Decoder_IsDone(d, &ptr)) { + dst = fastdecode_resizearr(d, dst, &data->farr, data->valbytes); + ptr = fastdecode_varint64(ptr, &val); + if (ptr == NULL) return NULL; + val = fastdecode_munge(val, data->valbytes, data->zigzag); + memcpy(dst, &val, data->valbytes); + dst = (char*)dst + data->valbytes; + } + + fastdecode_commitarr(dst, &data->farr, data->valbytes); + return ptr; +} + +#define FASTDECODE_PACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, zigzag, unpacked) \ + fastdecode_varintdata ctx = {valbytes, zigzag}; \ + \ + FASTDECODE_CHECKPACKED(tagbytes, CARD_r, unpacked); \ + \ + ctx.dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &ctx.farr, \ + valbytes, CARD_r); \ + if (UPB_UNLIKELY(!ctx.dst)) { \ + RETURN_GENERIC("need array resize\n"); \ + } \ + \ + ptr += tagbytes; \ + ptr = fastdecode_delimited(d, ptr, &fastdecode_topackedvarint, &ctx); \ + \ + if (UPB_UNLIKELY(ptr == NULL)) { \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(d, ptr, msg, table, hasbits, 0); + +#define 
FASTDECODE_VARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, zigzag, unpacked, packed) \ + if (card == CARD_p) { \ + FASTDECODE_PACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, zigzag, unpacked); \ + } else { \ + FASTDECODE_UNPACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, zigzag, packed); \ + } + +#define z_ZZ true +#define b_ZZ false +#define v_ZZ false + +/* Generate all combinations: + * {s,o,r,p} x {b1,v4,z4,v8,z8} x {1bt,2bt} */ + +#define F(card, type, valbytes, tagbytes) \ + UPB_NOINLINE \ + const char* upb_p##card##type##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ + FASTDECODE_VARINT(d, ptr, msg, table, hasbits, data, tagbytes, valbytes, \ + CARD_##card, type##_ZZ, \ + upb_pr##type##valbytes##_##tagbytes##bt, \ + upb_pp##type##valbytes##_##tagbytes##bt); \ + } + +#define TYPES(card, tagbytes) \ + F(card, b, 1, tagbytes) \ + F(card, v, 4, tagbytes) \ + F(card, v, 8, tagbytes) \ + F(card, z, 4, tagbytes) \ + F(card, z, 8, tagbytes) + +#define TAGBYTES(card) \ + TYPES(card, 1) \ + TYPES(card, 2) + +TAGBYTES(s) +TAGBYTES(o) +TAGBYTES(r) +TAGBYTES(p) + +#undef z_ZZ +#undef b_ZZ +#undef v_ZZ +#undef o_ONEOF +#undef s_ONEOF +#undef r_ONEOF +#undef F +#undef TYPES +#undef TAGBYTES +#undef FASTDECODE_UNPACKEDVARINT +#undef FASTDECODE_PACKEDVARINT +#undef FASTDECODE_VARINT + +/* fixed fields ***************************************************************/ + +#define FASTDECODE_UNPACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, packed) \ + void* dst; \ + fastdecode_arr farr; \ + \ + FASTDECODE_CHECKPACKED(tagbytes, card, packed) \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, \ + card); \ + if (card == CARD_r) { \ + if (UPB_UNLIKELY(!dst)) { \ + RETURN_GENERIC("couldn't allocate array in arena\n"); \ + } \ + } \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, valbytes); \ + } \ + \ + ptr += 
tagbytes; \ + memcpy(dst, ptr, valbytes); \ + ptr += valbytes; \ + \ + if (card == CARD_r) { \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, valbytes); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + data = ret.tag; \ + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + return ptr; \ + } \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); + +#define FASTDECODE_PACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, unpacked) \ + FASTDECODE_CHECKPACKED(tagbytes, CARD_r, unpacked) \ + \ + ptr += tagbytes; \ + int size = (uint8_t)ptr[0]; \ + ptr++; \ + if (size & 0x80) { \ + ptr = fastdecode_longsize(ptr, &size); \ + } \ + \ + if (UPB_UNLIKELY(!upb_EpsCopyInputStream_CheckDataSizeAvailable( \ + &d->input, ptr, size) || \ + (size % valbytes) != 0)) { \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ + } \ + \ + upb_Array** arr_p = fastdecode_fieldmem(msg, data); \ + upb_Array* arr = *arr_p; \ + uint8_t elem_size_lg2 = __builtin_ctz(valbytes); \ + int elems = size / valbytes; \ + \ + if (UPB_LIKELY(!arr)) { \ + *arr_p = arr = _upb_Array_New(&d->arena, elems, elem_size_lg2); \ + if (!arr) { \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ + } \ + } else { \ + _upb_Array_ResizeUninitialized(arr, elems, &d->arena); \ + } \ + \ + char* dst = _upb_array_ptr(arr); \ + memcpy(dst, ptr, size); \ + arr->size = elems; \ + \ + ptr += size; \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); + +#define FASTDECODE_FIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, unpacked, packed) \ + if (card == CARD_p) { \ + FASTDECODE_PACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, unpacked); \ + } else { \ + FASTDECODE_UNPACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, packed); \ + } + +/* 
Generate all combinations: + * {s,o,r,p} x {f4,f8} x {1bt,2bt} */ + +#define F(card, valbytes, tagbytes) \ + UPB_NOINLINE \ + const char* upb_p##card##f##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ + FASTDECODE_FIXED(d, ptr, msg, table, hasbits, data, tagbytes, valbytes, \ + CARD_##card, upb_ppf##valbytes##_##tagbytes##bt, \ + upb_prf##valbytes##_##tagbytes##bt); \ + } + +#define TYPES(card, tagbytes) \ + F(card, 4, tagbytes) \ + F(card, 8, tagbytes) + +#define TAGBYTES(card) \ + TYPES(card, 1) \ + TYPES(card, 2) + +TAGBYTES(s) +TAGBYTES(o) +TAGBYTES(r) +TAGBYTES(p) + +#undef F +#undef TYPES +#undef TAGBYTES +#undef FASTDECODE_UNPACKEDFIXED +#undef FASTDECODE_PACKEDFIXED + +/* string fields **************************************************************/ + +typedef const char* fastdecode_copystr_func(struct upb_Decoder* d, + const char* ptr, upb_Message* msg, + const upb_MiniTable* table, + uint64_t hasbits, + upb_StringView* dst); + +UPB_NOINLINE +static const char* fastdecode_verifyutf8(upb_Decoder* d, const char* ptr, + upb_Message* msg, intptr_t table, + uint64_t hasbits, uint64_t data) { + upb_StringView* dst = (upb_StringView*)data; + if (!_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) { + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); + } + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); +} + +#define FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, validate_utf8) \ + int size = (uint8_t)ptr[0]; /* Could plumb through hasbits. 
*/ \ + ptr++; \ + if (size & 0x80) { \ + ptr = fastdecode_longsize(ptr, &size); \ + } \ + \ + if (UPB_UNLIKELY(!upb_EpsCopyInputStream_CheckSize(&d->input, ptr, size))) { \ + dst->size = 0; \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ + } \ + \ + const char* s_ptr = ptr; \ + ptr = upb_EpsCopyInputStream_ReadString(&d->input, &s_ptr, size, &d->arena); \ + if (!ptr) _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); \ + dst->data = s_ptr; \ + dst->size = size; \ + \ + if (validate_utf8) { \ + data = (uint64_t)dst; \ + UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ + } else { \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \ + } + +UPB_NOINLINE +static const char* fastdecode_longstring_utf8(struct upb_Decoder* d, + const char* ptr, upb_Message* msg, + intptr_t table, uint64_t hasbits, + uint64_t data) { + upb_StringView* dst = (upb_StringView*)data; + FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, true); +} + +UPB_NOINLINE +static const char* fastdecode_longstring_noutf8( + struct upb_Decoder* d, const char* ptr, upb_Message* msg, intptr_t table, + uint64_t hasbits, uint64_t data) { + upb_StringView* dst = (upb_StringView*)data; + FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, false); +} + +UPB_FORCEINLINE +static void fastdecode_docopy(upb_Decoder* d, const char* ptr, uint32_t size, + int copy, char* data, upb_StringView* dst) { + d->arena.head.ptr += copy; + dst->data = data; + UPB_UNPOISON_MEMORY_REGION(data, copy); + memcpy(data, ptr, copy); + UPB_POISON_MEMORY_REGION(data + size, copy - size); +} + +#define FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes, \ + card, validate_utf8) \ + upb_StringView* dst; \ + fastdecode_arr farr; \ + int64_t size; \ + size_t arena_has; \ + size_t common_has; \ + char* buf; \ + \ + UPB_ASSERT(!upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, 0)); \ + UPB_ASSERT(fastdecode_checktag(data, tagbytes)); \ + \ + dst = 
fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ + sizeof(upb_StringView), card); \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_StringView)); \ + } \ + \ + size = (uint8_t)ptr[tagbytes]; \ + ptr += tagbytes + 1; \ + dst->size = size; \ + \ + buf = d->arena.head.ptr; \ + arena_has = _upb_ArenaHas(&d->arena); \ + common_has = UPB_MIN(arena_has, \ + upb_EpsCopyInputStream_BytesAvailable(&d->input, ptr)); \ + \ + if (UPB_LIKELY(size <= 15 - tagbytes)) { \ + if (arena_has < 16) goto longstr; \ + d->arena.head.ptr += 16; \ + memcpy(buf, ptr - tagbytes - 1, 16); \ + dst->data = buf + tagbytes + 1; \ + } else if (UPB_LIKELY(size <= 32)) { \ + if (UPB_UNLIKELY(common_has < 32)) goto longstr; \ + fastdecode_docopy(d, ptr, size, 32, buf, dst); \ + } else if (UPB_LIKELY(size <= 64)) { \ + if (UPB_UNLIKELY(common_has < 64)) goto longstr; \ + fastdecode_docopy(d, ptr, size, 64, buf, dst); \ + } else if (UPB_LIKELY(size < 128)) { \ + if (UPB_UNLIKELY(common_has < 128)) goto longstr; \ + fastdecode_docopy(d, ptr, size, 128, buf, dst); \ + } else { \ + goto longstr; \ + } \ + \ + ptr += size; \ + \ + if (card == CARD_r) { \ + if (validate_utf8 && \ + !_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) { \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); \ + } \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_StringView)); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + data = ret.tag; \ + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + return ptr; \ + } \ + } \ + \ + if (card != CARD_r && validate_utf8) { \ + data = (uint64_t)dst; \ + UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \ + \ + longstr: \ + if (card == CARD_r) { \ + fastdecode_commitarr(dst + 
1, &farr, sizeof(upb_StringView)); \ + } \ + ptr--; \ + if (validate_utf8) { \ + UPB_MUSTTAIL return fastdecode_longstring_utf8(d, ptr, msg, table, \ + hasbits, (uint64_t)dst); \ + } else { \ + UPB_MUSTTAIL return fastdecode_longstring_noutf8(d, ptr, msg, table, \ + hasbits, (uint64_t)dst); \ + } + +#define FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, card, \ + copyfunc, validate_utf8) \ + upb_StringView* dst; \ + fastdecode_arr farr; \ + int64_t size; \ + \ + if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ + RETURN_GENERIC("string field tag mismatch\n"); \ + } \ + \ + if (UPB_UNLIKELY( \ + !upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, 0))) { \ + UPB_MUSTTAIL return copyfunc(UPB_PARSE_ARGS); \ + } \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ + sizeof(upb_StringView), card); \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_StringView)); \ + } \ + \ + size = (int8_t)ptr[tagbytes]; \ + ptr += tagbytes + 1; \ + \ + if (UPB_UNLIKELY( \ + !upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, size))) { \ + ptr--; \ + if (validate_utf8) { \ + return fastdecode_longstring_utf8(d, ptr, msg, table, hasbits, \ + (uint64_t)dst); \ + } else { \ + return fastdecode_longstring_noutf8(d, ptr, msg, table, hasbits, \ + (uint64_t)dst); \ + } \ + } \ + \ + dst->data = ptr; \ + dst->size = size; \ + ptr = upb_EpsCopyInputStream_ReadStringAliased(&d->input, &dst->data, \ + dst->size); \ + \ + if (card == CARD_r) { \ + if (validate_utf8 && \ + !_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) { \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); \ + } \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_StringView)); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + data = ret.tag; \ + UPB_MUSTTAIL return 
_upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + return ptr; \ + } \ + } \ + \ + if (card != CARD_r && validate_utf8) { \ + data = (uint64_t)dst; \ + UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); + +/* Generate all combinations: + * {p,c} x {s,o,r} x {s, b} x {1bt,2bt} */ + +#define s_VALIDATE true +#define b_VALIDATE false + +#define F(card, tagbytes, type) \ + UPB_NOINLINE \ + const char* upb_c##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ + FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes, \ + CARD_##card, type##_VALIDATE); \ + } \ + const char* upb_p##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ + FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, \ + CARD_##card, upb_c##card##type##_##tagbytes##bt, \ + type##_VALIDATE); \ + } + +#define UTF8(card, tagbytes) \ + F(card, tagbytes, s) \ + F(card, tagbytes, b) + +#define TAGBYTES(card) \ + UTF8(card, 1) \ + UTF8(card, 2) + +TAGBYTES(s) +TAGBYTES(o) +TAGBYTES(r) + +#undef s_VALIDATE +#undef b_VALIDATE +#undef F +#undef TAGBYTES +#undef FASTDECODE_LONGSTRING +#undef FASTDECODE_COPYSTRING +#undef FASTDECODE_STRING + +/* message fields *************************************************************/ + +UPB_INLINE +upb_Message* decode_newmsg_ceil(upb_Decoder* d, const upb_MiniTable* l, + int msg_ceil_bytes) { + size_t size = l->size + sizeof(upb_Message_Internal); + char* msg_data; + if (UPB_LIKELY(msg_ceil_bytes > 0 && + _upb_ArenaHas(&d->arena) >= msg_ceil_bytes)) { + UPB_ASSERT(size <= (size_t)msg_ceil_bytes); + msg_data = d->arena.head.ptr; + d->arena.head.ptr += size; + UPB_UNPOISON_MEMORY_REGION(msg_data, msg_ceil_bytes); + memset(msg_data, 0, msg_ceil_bytes); + UPB_POISON_MEMORY_REGION(msg_data + size, msg_ceil_bytes - size); } else { - if (!d->table) { - upb_MdDecoder_ErrorJmp(&d->base, - "Extensions cannot have message modifiers"); - } - *msg_modifiers = mod; + 
msg_data = (char*)upb_Arena_Malloc(&d->arena, size); + memset(msg_data, 0, size); } + return msg_data + sizeof(upb_Message_Internal); +} + +typedef struct { + intptr_t table; + upb_Message* msg; +} fastdecode_submsgdata; +UPB_FORCEINLINE +static const char* fastdecode_tosubmsg(upb_EpsCopyInputStream* e, + const char* ptr, void* ctx) { + upb_Decoder* d = (upb_Decoder*)e; + fastdecode_submsgdata* submsg = ctx; + ptr = fastdecode_dispatch(d, ptr, submsg->msg, submsg->table, 0, 0); + UPB_ASSUME(ptr != NULL); return ptr; } -static void upb_MtDecoder_AllocateSubs(upb_MtDecoder* d, - upb_SubCounts sub_counts) { - uint32_t total_count = sub_counts.submsg_count + sub_counts.subenum_count; - size_t subs_bytes = sizeof(*d->table->subs) * total_count; - upb_MiniTableSub* subs = upb_Arena_Malloc(d->arena, subs_bytes); - upb_MdDecoder_CheckOutOfMemory(&d->base, subs); - uint32_t i = 0; - for (; i < sub_counts.submsg_count; i++) { - subs[i].submsg = &_kUpb_MiniTable_Empty; - } - if (sub_counts.subenum_count) { - upb_MiniTableField* f = d->fields; - upb_MiniTableField* end_f = f + d->table->field_count; - for (; f < end_f; f++) { - if (f->UPB_PRIVATE(descriptortype) == kUpb_FieldType_Enum) { - f->UPB_PRIVATE(submsg_index) += sub_counts.submsg_count; - } - } - for (; i < sub_counts.submsg_count + sub_counts.subenum_count; i++) { - subs[i].subenum = NULL; - } +#define FASTDECODE_SUBMSG(d, ptr, msg, table, hasbits, data, tagbytes, \ + msg_ceil_bytes, card) \ + \ + if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ + RETURN_GENERIC("submessage field tag mismatch\n"); \ + } \ + \ + if (--d->depth == 0) { \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_MaxDepthExceeded); \ + } \ + \ + upb_Message** dst; \ + uint32_t submsg_idx = (data >> 16) & 0xff; \ + const upb_MiniTable* tablep = decode_totablep(table); \ + const upb_MiniTable* subtablep = tablep->subs[submsg_idx].submsg; \ + fastdecode_submsgdata submsg = {decode_totable(subtablep)}; \ + fastdecode_arr farr; \ + \ + if 
(subtablep->table_mask == (uint8_t)-1) { \ + RETURN_GENERIC("submessage doesn't have fast tables."); \ + } \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ + sizeof(upb_Message*), card); \ + \ + if (card == CARD_s) { \ + *(uint32_t*)msg |= hasbits; \ + hasbits = 0; \ + } \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_Message*)); \ + } \ + \ + submsg.msg = *dst; \ + \ + if (card == CARD_r || UPB_LIKELY(!submsg.msg)) { \ + *dst = submsg.msg = decode_newmsg_ceil(d, subtablep, msg_ceil_bytes); \ + } \ + \ + ptr += tagbytes; \ + ptr = fastdecode_delimited(d, ptr, fastdecode_tosubmsg, &submsg); \ + \ + if (UPB_UNLIKELY(ptr == NULL || d->end_group != DECODE_NOGROUP)) { \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ + } \ + \ + if (card == CARD_r) { \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_Message*)); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + d->depth++; \ + data = ret.tag; \ + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + d->depth++; \ + return ptr; \ + } \ + } \ + \ + d->depth++; \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); + +#define F(card, tagbytes, size_ceil, ceil_arg) \ + const char* upb_p##card##m_##tagbytes##bt_max##size_ceil##b( \ + UPB_PARSE_PARAMS) { \ + FASTDECODE_SUBMSG(d, ptr, msg, table, hasbits, data, tagbytes, ceil_arg, \ + CARD_##card); \ } - d->table->subs = subs; -} -static const char* upb_MtDecoder_Parse(upb_MtDecoder* d, const char* ptr, - size_t len, void* fields, - size_t field_size, uint16_t* field_count, - upb_SubCounts* sub_counts) { - uint64_t msg_modifiers = 0; - uint32_t last_field_number = 0; - upb_MiniTableField* last_field = NULL; - bool need_dense_below = d->table != NULL; +#define SIZES(card, tagbytes) \ + F(card, tagbytes, 64, 64) \ + 
F(card, tagbytes, 128, 128) \ + F(card, tagbytes, 192, 192) \ + F(card, tagbytes, 256, 256) \ + F(card, tagbytes, max, -1) - d->base.end = UPB_PTRADD(ptr, len); +#define TAGBYTES(card) \ + SIZES(card, 1) \ + SIZES(card, 2) - while (ptr < d->base.end) { - char ch = *ptr++; - if (ch <= kUpb_EncodedValue_MaxField) { - if (!d->table && last_field) { - // For extensions, consume only a single field and then return. - return --ptr; - } - upb_MiniTableField* field = fields; - *field_count += 1; - fields = (char*)fields + field_size; - field->number = ++last_field_number; - last_field = field; - upb_MiniTable_SetField(d, ch, field, msg_modifiers, sub_counts); - } else if (kUpb_EncodedValue_MinModifier <= ch && - ch <= kUpb_EncodedValue_MaxModifier) { - ptr = upb_MtDecoder_ParseModifier(d, ptr, ch, last_field, &msg_modifiers); - if (msg_modifiers & kUpb_MessageModifier_IsExtendable) { - d->table->ext |= kUpb_ExtMode_Extendable; - } - } else if (ch == kUpb_EncodedValue_End) { - if (!d->table) { - upb_MdDecoder_ErrorJmp(&d->base, "Extensions cannot have oneofs."); - } - ptr = upb_MtDecoder_DecodeOneofs(d, ptr); - } else if (kUpb_EncodedValue_MinSkip <= ch && - ch <= kUpb_EncodedValue_MaxSkip) { - if (need_dense_below) { - d->table->dense_below = d->table->field_count; - need_dense_below = false; - } - uint32_t skip; - ptr = upb_MdDecoder_DecodeBase92Varint(&d->base, ptr, ch, - kUpb_EncodedValue_MinSkip, - kUpb_EncodedValue_MaxSkip, &skip); - last_field_number += skip; - last_field_number--; // Next field seen will increment. 
- } else { - upb_MdDecoder_ErrorJmp(&d->base, "Invalid char: %c", ch); - } - } +TAGBYTES(s) +TAGBYTES(o) +TAGBYTES(r) - if (need_dense_below) { - d->table->dense_below = d->table->field_count; - } +#undef TAGBYTES +#undef SIZES +#undef F +#undef FASTDECODE_SUBMSG - return ptr; -} +#endif /* UPB_FASTTABLE */ -static void upb_MtDecoder_ParseMessage(upb_MtDecoder* d, const char* data, - size_t len) { - // Buffer length is an upper bound on the number of fields. We will return - // what we don't use. - d->fields = upb_Arena_Malloc(d->arena, sizeof(*d->fields) * len); - upb_MdDecoder_CheckOutOfMemory(&d->base, d->fields); +// We encode backwards, to avoid pre-computing lengths (one-pass encode). - upb_SubCounts sub_counts = {0, 0}; - d->table->field_count = 0; - d->table->fields = d->fields; - upb_MtDecoder_Parse(d, data, len, d->fields, sizeof(*d->fields), - &d->table->field_count, &sub_counts); - upb_Arena_ShrinkLast(d->arena, d->fields, sizeof(*d->fields) * len, - sizeof(*d->fields) * d->table->field_count); - d->table->fields = d->fields; - upb_MtDecoder_AllocateSubs(d, sub_counts); -} +#include <setjmp.h> -int upb_MtDecoder_CompareFields(const void* _a, const void* _b) { - const upb_LayoutItem* a = _a; - const upb_LayoutItem* b = _b; - // Currently we just sort by: - // 1. rep (smallest fields first) - // 2. type (oneof cases first) - // 2. field_index (smallest numbers first) - // The main goal of this is to reduce space lost to padding. - // Later we may have more subtle reasons to prefer a different ordering.
- const int rep_bits = upb_Log2Ceiling(kUpb_FieldRep_Max); - const int type_bits = upb_Log2Ceiling(kUpb_LayoutItemType_Max); - const int idx_bits = (sizeof(a->field_index) * 8); - UPB_ASSERT(idx_bits + rep_bits + type_bits < 32); -#define UPB_COMBINE(rep, ty, idx) (((rep << type_bits) | ty) << idx_bits) | idx - uint32_t a_packed = UPB_COMBINE(a->rep, a->type, a->field_index); - uint32_t b_packed = UPB_COMBINE(b->rep, b->type, b->field_index); - assert(a_packed != b_packed); -#undef UPB_COMBINE - return a_packed < b_packed ? -1 : 1; -} -static bool upb_MtDecoder_SortLayoutItems(upb_MtDecoder* d) { - // Add items for all non-oneof fields (oneofs were already added). - int n = d->table->field_count; - for (int i = 0; i < n; i++) { - upb_MiniTableField* f = &d->fields[i]; - if (f->offset >= kOneofBase) continue; - upb_LayoutItem item = {.field_index = i, - .rep = f->mode >> kUpb_FieldRep_Shift, - .type = kUpb_LayoutItemType_Field}; - upb_MtDecoder_PushItem(d, item); - } +// Must be last. + +#define UPB_PB_VARINT_MAX_LEN 10 + +UPB_NOINLINE +static size_t encode_varint64(uint64_t val, char* buf) { + size_t i = 0; + do { + uint8_t byte = val & 0x7fU; + val >>= 7; + if (val) byte |= 0x80U; + buf[i++] = byte; + } while (val); + return i; +} - if (d->vec.size) { - qsort(d->vec.data, d->vec.size, sizeof(*d->vec.data), - upb_MtDecoder_CompareFields); - } +static uint32_t encode_zz32(int32_t n) { + return ((uint32_t)n << 1) ^ (n >> 31); +} +static uint64_t encode_zz64(int64_t n) { + return ((uint64_t)n << 1) ^ (n >> 63); +} - return true; +typedef struct { + upb_EncodeStatus status; + jmp_buf err; + upb_Arena* arena; + char *buf, *ptr, *limit; + int options; + int depth; + _upb_mapsorter sorter; +} upb_encstate; + +static size_t upb_roundup_pow2(size_t bytes) { + size_t ret = 128; + while (ret < bytes) { + ret *= 2; + } + return ret; } -static size_t upb_MiniTable_DivideRoundUp(size_t n, size_t d) { - return (n + d - 1) / d; +UPB_NORETURN static void encode_err(upb_encstate* e, 
upb_EncodeStatus s) { + UPB_ASSERT(s != kUpb_EncodeStatus_Ok); + e->status = s; + UPB_LONGJMP(e->err, 1); } -static void upb_MtDecoder_AssignHasbits(upb_MtDecoder* d) { - upb_MiniTable* ret = d->table; - int n = ret->field_count; - int last_hasbit = 0; // 0 cannot be used. +UPB_NOINLINE +static void encode_growbuffer(upb_encstate* e, size_t bytes) { + size_t old_size = e->limit - e->buf; + size_t new_size = upb_roundup_pow2(bytes + (e->limit - e->ptr)); + char* new_buf = upb_Arena_Realloc(e->arena, e->buf, old_size, new_size); - // First assign required fields, which must have the lowest hasbits. - for (int i = 0; i < n; i++) { - upb_MiniTableField* field = (upb_MiniTableField*)&ret->fields[i]; - if (field->offset == kRequiredPresence) { - field->presence = ++last_hasbit; - } else if (field->offset == kNoPresence) { - field->presence = 0; - } - } - ret->required_count = last_hasbit; + if (!new_buf) encode_err(e, kUpb_EncodeStatus_OutOfMemory); - if (ret->required_count > 63) { - upb_MdDecoder_ErrorJmp(&d->base, "Too many required fields"); + // We want previous data at the end, realloc() put it at the beginning. + // TODO(salo): This is somewhat inefficient since we are copying twice. + // Maybe create a realloc() that copies to the end of the new buffer? + if (old_size > 0) { + memmove(new_buf + new_size - old_size, e->buf, old_size); } - // Next assign non-required hasbit fields. - for (int i = 0; i < n; i++) { - upb_MiniTableField* field = (upb_MiniTableField*)&ret->fields[i]; - if (field->offset == kHasbitPresence) { - field->presence = ++last_hasbit; - } - } + e->ptr = new_buf + new_size - (e->limit - e->ptr); + e->limit = new_buf + new_size; + e->buf = new_buf; - ret->size = last_hasbit ? 
upb_MiniTable_DivideRoundUp(last_hasbit + 1, 8) : 0; + e->ptr -= bytes; } -size_t upb_MtDecoder_Place(upb_MtDecoder* d, upb_FieldRep rep) { - size_t size = upb_MtDecoder_SizeOfRep(rep, d->platform); - size_t align = upb_MtDecoder_AlignOfRep(rep, d->platform); - size_t ret = UPB_ALIGN_UP(d->table->size, align); - static const size_t max = UINT16_MAX; - size_t new_size = ret + size; - if (new_size > max) { - upb_MdDecoder_ErrorJmp( - &d->base, "Message size exceeded maximum size of %zu bytes", max); +/* Reserves "bytes" bytes for writing by moving e->ptr backwards, growing the + * buffer if needed. Does not return on allocation failure (see encode_err). */ +UPB_FORCEINLINE +static void encode_reserve(upb_encstate* e, size_t bytes) { + if ((size_t)(e->ptr - e->buf) < bytes) { + encode_growbuffer(e, bytes); + return; } - d->table->size = new_size; - return ret; -} - -static void upb_MtDecoder_AssignOffsets(upb_MtDecoder* d) { - upb_LayoutItem* end = UPB_PTRADD(d->vec.data, d->vec.size); - // Compute offsets. - for (upb_LayoutItem* item = d->vec.data; item < end; item++) { - item->offset = upb_MtDecoder_Place(d, item->rep); - } + e->ptr -= bytes; +} - // Assign oneof case offsets. We must do these first, since assigning - // actual offsets will overwrite the links of the linked list. - for (upb_LayoutItem* item = d->vec.data; item < end; item++) { - if (item->type != kUpb_LayoutItemType_OneofCase) continue; - upb_MiniTableField* f = &d->fields[item->field_index]; - while (true) { - f->presence = ~item->offset; - if (f->offset == kUpb_LayoutItem_IndexSentinel) break; - UPB_ASSERT(f->offset - kOneofBase < d->table->field_count); - f = &d->fields[f->offset - kOneofBase]; - } - } +/* Writes the given bytes to the buffer, handling reserve/advance. */ +static void encode_bytes(upb_encstate* e, const void* data, size_t len) { + if (len == 0) return; /* memcpy() with a null pointer is UB, even for size 0 */ + encode_reserve(e, len); + memcpy(e->ptr, data, len); +} - // Assign offsets.
- for (upb_LayoutItem* item = d->vec.data; item < end; item++) { - upb_MiniTableField* f = &d->fields[item->field_index]; - switch (item->type) { - case kUpb_LayoutItemType_OneofField: - while (true) { - uint16_t next_offset = f->offset; - f->offset = item->offset; - if (next_offset == kUpb_LayoutItem_IndexSentinel) break; - f = &d->fields[next_offset - kOneofBase]; - } - break; - case kUpb_LayoutItemType_Field: - f->offset = item->offset; - break; - default: - break; - } - } +static void encode_fixed64(upb_encstate* e, uint64_t val) { + val = _upb_BigEndian_Swap64(val); + encode_bytes(e, &val, sizeof(uint64_t)); +} - // The fasttable parser (supported on 64-bit only) depends on this being a - // multiple of 8 in order to satisfy UPB_MALLOC_ALIGN, which is also 8. - // - // On 32-bit we could potentially make this smaller, but there is no - // compelling reason to optimize this right now. - d->table->size = UPB_ALIGN_UP(d->table->size, 8); +static void encode_fixed32(upb_encstate* e, uint32_t val) { + val = _upb_BigEndian_Swap32(val); + encode_bytes(e, &val, sizeof(uint32_t)); } -static void upb_MtDecoder_ValidateEntryField(upb_MtDecoder* d, - const upb_MiniTableField* f, - uint32_t expected_num) { - const char* name = expected_num == 1 ? 
"key" : "val"; - if (f->number != expected_num) { - upb_MdDecoder_ErrorJmp(&d->base, - "map %s did not have expected number (%d vs %d)", - name, expected_num, (int)f->number); - } +UPB_NOINLINE +static void encode_longvarint(upb_encstate* e, uint64_t val) { + size_t len; + char* start; - if (upb_IsRepeatedOrMap(f)) { - upb_MdDecoder_ErrorJmp( - &d->base, "map %s cannot be repeated or map, or be in oneof", name); - } + encode_reserve(e, UPB_PB_VARINT_MAX_LEN); + len = encode_varint64(val, e->ptr); + start = e->ptr + UPB_PB_VARINT_MAX_LEN - len; + memmove(start, e->ptr, len); + e->ptr = start; +} - uint32_t not_ok_types; - if (expected_num == 1) { - not_ok_types = (1 << kUpb_FieldType_Float) | (1 << kUpb_FieldType_Double) | - (1 << kUpb_FieldType_Message) | (1 << kUpb_FieldType_Group) | - (1 << kUpb_FieldType_Bytes) | (1 << kUpb_FieldType_Enum); +UPB_FORCEINLINE +static void encode_varint(upb_encstate* e, uint64_t val) { + if (val < 128 && e->ptr != e->buf) { + --e->ptr; + *e->ptr = val; } else { - not_ok_types = 1 << kUpb_FieldType_Group; - } - - if ((1 << upb_MiniTableField_Type(f)) & not_ok_types) { - upb_MdDecoder_ErrorJmp(&d->base, "map %s cannot have type %d", name, - (int)f->UPB_PRIVATE(descriptortype)); + encode_longvarint(e, val); } } -static void upb_MtDecoder_ParseMap(upb_MtDecoder* d, const char* data, - size_t len) { - upb_MtDecoder_ParseMessage(d, data, len); - upb_MtDecoder_AssignHasbits(d); +static void encode_double(upb_encstate* e, double d) { + uint64_t u64; + UPB_ASSERT(sizeof(double) == sizeof(uint64_t)); + memcpy(&u64, &d, sizeof(uint64_t)); + encode_fixed64(e, u64); +} - if (UPB_UNLIKELY(d->table->field_count != 2)) { - upb_MdDecoder_ErrorJmp(&d->base, "%hu fields in map", - d->table->field_count); - UPB_UNREACHABLE(); - } +static void encode_float(upb_encstate* e, float d) { + uint32_t u32; + UPB_ASSERT(sizeof(float) == sizeof(uint32_t)); + memcpy(&u32, &d, sizeof(uint32_t)); + encode_fixed32(e, u32); +} - upb_LayoutItem* end = 
UPB_PTRADD(d->vec.data, d->vec.size); - for (upb_LayoutItem* item = d->vec.data; item < end; item++) { - if (item->type == kUpb_LayoutItemType_OneofCase) { - upb_MdDecoder_ErrorJmp(&d->base, "Map entry cannot have oneof"); - } - } +static void encode_tag(upb_encstate* e, uint32_t field_number, + uint8_t wire_type) { + encode_varint(e, (field_number << 3) | wire_type); +} - upb_MtDecoder_ValidateEntryField(d, &d->table->fields[0], 1); - upb_MtDecoder_ValidateEntryField(d, &d->table->fields[1], 2); +static void encode_fixedarray(upb_encstate* e, const upb_Array* arr, + size_t elem_size, uint32_t tag) { + size_t bytes = arr->size * elem_size; + const char* data = _upb_array_constptr(arr); + const char* ptr = data + bytes - elem_size; - // Map entries have a pre-determined layout, regardless of types. - // NOTE: sync with mini_table/message_internal.h. - const size_t kv_size = d->platform == kUpb_MiniTablePlatform_32Bit ? 8 : 16; - const size_t hasbit_size = 8; - d->fields[0].offset = hasbit_size; - d->fields[1].offset = hasbit_size + kv_size; - d->table->size = UPB_ALIGN_UP(hasbit_size + kv_size + kv_size, 8); + if (tag || !_upb_IsLittleEndian()) { + while (true) { + if (elem_size == 4) { + uint32_t val; + memcpy(&val, ptr, sizeof(val)); + val = _upb_BigEndian_Swap32(val); + encode_bytes(e, &val, elem_size); + } else { + UPB_ASSERT(elem_size == 8); + uint64_t val; + memcpy(&val, ptr, sizeof(val)); + val = _upb_BigEndian_Swap64(val); + encode_bytes(e, &val, elem_size); + } - // Map entries have a special bit set to signal it's a map entry, used in - // upb_MiniTable_SetSubMessage() below. 
- d->table->ext |= kUpb_ExtMode_IsMapEntry; + if (tag) encode_varint(e, tag); + if (ptr == data) break; + ptr -= elem_size; + } + } else { + encode_bytes(e, data, bytes); + } } -static void upb_MtDecoder_ParseMessageSet(upb_MtDecoder* d, const char* data, - size_t len) { - if (len > 0) { - upb_MdDecoder_ErrorJmp(&d->base, "Invalid message set encode length: %zu", - len); - } +static void encode_message(upb_encstate* e, const upb_Message* msg, + const upb_MiniTable* m, size_t* size); - upb_MiniTable* ret = d->table; - ret->size = 0; - ret->field_count = 0; - ret->ext = kUpb_ExtMode_IsMessageSet; - ret->dense_below = 0; - ret->table_mask = -1; - ret->required_count = 0; +static void encode_TaggedMessagePtr(upb_encstate* e, + upb_TaggedMessagePtr tagged, + const upb_MiniTable* m, size_t* size) { + if (upb_TaggedMessagePtr_IsEmpty(tagged)) { + m = &_kUpb_MiniTable_Empty; + } + encode_message(e, _upb_TaggedMessagePtr_GetMessage(tagged), m, size); } -static upb_MiniTable* upb_MtDecoder_DoBuildMiniTableWithBuf( - upb_MtDecoder* decoder, const char* data, size_t len, void** buf, - size_t* buf_size) { - upb_MdDecoder_CheckOutOfMemory(&decoder->base, decoder->table); - - decoder->table->size = 0; - decoder->table->field_count = 0; - decoder->table->ext = kUpb_ExtMode_NonExtendable; - decoder->table->dense_below = 0; - decoder->table->table_mask = -1; - decoder->table->required_count = 0; +static void encode_scalar(upb_encstate* e, const void* _field_mem, + const upb_MiniTableSub* subs, + const upb_MiniTableField* f) { + const char* field_mem = _field_mem; + int wire_type; - // Strip off and verify the version tag. 
- if (!len--) goto done; - const char vers = *data++; +#define CASE(ctype, type, wtype, encodeval) \ + { \ + ctype val = *(ctype*)field_mem; \ + encode_##type(e, encodeval); \ + wire_type = wtype; \ + break; \ + } - switch (vers) { - case kUpb_EncodedVersion_MapV1: - upb_MtDecoder_ParseMap(decoder, data, len); + switch (f->UPB_PRIVATE(descriptortype)) { + case kUpb_FieldType_Double: + CASE(double, double, kUpb_WireType_64Bit, val); + case kUpb_FieldType_Float: + CASE(float, float, kUpb_WireType_32Bit, val); + case kUpb_FieldType_Int64: + case kUpb_FieldType_UInt64: + CASE(uint64_t, varint, kUpb_WireType_Varint, val); + case kUpb_FieldType_UInt32: + CASE(uint32_t, varint, kUpb_WireType_Varint, val); + case kUpb_FieldType_Int32: + case kUpb_FieldType_Enum: + CASE(int32_t, varint, kUpb_WireType_Varint, (int64_t)val); + case kUpb_FieldType_SFixed64: + case kUpb_FieldType_Fixed64: + CASE(uint64_t, fixed64, kUpb_WireType_64Bit, val); + case kUpb_FieldType_Fixed32: + case kUpb_FieldType_SFixed32: + CASE(uint32_t, fixed32, kUpb_WireType_32Bit, val); + case kUpb_FieldType_Bool: + CASE(bool, varint, kUpb_WireType_Varint, val); + case kUpb_FieldType_SInt32: + CASE(int32_t, varint, kUpb_WireType_Varint, encode_zz32(val)); + case kUpb_FieldType_SInt64: + CASE(int64_t, varint, kUpb_WireType_Varint, encode_zz64(val)); + case kUpb_FieldType_String: + case kUpb_FieldType_Bytes: { + upb_StringView view = *(upb_StringView*)field_mem; + encode_bytes(e, view.data, view.size); + encode_varint(e, view.size); + wire_type = kUpb_WireType_Delimited; break; - - case kUpb_EncodedVersion_MessageV1: - upb_MtDecoder_ParseMessage(decoder, data, len); - upb_MtDecoder_AssignHasbits(decoder); - upb_MtDecoder_SortLayoutItems(decoder); - upb_MtDecoder_AssignOffsets(decoder); + } + case kUpb_FieldType_Group: { + size_t size; + upb_TaggedMessagePtr submsg = *(upb_TaggedMessagePtr*)field_mem; + const upb_MiniTable* subm = subs[f->UPB_PRIVATE(submsg_index)].submsg; + if (submsg == 0) { + return; + } + if 
(--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded); + encode_tag(e, f->number, kUpb_WireType_EndGroup); + encode_TaggedMessagePtr(e, submsg, subm, &size); + wire_type = kUpb_WireType_StartGroup; + e->depth++; break; - - case kUpb_EncodedVersion_MessageSetV1: - upb_MtDecoder_ParseMessageSet(decoder, data, len); + } + case kUpb_FieldType_Message: { + size_t size; + upb_TaggedMessagePtr submsg = *(upb_TaggedMessagePtr*)field_mem; + const upb_MiniTable* subm = subs[f->UPB_PRIVATE(submsg_index)].submsg; + if (submsg == 0) { + return; + } + if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded); + encode_TaggedMessagePtr(e, submsg, subm, &size); + encode_varint(e, size); + wire_type = kUpb_WireType_Delimited; + e->depth++; break; - + } default: - upb_MdDecoder_ErrorJmp(&decoder->base, "Invalid message version: %c", - vers); + UPB_UNREACHABLE(); } +#undef CASE -done: - *buf = decoder->vec.data; - *buf_size = decoder->vec.capacity * sizeof(*decoder->vec.data); - return decoder->table; + encode_tag(e, f->number, wire_type); } -static upb_MiniTable* upb_MtDecoder_BuildMiniTableWithBuf( - upb_MtDecoder* const decoder, const char* const data, const size_t len, - void** const buf, size_t* const buf_size) { - if (UPB_SETJMP(decoder->base.err) != 0) { - *buf = decoder->vec.data; - *buf_size = decoder->vec.capacity * sizeof(*decoder->vec.data); - return NULL; - } +static void encode_array(upb_encstate* e, const upb_Message* msg, + const upb_MiniTableSub* subs, + const upb_MiniTableField* f) { + const upb_Array* arr = *UPB_PTR_AT(msg, f->offset, upb_Array*); + bool packed = f->mode & kUpb_LabelFlags_IsPacked; + size_t pre_len = e->limit - e->ptr; - return upb_MtDecoder_DoBuildMiniTableWithBuf(decoder, data, len, buf, - buf_size); -} + if (arr == NULL || arr->size == 0) { + return; + } -upb_MiniTable* upb_MiniTable_BuildWithBuf(const char* data, size_t len, - upb_MiniTablePlatform platform, - upb_Arena* arena, void** buf, - size_t* buf_size, - 
upb_Status* status) { - upb_MtDecoder decoder = { - .base = {.status = status}, - .platform = platform, - .vec = - { - .data = *buf, - .capacity = *buf_size / sizeof(*decoder.vec.data), - .size = 0, - }, - .arena = arena, - .table = upb_Arena_Malloc(arena, sizeof(*decoder.table)), - }; +#define VARINT_CASE(ctype, encode) \ + { \ + const ctype* start = _upb_array_constptr(arr); \ + const ctype* ptr = start + arr->size; \ + uint32_t tag = packed ? 0 : (f->number << 3) | kUpb_WireType_Varint; \ + do { \ + ptr--; \ + encode_varint(e, encode); \ + if (tag) encode_varint(e, tag); \ + } while (ptr != start); \ + } \ + break; - return upb_MtDecoder_BuildMiniTableWithBuf(&decoder, data, len, buf, - buf_size); -} +#define TAG(wire_type) (packed ? 0 : (f->number << 3 | wire_type)) -static const char* upb_MtDecoder_DoBuildMiniTableExtension( - upb_MtDecoder* decoder, const char* data, size_t len, - upb_MiniTableExtension* ext, const upb_MiniTable* extendee, - upb_MiniTableSub sub) { - // If the string is non-empty then it must begin with a version tag. 
- if (len) { - if (*data != kUpb_EncodedVersion_ExtensionV1) { - upb_MdDecoder_ErrorJmp(&decoder->base, "Invalid ext version: %c", *data); + switch (f->UPB_PRIVATE(descriptortype)) { + case kUpb_FieldType_Double: + encode_fixedarray(e, arr, sizeof(double), TAG(kUpb_WireType_64Bit)); + break; + case kUpb_FieldType_Float: + encode_fixedarray(e, arr, sizeof(float), TAG(kUpb_WireType_32Bit)); + break; + case kUpb_FieldType_SFixed64: + case kUpb_FieldType_Fixed64: + encode_fixedarray(e, arr, sizeof(uint64_t), TAG(kUpb_WireType_64Bit)); + break; + case kUpb_FieldType_Fixed32: + case kUpb_FieldType_SFixed32: + encode_fixedarray(e, arr, sizeof(uint32_t), TAG(kUpb_WireType_32Bit)); + break; + case kUpb_FieldType_Int64: + case kUpb_FieldType_UInt64: + VARINT_CASE(uint64_t, *ptr); + case kUpb_FieldType_UInt32: + VARINT_CASE(uint32_t, *ptr); + case kUpb_FieldType_Int32: + case kUpb_FieldType_Enum: + VARINT_CASE(int32_t, (int64_t)*ptr); + case kUpb_FieldType_Bool: + VARINT_CASE(bool, *ptr); + case kUpb_FieldType_SInt32: + VARINT_CASE(int32_t, encode_zz32(*ptr)); + case kUpb_FieldType_SInt64: + VARINT_CASE(int64_t, encode_zz64(*ptr)); + case kUpb_FieldType_String: + case kUpb_FieldType_Bytes: { + const upb_StringView* start = _upb_array_constptr(arr); + const upb_StringView* ptr = start + arr->size; + do { + ptr--; + encode_bytes(e, ptr->data, ptr->size); + encode_varint(e, ptr->size); + encode_tag(e, f->number, kUpb_WireType_Delimited); + } while (ptr != start); + return; + } + case kUpb_FieldType_Group: { + const upb_TaggedMessagePtr* start = _upb_array_constptr(arr); + const upb_TaggedMessagePtr* ptr = start + arr->size; + const upb_MiniTable* subm = subs[f->UPB_PRIVATE(submsg_index)].submsg; + if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded); + do { + size_t size; + ptr--; + encode_tag(e, f->number, kUpb_WireType_EndGroup); + encode_TaggedMessagePtr(e, *ptr, subm, &size); + encode_tag(e, f->number, kUpb_WireType_StartGroup); + } while (ptr != start); + 
e->depth++; + return; + } + case kUpb_FieldType_Message: { + const upb_TaggedMessagePtr* start = _upb_array_constptr(arr); + const upb_TaggedMessagePtr* ptr = start + arr->size; + const upb_MiniTable* subm = subs[f->UPB_PRIVATE(submsg_index)].submsg; + if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded); + do { + size_t size; + ptr--; + encode_TaggedMessagePtr(e, *ptr, subm, &size); + encode_varint(e, size); + encode_tag(e, f->number, kUpb_WireType_Delimited); + } while (ptr != start); + e->depth++; + return; } - data++; - len--; } +#undef VARINT_CASE - uint16_t count = 0; - upb_SubCounts sub_counts = {0, 0}; - const char* ret = upb_MtDecoder_Parse(decoder, data, len, ext, sizeof(*ext), - &count, &sub_counts); - if (!ret || count != 1) return NULL; - - upb_MiniTableField* f = &ext->field; - - f->mode |= kUpb_LabelFlags_IsExtension; - f->offset = 0; - f->presence = 0; - - if (extendee->ext & kUpb_ExtMode_IsMessageSet) { - // Extensions of MessageSet must be messages. - if (!upb_IsSubMessage(f)) return NULL; - - // Extensions of MessageSet must be non-repeating. 
- if ((f->mode & kUpb_FieldMode_Mask) == kUpb_FieldMode_Array) return NULL; + if (packed) { + encode_varint(e, e->limit - e->ptr - pre_len); + encode_tag(e, f->number, kUpb_WireType_Delimited); } - - ext->extendee = extendee; - ext->sub = sub; - - return ret; -} - -static const char* upb_MtDecoder_BuildMiniTableExtension( - upb_MtDecoder* const decoder, const char* const data, const size_t len, - upb_MiniTableExtension* const ext, const upb_MiniTable* const extendee, - const upb_MiniTableSub sub) { - if (UPB_SETJMP(decoder->base.err) != 0) return NULL; - return upb_MtDecoder_DoBuildMiniTableExtension(decoder, data, len, ext, - extendee, sub); } -const char* _upb_MiniTableExtension_Init(const char* data, size_t len, - upb_MiniTableExtension* ext, - const upb_MiniTable* extendee, - upb_MiniTableSub sub, - upb_MiniTablePlatform platform, - upb_Status* status) { - upb_MtDecoder decoder = { - .base = {.status = status}, - .arena = NULL, - .table = NULL, - .platform = platform, - }; - - return upb_MtDecoder_BuildMiniTableExtension(&decoder, data, len, ext, - extendee, sub); +static void encode_mapentry(upb_encstate* e, uint32_t number, + const upb_MiniTable* layout, + const upb_MapEntry* ent) { + const upb_MiniTableField* key_field = &layout->fields[0]; + const upb_MiniTableField* val_field = &layout->fields[1]; + size_t pre_len = e->limit - e->ptr; + size_t size; + encode_scalar(e, &ent->data.v, layout->subs, val_field); + encode_scalar(e, &ent->data.k, layout->subs, key_field); + size = (e->limit - e->ptr) - pre_len; + encode_varint(e, size); + encode_tag(e, number, kUpb_WireType_Delimited); } -upb_MiniTableExtension* _upb_MiniTableExtension_Build( - const char* data, size_t len, const upb_MiniTable* extendee, - upb_MiniTableSub sub, upb_MiniTablePlatform platform, upb_Arena* arena, - upb_Status* status) { - upb_MiniTableExtension* ext = - upb_Arena_Malloc(arena, sizeof(upb_MiniTableExtension)); - if (UPB_UNLIKELY(!ext)) return NULL; +static void 
encode_map(upb_encstate* e, const upb_Message* msg, + const upb_MiniTableSub* subs, + const upb_MiniTableField* f) { + const upb_Map* map = *UPB_PTR_AT(msg, f->offset, const upb_Map*); + const upb_MiniTable* layout = subs[f->UPB_PRIVATE(submsg_index)].submsg; + UPB_ASSERT(layout->field_count == 2); - const char* ptr = _upb_MiniTableExtension_Init(data, len, ext, extendee, sub, - platform, status); - if (UPB_UNLIKELY(!ptr)) return NULL; + if (map == NULL) return; - return ext; + if (e->options & kUpb_EncodeOption_Deterministic) { + _upb_sortedmap sorted; + _upb_mapsorter_pushmap(&e->sorter, + layout->fields[0].UPB_PRIVATE(descriptortype), map, + &sorted); + upb_MapEntry ent; + while (_upb_sortedmap_next(&e->sorter, map, &sorted, &ent)) { + encode_mapentry(e, f->number, layout, &ent); + } + _upb_mapsorter_popmap(&e->sorter, &sorted); + } else { + intptr_t iter = UPB_STRTABLE_BEGIN; + upb_StringView key; + upb_value val; + while (upb_strtable_next2(&map->table, &key, &val, &iter)) { + upb_MapEntry ent; + _upb_map_fromkey(key, &ent.data.k, map->key_size); + _upb_map_fromvalue(val, &ent.data.v, map->val_size); + encode_mapentry(e, f->number, layout, &ent); + } + } } -upb_MiniTable* _upb_MiniTable_Build(const char* data, size_t len, - upb_MiniTablePlatform platform, - upb_Arena* arena, upb_Status* status) { - void* buf = NULL; - size_t size = 0; - upb_MiniTable* ret = upb_MiniTable_BuildWithBuf(data, len, platform, arena, - &buf, &size, status); - free(buf); - return ret; +static bool encode_shouldencode(upb_encstate* e, const upb_Message* msg, + const upb_MiniTableSub* subs, + const upb_MiniTableField* f) { + if (f->presence == 0) { + /* Proto3 presence or map/array. 
*/ + const void* mem = UPB_PTR_AT(msg, f->offset, void); + switch (_upb_MiniTableField_GetRep(f)) { + case kUpb_FieldRep_1Byte: { + char ch; + memcpy(&ch, mem, 1); + return ch != 0; + } + case kUpb_FieldRep_4Byte: { + uint32_t u32; + memcpy(&u32, mem, 4); + return u32 != 0; + } + case kUpb_FieldRep_8Byte: { + uint64_t u64; + memcpy(&u64, mem, 8); + return u64 != 0; + } + case kUpb_FieldRep_StringView: { + const upb_StringView* str = (const upb_StringView*)mem; + return str->size != 0; + } + default: + UPB_UNREACHABLE(); + } + } else if (f->presence > 0) { + /* Proto2 presence: hasbit. */ + return _upb_hasbit_field(msg, f); + } else { + /* Field is in a oneof. */ + return _upb_getoneofcase_field(msg, f) == f->number; + } } - -// Must be last. - -bool upb_MiniTable_SetSubMessage(upb_MiniTable* table, - upb_MiniTableField* field, - const upb_MiniTable* sub) { - UPB_ASSERT((uintptr_t)table->fields <= (uintptr_t)field && - (uintptr_t)field < - (uintptr_t)(table->fields + table->field_count)); - UPB_ASSERT(sub); - - const bool sub_is_map = sub->ext & kUpb_ExtMode_IsMapEntry; - - switch (field->UPB_PRIVATE(descriptortype)) { - case kUpb_FieldType_Message: - if (sub_is_map) { - const bool table_is_map = table->ext & kUpb_ExtMode_IsMapEntry; - if (UPB_UNLIKELY(table_is_map)) return false; - - field->mode = (field->mode & ~kUpb_FieldMode_Mask) | kUpb_FieldMode_Map; - } +static void encode_field(upb_encstate* e, const upb_Message* msg, + const upb_MiniTableSub* subs, + const upb_MiniTableField* field) { + switch (upb_FieldMode_Get(field)) { + case kUpb_FieldMode_Array: + encode_array(e, msg, subs, field); break; - - case kUpb_FieldType_Group: - if (UPB_UNLIKELY(sub_is_map)) return false; + case kUpb_FieldMode_Map: + encode_map(e, msg, subs, field); + break; + case kUpb_FieldMode_Scalar: + encode_scalar(e, UPB_PTR_AT(msg, field->offset, void), subs, field); break; - default: - return false; + UPB_UNREACHABLE(); } - - upb_MiniTableSub* table_sub = - 
(void*)&table->subs[field->UPB_PRIVATE(submsg_index)]; - // TODO(haberman): Add this assert back once YouTube is updated to not call - // this function repeatedly. - // UPB_ASSERT(table_sub->submsg == &_kUpb_MiniTable_Empty); - table_sub->submsg = sub; - return true; } -bool upb_MiniTable_SetSubEnum(upb_MiniTable* table, upb_MiniTableField* field, - const upb_MiniTableEnum* sub) { - UPB_ASSERT((uintptr_t)table->fields <= (uintptr_t)field && - (uintptr_t)field < - (uintptr_t)(table->fields + table->field_count)); - UPB_ASSERT(sub); - - upb_MiniTableSub* table_sub = - (void*)&table->subs[field->UPB_PRIVATE(submsg_index)]; - table_sub->subenum = sub; - return true; +static void encode_msgset_item(upb_encstate* e, + const upb_Message_Extension* ext) { + size_t size; + encode_tag(e, kUpb_MsgSet_Item, kUpb_WireType_EndGroup); + encode_message(e, ext->data.ptr, ext->ext->sub.submsg, &size); + encode_varint(e, size); + encode_tag(e, kUpb_MsgSet_Message, kUpb_WireType_Delimited); + encode_varint(e, ext->ext->field.number); + encode_tag(e, kUpb_MsgSet_TypeId, kUpb_WireType_Varint); + encode_tag(e, kUpb_MsgSet_Item, kUpb_WireType_StartGroup); } -uint32_t upb_MiniTable_GetSubList(const upb_MiniTable* mt, - const upb_MiniTableField** subs) { - uint32_t msg_count = 0; - uint32_t enum_count = 0; - - for (int i = 0; i < mt->field_count; i++) { - const upb_MiniTableField* f = &mt->fields[i]; - if (upb_MiniTableField_CType(f) == kUpb_CType_Message) { - *subs = f; - ++subs; - msg_count++; - } +static void encode_ext(upb_encstate* e, const upb_Message_Extension* ext, + bool is_message_set) { + if (UPB_UNLIKELY(is_message_set)) { + encode_msgset_item(e, ext); + } else { + encode_field(e, &ext->data, &ext->ext->sub, &ext->ext->field); } +} - for (int i = 0; i < mt->field_count; i++) { - const upb_MiniTableField* f = &mt->fields[i]; - if (upb_MiniTableField_CType(f) == kUpb_CType_Enum) { - *subs = f; - ++subs; - enum_count++; +static void encode_message(upb_encstate* e, const 
upb_Message* msg, + const upb_MiniTable* m, size_t* size) { + size_t pre_len = e->limit - e->ptr; + + if ((e->options & kUpb_EncodeOption_CheckRequired) && m->required_count) { + uint64_t msg_head; + memcpy(&msg_head, msg, 8); + msg_head = _upb_BigEndian_Swap64(msg_head); + if (upb_MiniTable_requiredmask(m) & ~msg_head) { + encode_err(e, kUpb_EncodeStatus_MissingRequired); } } - return (msg_count << 16) | enum_count; -} + if ((e->options & kUpb_EncodeOption_SkipUnknown) == 0) { + size_t unknown_size; + const char* unknown = upb_Message_GetUnknown(msg, &unknown_size); -// The list of sub_tables and sub_enums must exactly match the number and order -// of sub-message fields and sub-enum fields given by upb_MiniTable_GetSubList() -// above. -bool upb_MiniTable_Link(upb_MiniTable* mt, const upb_MiniTable** sub_tables, - size_t sub_table_count, - const upb_MiniTableEnum** sub_enums, - size_t sub_enum_count) { - uint32_t msg_count = 0; - uint32_t enum_count = 0; + if (unknown) { + encode_bytes(e, unknown, unknown_size); + } + } - for (int i = 0; i < mt->field_count; i++) { - upb_MiniTableField* f = (upb_MiniTableField*)&mt->fields[i]; - if (upb_MiniTableField_CType(f) == kUpb_CType_Message) { - const upb_MiniTable* sub = sub_tables[msg_count++]; - if (msg_count > sub_table_count) return false; - if (sub != NULL) { - if (!upb_MiniTable_SetSubMessage(mt, f, sub)) return false; + if (m->ext != kUpb_ExtMode_NonExtendable) { + /* Encode all extensions together. Unlike C++, we do not attempt to keep + * these in field number order relative to normal fields or even to each + * other. 
*/ + size_t ext_count; + const upb_Message_Extension* ext = _upb_Message_Getexts(msg, &ext_count); + if (ext_count) { + if (e->options & kUpb_EncodeOption_Deterministic) { + _upb_sortedmap sorted; + _upb_mapsorter_pushexts(&e->sorter, ext, ext_count, &sorted); + while (_upb_sortedmap_nextext(&e->sorter, &sorted, &ext)) { + encode_ext(e, ext, m->ext == kUpb_ExtMode_IsMessageSet); + } + _upb_mapsorter_popmap(&e->sorter, &sorted); + } else { + const upb_Message_Extension* end = ext + ext_count; + for (; ext != end; ext++) { + encode_ext(e, ext, m->ext == kUpb_ExtMode_IsMessageSet); + } } } } - for (int i = 0; i < mt->field_count; i++) { - upb_MiniTableField* f = (upb_MiniTableField*)&mt->fields[i]; - if (upb_MiniTableField_IsClosedEnum(f)) { - const upb_MiniTableEnum* sub = sub_enums[enum_count++]; - if (enum_count > sub_enum_count) return false; - if (sub != NULL) { - if (!upb_MiniTable_SetSubEnum(mt, f, sub)) return false; + if (m->field_count) { + const upb_MiniTableField* f = &m->fields[m->field_count]; + const upb_MiniTableField* first = &m->fields[0]; + while (f != first) { + f--; + if (encode_shouldencode(e, msg, m->subs, f)) { + encode_field(e, msg, m->subs, f); } } } - return true; -} - - -const struct upb_MiniTable _kUpb_MiniTable_Empty = { - .subs = NULL, - .fields = NULL, - .size = 0, - .field_count = 0, - .ext = kUpb_ExtMode_NonExtendable, - .dense_below = 0, - .table_mask = -1, - .required_count = 0, -}; - - - -// Must be last. - -#define EXTREG_KEY_SIZE (sizeof(upb_MiniTable*) + sizeof(uint32_t)) - -struct upb_ExtensionRegistry { - upb_Arena* arena; - upb_strtable exts; // Key is upb_MiniTable* concatenated with fieldnum. 
-}; - -static void extreg_key(char* buf, const upb_MiniTable* l, uint32_t fieldnum) { - memcpy(buf, &l, sizeof(l)); - memcpy(buf + sizeof(l), &fieldnum, sizeof(fieldnum)); + *size = (e->limit - e->ptr) - pre_len; } -upb_ExtensionRegistry* upb_ExtensionRegistry_New(upb_Arena* arena) { - upb_ExtensionRegistry* r = upb_Arena_Malloc(arena, sizeof(*r)); - if (!r) return NULL; - r->arena = arena; - if (!upb_strtable_init(&r->exts, 8, arena)) return NULL; - return r; -} +static upb_EncodeStatus upb_Encoder_Encode(upb_encstate* const encoder, + const void* const msg, + const upb_MiniTable* const l, + char** const buf, + size_t* const size) { + // Unfortunately we must continue to perform hackery here because there are + // code paths which blindly copy the returned pointer without bothering to + // check for errors until much later (b/235839510). So we still set *buf to + // NULL on error and we still set it to non-NULL on a successful empty result. + if (UPB_SETJMP(encoder->err) == 0) { + encode_message(encoder, msg, l, size); + *size = encoder->limit - encoder->ptr; + if (*size == 0) { + static char ch; + *buf = &ch; + } else { + UPB_ASSERT(encoder->ptr); + *buf = encoder->ptr; + } + } else { + UPB_ASSERT(encoder->status != kUpb_EncodeStatus_Ok); + *buf = NULL; + *size = 0; + } -UPB_API bool upb_ExtensionRegistry_Add(upb_ExtensionRegistry* r, - const upb_MiniTableExtension* e) { - char buf[EXTREG_KEY_SIZE]; - extreg_key(buf, e->extendee, e->field.number); - if (upb_strtable_lookup2(&r->exts, buf, EXTREG_KEY_SIZE, NULL)) return false; - return upb_strtable_insert(&r->exts, buf, EXTREG_KEY_SIZE, - upb_value_constptr(e), r->arena); + _upb_mapsorter_destroy(&encoder->sorter); + return encoder->status; } -bool upb_ExtensionRegistry_AddArray(upb_ExtensionRegistry* r, - const upb_MiniTableExtension** e, - size_t count) { - const upb_MiniTableExtension** start = e; - const upb_MiniTableExtension** end = UPB_PTRADD(e, count); - for (; e < end; e++) { - if 
(!upb_ExtensionRegistry_Add(r, *e)) goto failure; - } - return true; +upb_EncodeStatus upb_Encode(const void* msg, const upb_MiniTable* l, + int options, upb_Arena* arena, char** buf, + size_t* size) { + upb_encstate e; + unsigned depth = (unsigned)options >> 16; -failure: - // Back out the entries previously added. - for (end = e, e = start; e < end; e++) { - const upb_MiniTableExtension* ext = *e; - char buf[EXTREG_KEY_SIZE]; - extreg_key(buf, ext->extendee, ext->field.number); - upb_strtable_remove2(&r->exts, buf, EXTREG_KEY_SIZE, NULL); - } - return false; -} + e.status = kUpb_EncodeStatus_Ok; + e.arena = arena; + e.buf = NULL; + e.limit = NULL; + e.ptr = NULL; + e.depth = depth ? depth : kUpb_WireFormat_DefaultDepthLimit; + e.options = options; + _upb_mapsorter_init(&e.sorter); -const upb_MiniTableExtension* upb_ExtensionRegistry_Lookup( - const upb_ExtensionRegistry* r, const upb_MiniTable* t, uint32_t num) { - char buf[EXTREG_KEY_SIZE]; - upb_value v; - extreg_key(buf, t, num); - if (upb_strtable_lookup2(&r->exts, buf, EXTREG_KEY_SIZE, &v)) { - return upb_value_getconstptr(v); - } else { - return NULL; - } + return upb_Encoder_Encode(&e, msg, l, buf, size); } -#include - // Must be last. 
-const upb_MiniTableField* upb_MiniTable_FindFieldByNumber( - const upb_MiniTable* t, uint32_t number) { - const size_t i = ((size_t)number) - 1; // 0 wraps to SIZE_MAX - - // Ideal case: index into dense fields - if (i < t->dense_below) { - UPB_ASSERT(t->fields[i].number == number); - return &t->fields[i]; - } - - // Slow case: binary search - int lo = t->dense_below; - int hi = t->field_count - 1; - while (lo <= hi) { - int mid = (lo + hi) / 2; - uint32_t num = t->fields[mid].number; - if (num < number) { - lo = mid + 1; - continue; - } - if (num > number) { - hi = mid - 1; - continue; - } - return &t->fields[mid]; - } - return NULL; -} - -static bool upb_MiniTable_Is_Oneof(const upb_MiniTableField* f) { - return f->presence < 0; -} - -const upb_MiniTableField* upb_MiniTable_GetOneof(const upb_MiniTable* m, - const upb_MiniTableField* f) { - if (UPB_UNLIKELY(!upb_MiniTable_Is_Oneof(f))) { - return NULL; - } - const upb_MiniTableField* ptr = &m->fields[0]; - const upb_MiniTableField* end = &m->fields[m->field_count]; - while (++ptr < end) { - if (ptr->presence == (*f).presence) { - return ptr; +UPB_NOINLINE _upb_WireReader_ReadLongVarintRet +_upb_WireReader_ReadLongVarint(const char* ptr, uint64_t val) { + _upb_WireReader_ReadLongVarintRet ret = {NULL, 0}; + uint64_t byte; + int i; + for (i = 1; i < 10; i++) { + byte = (uint8_t)ptr[i]; + val += (byte - 1) << (i * 7); + if (!(byte & 0x80)) { + ret.ptr = ptr + i + 1; + ret.val = val; + return ret; } } - return NULL; + return ret; } -bool upb_MiniTable_NextOneofField(const upb_MiniTable* m, - const upb_MiniTableField** f) { - const upb_MiniTableField* ptr = *f; - const upb_MiniTableField* end = &m->fields[m->field_count]; - while (++ptr < end) { - if (ptr->presence == (*f)->presence) { - *f = ptr; - return true; - } +const char* _upb_WireReader_SkipGroup(const char* ptr, uint32_t tag, + int depth_limit, + upb_EpsCopyInputStream* stream) { + if (--depth_limit == 0) return NULL; + uint32_t end_group_tag = (tag & ~7ULL) 
| kUpb_WireType_EndGroup; + while (!upb_EpsCopyInputStream_IsDone(stream, &ptr)) { + uint32_t tag; + ptr = upb_WireReader_ReadTag(ptr, &tag); + if (!ptr) return NULL; + if (tag == end_group_tag) return ptr; + ptr = _upb_WireReader_SkipValue(ptr, tag, depth_limit, stream); + if (!ptr) return NULL; } - return false; + return ptr; } // This should #undef all macros #defined in def.inc diff --git a/php/ext/google/protobuf/php-upb.h b/php/ext/google/protobuf/php-upb.h index d56d7f714f..b646e10cb3 100644 --- a/php/ext/google/protobuf/php-upb.h +++ b/php/ext/google/protobuf/php-upb.h @@ -357,8 +357,8 @@ void upb_Status_VAppendErrorFormat(upb_Status* status, const char* fmt, #endif /* UPB_BASE_STATUS_H_ */ -#ifndef UPB_INTERNAL_ARRAY_INTERNAL_H_ -#define UPB_INTERNAL_ARRAY_INTERNAL_H_ +#ifndef UPB_COLLECTIONS_INTERNAL_ARRAY_H_ +#define UPB_COLLECTIONS_INTERNAL_ARRAY_H_ #include @@ -1364,7 +1364,7 @@ UPB_INLINE void _upb_array_detach(const void* msg, size_t ofs) { #endif -#endif /* UPB_INTERNAL_ARRAY_INTERNAL_H_ */ +#endif /* UPB_COLLECTIONS_INTERNAL_ARRAY_H_ */ #ifndef UPB_COLLECTIONS_MAP_H_ #define UPB_COLLECTIONS_MAP_H_ @@ -1477,8 +1477,8 @@ UPB_API upb_MessageValue upb_MapIterator_Value(const upb_Map* map, size_t iter); // EVERYTHING BELOW THIS LINE IS INTERNAL - DO NOT USE ///////////////////////// -#ifndef UPB_COLLECTIONS_MAP_INTERNAL_H_ -#define UPB_COLLECTIONS_MAP_INTERNAL_H_ +#ifndef UPB_COLLECTIONS_INTERNAL_MAP_H_ +#define UPB_COLLECTIONS_INTERNAL_MAP_H_ #ifndef UPB_HASH_STR_TABLE_H_ @@ -1914,47 +1914,18 @@ upb_Map* _upb_Map_New(upb_Arena* a, size_t key_size, size_t value_size); #endif -#endif /* UPB_COLLECTIONS_MAP_INTERNAL_H_ */ - -#ifndef UPB_BASE_LOG2_H_ -#define UPB_BASE_LOG2_H_ - -// Must be last. 
- -#ifdef __cplusplus -extern "C" { -#endif - -UPB_INLINE int upb_Log2Ceiling(int x) { - if (x <= 1) return 0; -#ifdef __GNUC__ - return 32 - __builtin_clz(x - 1); -#else - int lg2 = 0; - while ((1 << lg2) < x) lg2++; - return lg2; -#endif -} - -UPB_INLINE int upb_Log2CeilingSize(int x) { return 1 << upb_Log2Ceiling(x); } - -#ifdef __cplusplus -} /* extern "C" */ -#endif - - -#endif /* UPB_BASE_LOG2_H_ */ +#endif /* UPB_COLLECTIONS_INTERNAL_MAP_H_ */ // EVERYTHING BELOW THIS LINE IS INTERNAL - DO NOT USE ///////////////////////// -#ifndef UPB_COLLECTIONS_MAP_SORTER_INTERNAL_H_ -#define UPB_COLLECTIONS_MAP_SORTER_INTERNAL_H_ +#ifndef UPB_COLLECTIONS_INTERNAL_MAP_SORTER_H_ +#define UPB_COLLECTIONS_INTERNAL_MAP_SORTER_H_ #include -#ifndef UPB_MESSAGE_EXTENSION_INTERNAL_H_ -#define UPB_MESSAGE_EXTENSION_INTERNAL_H_ +#ifndef UPB_MESSAGE_INTERNAL_EXTENSION_H_ +#define UPB_MESSAGE_INTERNAL_EXTENSION_H_ // Public APIs for message operations that do not depend on the schema. @@ -2068,7 +2039,7 @@ const upb_Message_Extension* _upb_Message_Getext( #endif -#endif /* UPB_MESSAGE_EXTENSION_INTERNAL_H_ */ +#endif /* UPB_MESSAGE_INTERNAL_EXTENSION_H_ */ #ifndef UPB_MINI_TABLE_INTERNAL_MAP_ENTRY_DATA_H_ #define UPB_MINI_TABLE_INTERNAL_MAP_ENTRY_DATA_H_ @@ -2108,7 +2079,7 @@ typedef struct { // require 8-byte alignment. double d; }; - // LINT.ThenChange(//depot/google3/third_party/upb/upb/message/internal.h:internal_layout) + // LINT.ThenChange(//depot/google3/third_party/upb/upb/message/internal/message.h:internal_layout) upb_MapEntryData data; } upb_MapEntry; @@ -2182,7 +2153,36 @@ bool _upb_mapsorter_pushexts(_upb_mapsorter* s, #endif -#endif /* UPB_COLLECTIONS_MAP_SORTER_INTERNAL_H_ */ +#endif /* UPB_COLLECTIONS_INTERNAL_MAP_SORTER_H_ */ + +#ifndef UPB_BASE_LOG2_H_ +#define UPB_BASE_LOG2_H_ + +// Must be last. 
+ +#ifdef __cplusplus +extern "C" { +#endif + +UPB_INLINE int upb_Log2Ceiling(int x) { + if (x <= 1) return 0; +#ifdef __GNUC__ + return 32 - __builtin_clz(x - 1); +#else + int lg2 = 0; + while ((1 << lg2) < x) lg2++; + return lg2; +#endif +} + +UPB_INLINE int upb_Log2CeilingSize(int x) { return 1 << upb_Log2Ceiling(x); } + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* UPB_BASE_LOG2_H_ */ #ifndef UPB_REFLECTION_DEF_H_ #define UPB_REFLECTION_DEF_H_ @@ -2280,6 +2280,10 @@ UPB_INLINE void _upb_msg_map_set_value(void* msg, const void* val, #define UPB_MESSAGE_ACCESSORS_H_ +#ifndef UPB_MESSAGE_INTERNAL_ACCESSORS_H_ +#define UPB_MESSAGE_INTERNAL_ACCESSORS_H_ + + /* ** Our memory representation for parsing tables and messages themselves. ** Functions in this file are used by generated code and possibly reflection. @@ -2459,10 +2463,6 @@ bool _upb_Message_AddUnknown(upb_Message* msg, const char* data, size_t len, #endif /* UPB_MESSAGE_INTERNAL_H_ */ -#ifndef UPB_MESSAGE_INTERNAL_ACCESSORS_H_ -#define UPB_MESSAGE_INTERNAL_ACCESSORS_H_ - - // Must be last. #if defined(__GNUC__) && !defined(__clang__) @@ -11952,123 +11952,53 @@ UPB_INLINE const char* upb_WireReader_SkipValue( #endif // UPB_WIRE_READER_H_ -#ifndef UPB_REFLECTION_DEF_BUILDER_INTERNAL_H_ -#define UPB_REFLECTION_DEF_BUILDER_INTERNAL_H_ +#ifndef UPB_MINI_DESCRIPTOR_INTERNAL_DECODER_H_ +#define UPB_MINI_DESCRIPTOR_INTERNAL_DECODER_H_ -// Must be last. +#ifndef UPB_MINI_DESCRIPTOR_INTERNAL_BASE92_H_ +#define UPB_MINI_DESCRIPTOR_INTERNAL_BASE92_H_ -// We want to copy the options verbatim into the destination options proto. -// We use serialize+parse as our deep copy. 
-#define UPB_DEF_SET_OPTIONS(target, desc_type, options_type, proto) \ - if (UPB_DESC(desc_type##_has_options)(proto)) { \ - size_t size; \ - char* pb = UPB_DESC(options_type##_serialize)( \ - UPB_DESC(desc_type##_options)(proto), ctx->tmp_arena, &size); \ - if (!pb) _upb_DefBuilder_OomErr(ctx); \ - target = \ - UPB_DESC(options_type##_parse)(pb, size, _upb_DefBuilder_Arena(ctx)); \ - if (!target) _upb_DefBuilder_OomErr(ctx); \ - } else { \ - target = (const UPB_DESC(options_type)*)kUpbDefOptDefault; \ - } + +// Must be last. #ifdef __cplusplus extern "C" { #endif -struct upb_DefBuilder { - upb_DefPool* symtab; - upb_FileDef* file; // File we are building. - upb_Arena* arena; // Allocate defs here. - upb_Arena* tmp_arena; // For temporary allocations. - upb_Status* status; // Record errors here. - const upb_MiniTableFile* layout; // NULL if we should build layouts. - upb_MiniTablePlatform platform; // Platform we are targeting. - int enum_count; // Count of enums built so far. - int msg_count; // Count of messages built so far. - int ext_count; // Count of extensions built so far. - jmp_buf err; // longjmp() on error. -}; - -extern const char* kUpbDefOptDefault; - -// ctx->status has already been set elsewhere so just fail/longjmp() -UPB_NORETURN void _upb_DefBuilder_FailJmp(upb_DefBuilder* ctx); - -UPB_NORETURN void _upb_DefBuilder_Errf(upb_DefBuilder* ctx, const char* fmt, - ...) UPB_PRINTF(2, 3); -UPB_NORETURN void _upb_DefBuilder_OomErr(upb_DefBuilder* ctx); - -const char* _upb_DefBuilder_MakeFullName(upb_DefBuilder* ctx, - const char* prefix, - upb_StringView name); - -// Given a symbol and the base symbol inside which it is defined, -// find the symbol's definition. 
-const void* _upb_DefBuilder_ResolveAny(upb_DefBuilder* ctx, - const char* from_name_dbg, - const char* base, upb_StringView sym, - upb_deftype_t* type); - -const void* _upb_DefBuilder_Resolve(upb_DefBuilder* ctx, - const char* from_name_dbg, const char* base, - upb_StringView sym, upb_deftype_t type); - -char _upb_DefBuilder_ParseEscape(upb_DefBuilder* ctx, const upb_FieldDef* f, - const char** src, const char* end); - -const char* _upb_DefBuilder_FullToShort(const char* fullname); - -UPB_INLINE void* _upb_DefBuilder_Alloc(upb_DefBuilder* ctx, size_t bytes) { - if (bytes == 0) return NULL; - void* ret = upb_Arena_Malloc(ctx->arena, bytes); - if (!ret) _upb_DefBuilder_OomErr(ctx); - return ret; -} - -// Adds a symbol |v| to the symtab, which must be a def pointer previously -// packed with pack_def(). The def's pointer to upb_FileDef* must be set before -// adding, so we know which entries to remove if building this file fails. -UPB_INLINE void _upb_DefBuilder_Add(upb_DefBuilder* ctx, const char* name, - upb_value v) { - upb_StringView sym = {.data = name, .size = strlen(name)}; - bool ok = _upb_DefPool_InsertSym(ctx->symtab, sym, v, ctx->status); - if (!ok) _upb_DefBuilder_FailJmp(ctx); -} - -UPB_INLINE upb_Arena* _upb_DefBuilder_Arena(const upb_DefBuilder* ctx) { - return ctx->arena; +UPB_INLINE char _upb_ToBase92(int8_t ch) { + extern const char _kUpb_ToBase92[]; + UPB_ASSERT(0 <= ch && ch < 92); + return _kUpb_ToBase92[ch]; } -UPB_INLINE upb_FileDef* _upb_DefBuilder_File(const upb_DefBuilder* ctx) { - return ctx->file; +UPB_INLINE char _upb_FromBase92(uint8_t ch) { + extern const int8_t _kUpb_FromBase92[]; + if (' ' > ch || ch > '~') return -1; + return _kUpb_FromBase92[ch - ' ']; } -// This version of CheckIdent() is only called by other, faster versions after -// they detect a parsing error. -void _upb_DefBuilder_CheckIdentSlow(upb_DefBuilder* ctx, upb_StringView name, - bool full); - -// Verify a full identifier string. 
This is slightly more complicated than -// verifying a relative identifier string because we must track '.' chars. -UPB_INLINE void _upb_DefBuilder_CheckIdentFull(upb_DefBuilder* ctx, - upb_StringView name) { - bool good = name.size > 0; - bool start = true; - - for (size_t i = 0; i < name.size; i++) { - const char c = name.data[i]; - const char d = c | 0x20; // force lowercase - const bool is_alpha = (('a' <= d) & (d <= 'z')) | (c == '_'); - const bool is_numer = ('0' <= c) & (c <= '9') & !start; - const bool is_dot = (c == '.') & !start; - - good &= is_alpha | is_numer | is_dot; - start = is_dot; +UPB_INLINE const char* _upb_Base92_DecodeVarint(const char* ptr, + const char* end, char first_ch, + uint8_t min, uint8_t max, + uint32_t* out_val) { + uint32_t val = 0; + uint32_t shift = 0; + const int bits_per_char = + upb_Log2Ceiling(_upb_FromBase92(max) - _upb_FromBase92(min)); + char ch = first_ch; + while (1) { + uint32_t bits = _upb_FromBase92(ch) - _upb_FromBase92(min); + val |= bits << shift; + if (ptr == end || *ptr < min || max < *ptr) { + *out_val = val; + UPB_ASSUME(ptr != NULL); + return ptr; + } + ch = *ptr++; + shift += bits_per_char; + if (shift >= 32) return NULL; } - - if (!good) _upb_DefBuilder_CheckIdentSlow(ctx, name, true); } #ifdef __cplusplus @@ -12076,34 +12006,364 @@ UPB_INLINE void _upb_DefBuilder_CheckIdentFull(upb_DefBuilder* ctx, #endif -#endif /* UPB_REFLECTION_DEF_BUILDER_INTERNAL_H_ */ - -#ifndef UPB_REFLECTION_ENUM_DEF_INTERNAL_H_ -#define UPB_REFLECTION_ENUM_DEF_INTERNAL_H_ - +#endif // UPB_MINI_DESCRIPTOR_INTERNAL_BASE92_H_ // Must be last. -#ifdef __cplusplus -extern "C" { -#endif +// upb_MdDecoder: used internally for decoding MiniDescriptors for messages, +// extensions, and enums. 
+typedef struct { + const char* end; + upb_Status* status; + jmp_buf err; +} upb_MdDecoder; -upb_EnumDef* _upb_EnumDef_At(const upb_EnumDef* e, int i); -bool _upb_EnumDef_Insert(upb_EnumDef* e, upb_EnumValueDef* v, upb_Arena* a); -const upb_MiniTableEnum* _upb_EnumDef_MiniTable(const upb_EnumDef* e); +UPB_PRINTF(2, 3) +UPB_NORETURN UPB_INLINE void upb_MdDecoder_ErrorJmp(upb_MdDecoder* d, + const char* fmt, ...) { + if (d->status) { + va_list argp; + upb_Status_SetErrorMessage(d->status, "Error building mini table: "); + va_start(argp, fmt); + upb_Status_VAppendErrorFormat(d->status, fmt, argp); + va_end(argp); + } + UPB_LONGJMP(d->err, 1); +} -// Allocate and initialize an array of |n| enum defs. -upb_EnumDef* _upb_EnumDefs_New( - upb_DefBuilder* ctx, int n, - const UPB_DESC(EnumDescriptorProto) * const* protos, - const upb_MessageDef* containing_type); +UPB_INLINE void upb_MdDecoder_CheckOutOfMemory(upb_MdDecoder* d, + const void* ptr) { + if (!ptr) upb_MdDecoder_ErrorJmp(d, "Out of memory"); +} -#ifdef __cplusplus -} /* extern "C" */ -#endif +UPB_INLINE const char* upb_MdDecoder_DecodeBase92Varint( + upb_MdDecoder* d, const char* ptr, char first_ch, uint8_t min, uint8_t max, + uint32_t* out_val) { + ptr = _upb_Base92_DecodeVarint(ptr, d->end, first_ch, min, max, out_val); + if (!ptr) upb_MdDecoder_ErrorJmp(d, "Overlong varint"); + return ptr; +} -#endif /* UPB_REFLECTION_ENUM_DEF_INTERNAL_H_ */ +#endif // UPB_MINI_DESCRIPTOR_INTERNAL_DECODER_H_ + +#ifndef UPB_MINI_DESCRIPTOR_INTERNAL_WIRE_CONSTANTS_H_ +#define UPB_MINI_DESCRIPTOR_INTERNAL_WIRE_CONSTANTS_H_ + + +// Must be last. 
+ +typedef enum { + kUpb_EncodedType_Double = 0, + kUpb_EncodedType_Float = 1, + kUpb_EncodedType_Fixed32 = 2, + kUpb_EncodedType_Fixed64 = 3, + kUpb_EncodedType_SFixed32 = 4, + kUpb_EncodedType_SFixed64 = 5, + kUpb_EncodedType_Int32 = 6, + kUpb_EncodedType_UInt32 = 7, + kUpb_EncodedType_SInt32 = 8, + kUpb_EncodedType_Int64 = 9, + kUpb_EncodedType_UInt64 = 10, + kUpb_EncodedType_SInt64 = 11, + kUpb_EncodedType_OpenEnum = 12, + kUpb_EncodedType_Bool = 13, + kUpb_EncodedType_Bytes = 14, + kUpb_EncodedType_String = 15, + kUpb_EncodedType_Group = 16, + kUpb_EncodedType_Message = 17, + kUpb_EncodedType_ClosedEnum = 18, + + kUpb_EncodedType_RepeatedBase = 20, +} upb_EncodedType; + +typedef enum { + kUpb_EncodedFieldModifier_FlipPacked = 1 << 0, + kUpb_EncodedFieldModifier_IsRequired = 1 << 1, + kUpb_EncodedFieldModifier_IsProto3Singular = 1 << 2, +} upb_EncodedFieldModifier; + +enum { + kUpb_EncodedValue_MinField = ' ', + kUpb_EncodedValue_MaxField = 'I', + kUpb_EncodedValue_MinModifier = 'L', + kUpb_EncodedValue_MaxModifier = '[', + kUpb_EncodedValue_End = '^', + kUpb_EncodedValue_MinSkip = '_', + kUpb_EncodedValue_MaxSkip = '~', + kUpb_EncodedValue_OneofSeparator = '~', + kUpb_EncodedValue_FieldSeparator = '|', + kUpb_EncodedValue_MinOneofField = ' ', + kUpb_EncodedValue_MaxOneofField = 'b', + kUpb_EncodedValue_MaxEnumMask = 'A', +}; + +enum { + kUpb_EncodedVersion_EnumV1 = '!', + kUpb_EncodedVersion_ExtensionV1 = '#', + kUpb_EncodedVersion_MapV1 = '%', + kUpb_EncodedVersion_MessageV1 = '$', + kUpb_EncodedVersion_MessageSetV1 = '&', +}; + + +#endif // UPB_MINI_DESCRIPTOR_INTERNAL_WIRE_CONSTANTS_H_ + +#ifndef UPB_MINI_DESCRIPTOR_INTERNAL_MODIFIERS_H_ +#define UPB_MINI_DESCRIPTOR_INTERNAL_MODIFIERS_H_ + +// Must be last. 
+ +typedef enum { + kUpb_FieldModifier_IsRepeated = 1 << 0, + kUpb_FieldModifier_IsPacked = 1 << 1, + kUpb_FieldModifier_IsClosedEnum = 1 << 2, + kUpb_FieldModifier_IsProto3Singular = 1 << 3, + kUpb_FieldModifier_IsRequired = 1 << 4, +} kUpb_FieldModifier; + +typedef enum { + kUpb_MessageModifier_ValidateUtf8 = 1 << 0, + kUpb_MessageModifier_DefaultIsPacked = 1 << 1, + kUpb_MessageModifier_IsExtendable = 1 << 2, +} kUpb_MessageModifier; + + +#endif // UPB_MINI_DESCRIPTOR_INTERNAL_MODIFIERS_H_ + +#ifndef UPB_MINI_DESCRIPTOR_INTERNAL_ENCODE_H_ +#define UPB_MINI_DESCRIPTOR_INTERNAL_ENCODE_H_ + + +// Must be last. + +// If the input buffer has at least this many bytes available, the encoder call +// is guaranteed to succeed (as long as field number order is maintained). +#define kUpb_MtDataEncoder_MinSize 16 + +typedef struct { + char* end; // Limit of the buffer passed as a parameter. + // Aliased to internal-only members in .cc. + char internal[32]; +} upb_MtDataEncoder; + +#ifdef __cplusplus +extern "C" { +#endif + +// Encodes field/oneof information for a given message. The sequence of calls +// should look like: +// +// upb_MtDataEncoder e; +// char buf[256]; +// char* ptr = buf; +// e.end = ptr + sizeof(buf); +// unit64_t msg_mod = ...; // bitwise & of kUpb_MessageModifiers or zero +// ptr = upb_MtDataEncoder_StartMessage(&e, ptr, msg_mod); +// // Fields *must* be in field number order. +// ptr = upb_MtDataEncoder_PutField(&e, ptr, ...); +// ptr = upb_MtDataEncoder_PutField(&e, ptr, ...); +// ptr = upb_MtDataEncoder_PutField(&e, ptr, ...); +// +// // If oneofs are present. Oneofs must be encoded after regular fields. 
+// ptr = upb_MiniTable_StartOneof(&e, ptr) +// ptr = upb_MiniTable_PutOneofField(&e, ptr, ...); +// ptr = upb_MiniTable_PutOneofField(&e, ptr, ...); +// +// ptr = upb_MiniTable_StartOneof(&e, ptr); +// ptr = upb_MiniTable_PutOneofField(&e, ptr, ...); +// ptr = upb_MiniTable_PutOneofField(&e, ptr, ...); +// +// Oneofs must be encoded after all regular fields. +char* upb_MtDataEncoder_StartMessage(upb_MtDataEncoder* e, char* ptr, + uint64_t msg_mod); +char* upb_MtDataEncoder_PutField(upb_MtDataEncoder* e, char* ptr, + upb_FieldType type, uint32_t field_num, + uint64_t field_mod); +char* upb_MtDataEncoder_StartOneof(upb_MtDataEncoder* e, char* ptr); +char* upb_MtDataEncoder_PutOneofField(upb_MtDataEncoder* e, char* ptr, + uint32_t field_num); + +// Encodes the set of values for a given enum. The values must be given in +// order (after casting to uint32_t), and repeats are not allowed. +char* upb_MtDataEncoder_StartEnum(upb_MtDataEncoder* e, char* ptr); +char* upb_MtDataEncoder_PutEnumValue(upb_MtDataEncoder* e, char* ptr, + uint32_t val); +char* upb_MtDataEncoder_EndEnum(upb_MtDataEncoder* e, char* ptr); + +// Encodes an entire mini descriptor for an extension. +char* upb_MtDataEncoder_EncodeExtension(upb_MtDataEncoder* e, char* ptr, + upb_FieldType type, uint32_t field_num, + uint64_t field_mod); + +// Encodes an entire mini descriptor for a map. +char* upb_MtDataEncoder_EncodeMap(upb_MtDataEncoder* e, char* ptr, + upb_FieldType key_type, + upb_FieldType value_type, uint64_t key_mod, + uint64_t value_mod); + +// Encodes an entire mini descriptor for a message set. +char* upb_MtDataEncoder_EncodeMessageSet(upb_MtDataEncoder* e, char* ptr); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* UPB_MINI_DESCRIPTOR_INTERNAL_ENCODE_H_ */ + +#ifndef UPB_REFLECTION_DEF_BUILDER_INTERNAL_H_ +#define UPB_REFLECTION_DEF_BUILDER_INTERNAL_H_ + + +// Must be last. + +// We want to copy the options verbatim into the destination options proto. 
+// We use serialize+parse as our deep copy. +#define UPB_DEF_SET_OPTIONS(target, desc_type, options_type, proto) \ + if (UPB_DESC(desc_type##_has_options)(proto)) { \ + size_t size; \ + char* pb = UPB_DESC(options_type##_serialize)( \ + UPB_DESC(desc_type##_options)(proto), ctx->tmp_arena, &size); \ + if (!pb) _upb_DefBuilder_OomErr(ctx); \ + target = \ + UPB_DESC(options_type##_parse)(pb, size, _upb_DefBuilder_Arena(ctx)); \ + if (!target) _upb_DefBuilder_OomErr(ctx); \ + } else { \ + target = (const UPB_DESC(options_type)*)kUpbDefOptDefault; \ + } + +#ifdef __cplusplus +extern "C" { +#endif + +struct upb_DefBuilder { + upb_DefPool* symtab; + upb_FileDef* file; // File we are building. + upb_Arena* arena; // Allocate defs here. + upb_Arena* tmp_arena; // For temporary allocations. + upb_Status* status; // Record errors here. + const upb_MiniTableFile* layout; // NULL if we should build layouts. + upb_MiniTablePlatform platform; // Platform we are targeting. + int enum_count; // Count of enums built so far. + int msg_count; // Count of messages built so far. + int ext_count; // Count of extensions built so far. + jmp_buf err; // longjmp() on error. +}; + +extern const char* kUpbDefOptDefault; + +// ctx->status has already been set elsewhere so just fail/longjmp() +UPB_NORETURN void _upb_DefBuilder_FailJmp(upb_DefBuilder* ctx); + +UPB_NORETURN void _upb_DefBuilder_Errf(upb_DefBuilder* ctx, const char* fmt, + ...) UPB_PRINTF(2, 3); +UPB_NORETURN void _upb_DefBuilder_OomErr(upb_DefBuilder* ctx); + +const char* _upb_DefBuilder_MakeFullName(upb_DefBuilder* ctx, + const char* prefix, + upb_StringView name); + +// Given a symbol and the base symbol inside which it is defined, +// find the symbol's definition. 
+const void* _upb_DefBuilder_ResolveAny(upb_DefBuilder* ctx, + const char* from_name_dbg, + const char* base, upb_StringView sym, + upb_deftype_t* type); + +const void* _upb_DefBuilder_Resolve(upb_DefBuilder* ctx, + const char* from_name_dbg, const char* base, + upb_StringView sym, upb_deftype_t type); + +char _upb_DefBuilder_ParseEscape(upb_DefBuilder* ctx, const upb_FieldDef* f, + const char** src, const char* end); + +const char* _upb_DefBuilder_FullToShort(const char* fullname); + +UPB_INLINE void* _upb_DefBuilder_Alloc(upb_DefBuilder* ctx, size_t bytes) { + if (bytes == 0) return NULL; + void* ret = upb_Arena_Malloc(ctx->arena, bytes); + if (!ret) _upb_DefBuilder_OomErr(ctx); + return ret; +} + +// Adds a symbol |v| to the symtab, which must be a def pointer previously +// packed with pack_def(). The def's pointer to upb_FileDef* must be set before +// adding, so we know which entries to remove if building this file fails. +UPB_INLINE void _upb_DefBuilder_Add(upb_DefBuilder* ctx, const char* name, + upb_value v) { + upb_StringView sym = {.data = name, .size = strlen(name)}; + bool ok = _upb_DefPool_InsertSym(ctx->symtab, sym, v, ctx->status); + if (!ok) _upb_DefBuilder_FailJmp(ctx); +} + +UPB_INLINE upb_Arena* _upb_DefBuilder_Arena(const upb_DefBuilder* ctx) { + return ctx->arena; +} + +UPB_INLINE upb_FileDef* _upb_DefBuilder_File(const upb_DefBuilder* ctx) { + return ctx->file; +} + +// This version of CheckIdent() is only called by other, faster versions after +// they detect a parsing error. +void _upb_DefBuilder_CheckIdentSlow(upb_DefBuilder* ctx, upb_StringView name, + bool full); + +// Verify a full identifier string. This is slightly more complicated than +// verifying a relative identifier string because we must track '.' chars. 
+UPB_INLINE void _upb_DefBuilder_CheckIdentFull(upb_DefBuilder* ctx, + upb_StringView name) { + bool good = name.size > 0; + bool start = true; + + for (size_t i = 0; i < name.size; i++) { + const char c = name.data[i]; + const char d = c | 0x20; // force lowercase + const bool is_alpha = (('a' <= d) & (d <= 'z')) | (c == '_'); + const bool is_numer = ('0' <= c) & (c <= '9') & !start; + const bool is_dot = (c == '.') & !start; + + good &= is_alpha | is_numer | is_dot; + start = is_dot; + } + + if (!good) _upb_DefBuilder_CheckIdentSlow(ctx, name, true); +} + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* UPB_REFLECTION_DEF_BUILDER_INTERNAL_H_ */ + +#ifndef UPB_REFLECTION_ENUM_DEF_INTERNAL_H_ +#define UPB_REFLECTION_ENUM_DEF_INTERNAL_H_ + + +// Must be last. + +#ifdef __cplusplus +extern "C" { +#endif + +upb_EnumDef* _upb_EnumDef_At(const upb_EnumDef* e, int i); +bool _upb_EnumDef_Insert(upb_EnumDef* e, upb_EnumValueDef* v, upb_Arena* a); +const upb_MiniTableEnum* _upb_EnumDef_MiniTable(const upb_EnumDef* e); + +// Allocate and initialize an array of |n| enum defs. +upb_EnumDef* _upb_EnumDefs_New( + upb_DefBuilder* ctx, int n, + const UPB_DESC(EnumDescriptorProto) * const* protos, + const upb_MessageDef* containing_type); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* UPB_REFLECTION_ENUM_DEF_INTERNAL_H_ */ #ifndef UPB_REFLECTION_ENUM_VALUE_DEF_INTERNAL_H_ #define UPB_REFLECTION_ENUM_VALUE_DEF_INTERNAL_H_ @@ -12240,116 +12500,35 @@ upb_MessageDef* _upb_MessageDefs_New( #endif -#endif /* UPB_REFLECTION_MESSAGE_DEF_INTERNAL_H_ */ - -#ifndef UPB_REFLECTION_SERVICE_DEF_INTERNAL_H_ -#define UPB_REFLECTION_SERVICE_DEF_INTERNAL_H_ - - -// Must be last. - -#ifdef __cplusplus -extern "C" { -#endif - -upb_ServiceDef* _upb_ServiceDef_At(const upb_ServiceDef* s, int i); - -// Allocate and initialize an array of |n| service defs. 
-upb_ServiceDef* _upb_ServiceDefs_New( - upb_DefBuilder* ctx, int n, - const UPB_DESC(ServiceDescriptorProto) * const* protos); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - - -#endif /* UPB_REFLECTION_SERVICE_DEF_INTERNAL_H_ */ - -#ifndef UPB_REFLECTION_DESC_STATE_INTERNAL_H_ -#define UPB_REFLECTION_DESC_STATE_INTERNAL_H_ - - -#ifndef UPB_MINI_DESCRIPTOR_INTERNAL_ENCODE_H_ -#define UPB_MINI_DESCRIPTOR_INTERNAL_ENCODE_H_ - - -// Must be last. +#endif /* UPB_REFLECTION_MESSAGE_DEF_INTERNAL_H_ */ -// If the input buffer has at least this many bytes available, the encoder call -// is guaranteed to succeed (as long as field number order is maintained). -#define kUpb_MtDataEncoder_MinSize 16 +#ifndef UPB_REFLECTION_SERVICE_DEF_INTERNAL_H_ +#define UPB_REFLECTION_SERVICE_DEF_INTERNAL_H_ -typedef struct { - char* end; // Limit of the buffer passed as a parameter. - // Aliased to internal-only members in .cc. - char internal[32]; -} upb_MtDataEncoder; + +// Must be last. #ifdef __cplusplus extern "C" { #endif -// Encodes field/oneof information for a given message. The sequence of calls -// should look like: -// -// upb_MtDataEncoder e; -// char buf[256]; -// char* ptr = buf; -// e.end = ptr + sizeof(buf); -// unit64_t msg_mod = ...; // bitwise & of kUpb_MessageModifiers or zero -// ptr = upb_MtDataEncoder_StartMessage(&e, ptr, msg_mod); -// // Fields *must* be in field number order. -// ptr = upb_MtDataEncoder_PutField(&e, ptr, ...); -// ptr = upb_MtDataEncoder_PutField(&e, ptr, ...); -// ptr = upb_MtDataEncoder_PutField(&e, ptr, ...); -// -// // If oneofs are present. Oneofs must be encoded after regular fields. 
-// ptr = upb_MiniTable_StartOneof(&e, ptr) -// ptr = upb_MiniTable_PutOneofField(&e, ptr, ...); -// ptr = upb_MiniTable_PutOneofField(&e, ptr, ...); -// -// ptr = upb_MiniTable_StartOneof(&e, ptr); -// ptr = upb_MiniTable_PutOneofField(&e, ptr, ...); -// ptr = upb_MiniTable_PutOneofField(&e, ptr, ...); -// -// Oneofs must be encoded after all regular fields. -char* upb_MtDataEncoder_StartMessage(upb_MtDataEncoder* e, char* ptr, - uint64_t msg_mod); -char* upb_MtDataEncoder_PutField(upb_MtDataEncoder* e, char* ptr, - upb_FieldType type, uint32_t field_num, - uint64_t field_mod); -char* upb_MtDataEncoder_StartOneof(upb_MtDataEncoder* e, char* ptr); -char* upb_MtDataEncoder_PutOneofField(upb_MtDataEncoder* e, char* ptr, - uint32_t field_num); - -// Encodes the set of values for a given enum. The values must be given in -// order (after casting to uint32_t), and repeats are not allowed. -char* upb_MtDataEncoder_StartEnum(upb_MtDataEncoder* e, char* ptr); -char* upb_MtDataEncoder_PutEnumValue(upb_MtDataEncoder* e, char* ptr, - uint32_t val); -char* upb_MtDataEncoder_EndEnum(upb_MtDataEncoder* e, char* ptr); - -// Encodes an entire mini descriptor for an extension. -char* upb_MtDataEncoder_EncodeExtension(upb_MtDataEncoder* e, char* ptr, - upb_FieldType type, uint32_t field_num, - uint64_t field_mod); - -// Encodes an entire mini descriptor for a map. -char* upb_MtDataEncoder_EncodeMap(upb_MtDataEncoder* e, char* ptr, - upb_FieldType key_type, - upb_FieldType value_type, uint64_t key_mod, - uint64_t value_mod); +upb_ServiceDef* _upb_ServiceDef_At(const upb_ServiceDef* s, int i); -// Encodes an entire mini descriptor for a message set. -char* upb_MtDataEncoder_EncodeMessageSet(upb_MtDataEncoder* e, char* ptr); +// Allocate and initialize an array of |n| service defs. 
+upb_ServiceDef* _upb_ServiceDefs_New( + upb_DefBuilder* ctx, int n, + const UPB_DESC(ServiceDescriptorProto) * const* protos); #ifdef __cplusplus } /* extern "C" */ #endif -#endif /* UPB_MINI_DESCRIPTOR_INTERNAL_ENCODE_H_ */ +#endif /* UPB_REFLECTION_SERVICE_DEF_INTERNAL_H_ */ + +#ifndef UPB_REFLECTION_DESC_STATE_INTERNAL_H_ +#define UPB_REFLECTION_DESC_STATE_INTERNAL_H_ + // Must be last. @@ -12454,28 +12633,6 @@ upb_ExtensionRange* _upb_ExtensionRanges_New( #endif /* UPB_REFLECTION_EXTENSION_RANGE_INTERNAL_H_ */ -#ifndef UPB_MINI_DESCRIPTOR_INTERNAL_MODIFIERS_H_ -#define UPB_MINI_DESCRIPTOR_INTERNAL_MODIFIERS_H_ - -// Must be last. - -typedef enum { - kUpb_FieldModifier_IsRepeated = 1 << 0, - kUpb_FieldModifier_IsPacked = 1 << 1, - kUpb_FieldModifier_IsClosedEnum = 1 << 2, - kUpb_FieldModifier_IsProto3Singular = 1 << 3, - kUpb_FieldModifier_IsRequired = 1 << 4, -} kUpb_FieldModifier; - -typedef enum { - kUpb_MessageModifier_ValidateUtf8 = 1 << 0, - kUpb_MessageModifier_DefaultIsPacked = 1 << 1, - kUpb_MessageModifier_IsExtendable = 1 << 2, -} kUpb_MessageModifier; - - -#endif // UPB_MINI_DESCRIPTOR_INTERNAL_MODIFIERS_H_ - #ifndef UPB_REFLECTION_ONEOF_DEF_INTERNAL_H_ #define UPB_REFLECTION_ONEOF_DEF_INTERNAL_H_ @@ -12729,163 +12886,6 @@ UPB_INLINE uint32_t _upb_FastDecoder_LoadTag(const char* ptr) { #endif /* UPB_WIRE_INTERNAL_DECODE_H_ */ -#ifndef UPB_MINI_DESCRIPTOR_INTERNAL_BASE92_H_ -#define UPB_MINI_DESCRIPTOR_INTERNAL_BASE92_H_ - - -// Must be last. 
- -#ifdef __cplusplus -extern "C" { -#endif - -UPB_INLINE char _upb_ToBase92(int8_t ch) { - extern const char _kUpb_ToBase92[]; - UPB_ASSERT(0 <= ch && ch < 92); - return _kUpb_ToBase92[ch]; -} - -UPB_INLINE char _upb_FromBase92(uint8_t ch) { - extern const int8_t _kUpb_FromBase92[]; - if (' ' > ch || ch > '~') return -1; - return _kUpb_FromBase92[ch - ' ']; -} - -UPB_INLINE const char* _upb_Base92_DecodeVarint(const char* ptr, - const char* end, char first_ch, - uint8_t min, uint8_t max, - uint32_t* out_val) { - uint32_t val = 0; - uint32_t shift = 0; - const int bits_per_char = - upb_Log2Ceiling(_upb_FromBase92(max) - _upb_FromBase92(min)); - char ch = first_ch; - while (1) { - uint32_t bits = _upb_FromBase92(ch) - _upb_FromBase92(min); - val |= bits << shift; - if (ptr == end || *ptr < min || max < *ptr) { - *out_val = val; - UPB_ASSUME(ptr != NULL); - return ptr; - } - ch = *ptr++; - shift += bits_per_char; - if (shift >= 32) return NULL; - } -} - -#ifdef __cplusplus -} /* extern "C" */ -#endif - - -#endif // UPB_MINI_DESCRIPTOR_INTERNAL_BASE92_H_ - -#ifndef UPB_MINI_DESCRIPTOR_INTERNAL_WIRE_CONSTANTS_H_ -#define UPB_MINI_DESCRIPTOR_INTERNAL_WIRE_CONSTANTS_H_ - - -// Must be last. 
- -typedef enum { - kUpb_EncodedType_Double = 0, - kUpb_EncodedType_Float = 1, - kUpb_EncodedType_Fixed32 = 2, - kUpb_EncodedType_Fixed64 = 3, - kUpb_EncodedType_SFixed32 = 4, - kUpb_EncodedType_SFixed64 = 5, - kUpb_EncodedType_Int32 = 6, - kUpb_EncodedType_UInt32 = 7, - kUpb_EncodedType_SInt32 = 8, - kUpb_EncodedType_Int64 = 9, - kUpb_EncodedType_UInt64 = 10, - kUpb_EncodedType_SInt64 = 11, - kUpb_EncodedType_OpenEnum = 12, - kUpb_EncodedType_Bool = 13, - kUpb_EncodedType_Bytes = 14, - kUpb_EncodedType_String = 15, - kUpb_EncodedType_Group = 16, - kUpb_EncodedType_Message = 17, - kUpb_EncodedType_ClosedEnum = 18, - - kUpb_EncodedType_RepeatedBase = 20, -} upb_EncodedType; - -typedef enum { - kUpb_EncodedFieldModifier_FlipPacked = 1 << 0, - kUpb_EncodedFieldModifier_IsRequired = 1 << 1, - kUpb_EncodedFieldModifier_IsProto3Singular = 1 << 2, -} upb_EncodedFieldModifier; - -enum { - kUpb_EncodedValue_MinField = ' ', - kUpb_EncodedValue_MaxField = 'I', - kUpb_EncodedValue_MinModifier = 'L', - kUpb_EncodedValue_MaxModifier = '[', - kUpb_EncodedValue_End = '^', - kUpb_EncodedValue_MinSkip = '_', - kUpb_EncodedValue_MaxSkip = '~', - kUpb_EncodedValue_OneofSeparator = '~', - kUpb_EncodedValue_FieldSeparator = '|', - kUpb_EncodedValue_MinOneofField = ' ', - kUpb_EncodedValue_MaxOneofField = 'b', - kUpb_EncodedValue_MaxEnumMask = 'A', -}; - -enum { - kUpb_EncodedVersion_EnumV1 = '!', - kUpb_EncodedVersion_ExtensionV1 = '#', - kUpb_EncodedVersion_MapV1 = '%', - kUpb_EncodedVersion_MessageV1 = '$', - kUpb_EncodedVersion_MessageSetV1 = '&', -}; - - -#endif // UPB_MINI_DESCRIPTOR_INTERNAL_WIRE_CONSTANTS_H_ - -#ifndef UPB_MINI_DESCRIPTOR_INTERNAL_DECODER_H_ -#define UPB_MINI_DESCRIPTOR_INTERNAL_DECODER_H_ - - -// Must be last. - -// upb_MdDecoder: used internally for decoding MiniDescriptors for messages, -// extensions, and enums. 
-typedef struct { - const char* end; - upb_Status* status; - jmp_buf err; -} upb_MdDecoder; - -UPB_PRINTF(2, 3) -UPB_NORETURN UPB_INLINE void upb_MdDecoder_ErrorJmp(upb_MdDecoder* d, - const char* fmt, ...) { - if (d->status) { - va_list argp; - upb_Status_SetErrorMessage(d->status, "Error building mini table: "); - va_start(argp, fmt); - upb_Status_VAppendErrorFormat(d->status, fmt, argp); - va_end(argp); - } - UPB_LONGJMP(d->err, 1); -} - -UPB_INLINE void upb_MdDecoder_CheckOutOfMemory(upb_MdDecoder* d, - const void* ptr) { - if (!ptr) upb_MdDecoder_ErrorJmp(d, "Out of memory"); -} - -UPB_INLINE const char* upb_MdDecoder_DecodeBase92Varint( - upb_MdDecoder* d, const char* ptr, char first_ch, uint8_t min, uint8_t max, - uint32_t* out_val) { - ptr = _upb_Base92_DecodeVarint(ptr, d->end, first_ch, min, max, out_val); - if (!ptr) upb_MdDecoder_ErrorJmp(d, "Overlong varint"); - return ptr; -} - - -#endif // UPB_MINI_DESCRIPTOR_INTERNAL_DECODER_H_ - // This should #undef all macros #defined in def.inc #undef UPB_SIZE diff --git a/protobuf_deps.bzl b/protobuf_deps.bzl index d100b9a19e..1c622f88e9 100644 --- a/protobuf_deps.bzl +++ b/protobuf_deps.bzl @@ -150,7 +150,6 @@ def protobuf_deps(): _github_archive( name = "upb", repo = "https://github.com/protocolbuffers/upb", - commit = "58877b55e0796bcca743e9bd4d2be42092562f30", - sha256 = "b3a3279cffb91e22fd38fb316a39ded87211bf89ff39337c7f151d88ed3b71fd", - patches = ["@com_google_protobuf//build_defs:upb.patch"], + commit = "cd176a0cd1913ea8c643680d206c8ca278815c2c", + sha256 = "661b2d63dff82c6868cd1dea5e7be2ca1a6467746a146c293834f18aaa709077", ) diff --git a/ruby/ext/google/protobuf_c/ruby-upb.c b/ruby/ext/google/protobuf_c/ruby-upb.c index a2dd07e35e..1ea861b97c 100644 --- a/ruby/ext/google/protobuf_c/ruby-upb.c +++ b/ruby/ext/google/protobuf_c/ruby-upb.c @@ -373,8 +373,8 @@ void upb_Status_VAppendErrorFormat(upb_Status* status, const char* fmt, status->msg[_kUpb_Status_MaxMessage - 1] = '\0'; } -#include +#include // 
Must be last. @@ -609,6 +609,7 @@ upb_Map* _upb_Map_New(upb_Arena* a, size_t key_size, size_t value_size) { } + // Must be last. static void _upb_mapsorter_getkeys(const void* _a, const void* _b, void* a_key, @@ -5871,6164 +5872,5381 @@ size_t upb_Message_ExtensionCount(const upb_Message* msg) { return count; } -#include // Must be last. -/* The upb core does not generally have a concept of default instances. However - * for descriptor options we make an exception since the max size is known and - * modest (<200 bytes). All types can share a default instance since it is - * initialized to zeroes. - * - * We have to allocate an extra pointer for upb's internal metadata. */ -static const char opt_default_buf[_UPB_MAXOPT_SIZE + sizeof(void*)] = {0}; -const char* kUpbDefOptDefault = &opt_default_buf[sizeof(void*)]; - -const char* _upb_DefBuilder_FullToShort(const char* fullname) { - const char* p; - - if (fullname == NULL) { - return NULL; - } else if ((p = strrchr(fullname, '.')) == NULL) { - /* No '.' in the name, return the full string. */ - return fullname; - } else { - /* Return one past the last '.'. */ - return p + 1; - } -} - -void _upb_DefBuilder_FailJmp(upb_DefBuilder* ctx) { UPB_LONGJMP(ctx->err, 1); } - -void _upb_DefBuilder_Errf(upb_DefBuilder* ctx, const char* fmt, ...) { - va_list argp; - va_start(argp, fmt); - upb_Status_VSetErrorFormat(ctx->status, fmt, argp); - va_end(argp); - _upb_DefBuilder_FailJmp(ctx); -} +typedef struct { + upb_MdDecoder base; + upb_Arena* arena; + upb_MiniTableEnum* enum_table; + uint32_t enum_value_count; + uint32_t enum_data_count; + uint32_t enum_data_capacity; +} upb_MdEnumDecoder; -void _upb_DefBuilder_OomErr(upb_DefBuilder* ctx) { - upb_Status_SetErrorMessage(ctx->status, "out of memory"); - _upb_DefBuilder_FailJmp(ctx); +static size_t upb_MiniTableEnum_Size(size_t count) { + return sizeof(upb_MiniTableEnum) + count * sizeof(uint32_t); } -// Verify a relative identifier string. The loop is branchless for speed. 
-static void _upb_DefBuilder_CheckIdentNotFull(upb_DefBuilder* ctx, - upb_StringView name) { - bool good = name.size > 0; - - for (size_t i = 0; i < name.size; i++) { - const char c = name.data[i]; - const char d = c | 0x20; // force lowercase - const bool is_alpha = (('a' <= d) & (d <= 'z')) | (c == '_'); - const bool is_numer = ('0' <= c) & (c <= '9') & (i != 0); - - good &= is_alpha | is_numer; +static upb_MiniTableEnum* _upb_MiniTable_AddEnumDataMember(upb_MdEnumDecoder* d, + uint32_t val) { + if (d->enum_data_count == d->enum_data_capacity) { + size_t old_sz = upb_MiniTableEnum_Size(d->enum_data_capacity); + d->enum_data_capacity = UPB_MAX(2, d->enum_data_capacity * 2); + size_t new_sz = upb_MiniTableEnum_Size(d->enum_data_capacity); + d->enum_table = upb_Arena_Realloc(d->arena, d->enum_table, old_sz, new_sz); + upb_MdDecoder_CheckOutOfMemory(&d->base, d->enum_table); } - - if (!good) _upb_DefBuilder_CheckIdentSlow(ctx, name, false); + d->enum_table->data[d->enum_data_count++] = val; + return d->enum_table; } -const char* _upb_DefBuilder_MakeFullName(upb_DefBuilder* ctx, - const char* prefix, - upb_StringView name) { - _upb_DefBuilder_CheckIdentNotFull(ctx, name); - if (prefix) { - // ret = prefix + '.' 
+ name; - size_t n = strlen(prefix); - char* ret = _upb_DefBuilder_Alloc(ctx, n + name.size + 2); - strcpy(ret, prefix); - ret[n] = '.'; - memcpy(&ret[n + 1], name.data, name.size); - ret[n + 1 + name.size] = '\0'; - return ret; +static void upb_MiniTableEnum_BuildValue(upb_MdEnumDecoder* d, uint32_t val) { + upb_MiniTableEnum* table = d->enum_table; + d->enum_value_count++; + if (table->value_count || (val > 512 && d->enum_value_count < val / 32)) { + if (table->value_count == 0) { + assert(d->enum_data_count == table->mask_limit / 32); + } + table = _upb_MiniTable_AddEnumDataMember(d, val); + table->value_count++; } else { - char* ret = upb_strdup2(name.data, name.size, ctx->arena); - if (!ret) _upb_DefBuilder_OomErr(ctx); - return ret; + uint32_t new_mask_limit = ((val / 32) + 1) * 32; + while (table->mask_limit < new_mask_limit) { + table = _upb_MiniTable_AddEnumDataMember(d, 0); + table->mask_limit += 32; + } + table->data[val / 32] |= 1ULL << (val % 32); } } -static bool remove_component(char* base, size_t* len) { - if (*len == 0) return false; - - for (size_t i = *len - 1; i > 0; i--) { - if (base[i] == '.') { - *len = i; - return true; +static upb_MiniTableEnum* upb_MtDecoder_DoBuildMiniTableEnum( + upb_MdEnumDecoder* d, const char* data, size_t len) { + // If the string is non-empty then it must begin with a version tag. + if (len) { + if (*data != kUpb_EncodedVersion_EnumV1) { + upb_MdDecoder_ErrorJmp(&d->base, "Invalid enum version: %c", *data); } + data++; + len--; } - *len = 0; - return true; -} + upb_MdDecoder_CheckOutOfMemory(&d->base, d->enum_table); -const void* _upb_DefBuilder_ResolveAny(upb_DefBuilder* ctx, - const char* from_name_dbg, - const char* base, upb_StringView sym, - upb_deftype_t* type) { - if (sym.size == 0) goto notfound; - upb_value v; - if (sym.data[0] == '.') { - /* Symbols starting with '.' are absolute, so we do a single lookup. - * Slice to omit the leading '.' 
*/ - if (!_upb_DefPool_LookupSym(ctx->symtab, sym.data + 1, sym.size - 1, &v)) { - goto notfound; - } - } else { - /* Remove components from base until we find an entry or run out. */ - size_t baselen = base ? strlen(base) : 0; - char* tmp = malloc(sym.size + baselen + 1); - while (1) { - char* p = tmp; - if (baselen) { - memcpy(p, base, baselen); - p[baselen] = '.'; - p += baselen + 1; - } - memcpy(p, sym.data, sym.size); - p += sym.size; - if (_upb_DefPool_LookupSym(ctx->symtab, tmp, p - tmp, &v)) { - break; - } - if (!remove_component(tmp, &baselen)) { - free(tmp); - goto notfound; + // Guarantee at least 64 bits of mask without checking mask size. + d->enum_table->mask_limit = 64; + d->enum_table = _upb_MiniTable_AddEnumDataMember(d, 0); + d->enum_table = _upb_MiniTable_AddEnumDataMember(d, 0); + + d->enum_table->value_count = 0; + + const char* ptr = data; + uint32_t base = 0; + + while (ptr < d->base.end) { + char ch = *ptr++; + if (ch <= kUpb_EncodedValue_MaxEnumMask) { + uint32_t mask = _upb_FromBase92(ch); + for (int i = 0; i < 5; i++, base++, mask >>= 1) { + if (mask & 1) upb_MiniTableEnum_BuildValue(d, base); } + } else if (kUpb_EncodedValue_MinSkip <= ch && + ch <= kUpb_EncodedValue_MaxSkip) { + uint32_t skip; + ptr = upb_MdDecoder_DecodeBase92Varint(&d->base, ptr, ch, + kUpb_EncodedValue_MinSkip, + kUpb_EncodedValue_MaxSkip, &skip); + base += skip; + } else { + upb_MdDecoder_ErrorJmp(&d->base, "Unexpected character: %c", ch); } - free(tmp); } - *type = _upb_DefType_Type(v); - return _upb_DefType_Unpack(v, *type); - -notfound: - _upb_DefBuilder_Errf(ctx, "couldn't resolve name '" UPB_STRINGVIEW_FORMAT "'", - UPB_STRINGVIEW_ARGS(sym)); + return d->enum_table; } -const void* _upb_DefBuilder_Resolve(upb_DefBuilder* ctx, - const char* from_name_dbg, const char* base, - upb_StringView sym, upb_deftype_t type) { - upb_deftype_t found_type; - const void* ret = - _upb_DefBuilder_ResolveAny(ctx, from_name_dbg, base, sym, &found_type); - if (ret && found_type != 
type) { - _upb_DefBuilder_Errf(ctx, - "type mismatch when resolving %s: couldn't find " - "name " UPB_STRINGVIEW_FORMAT " with type=%d", - from_name_dbg, UPB_STRINGVIEW_ARGS(sym), (int)type); - } - return ret; +static upb_MiniTableEnum* upb_MtDecoder_BuildMiniTableEnum( + upb_MdEnumDecoder* const decoder, const char* const data, size_t const len) { + if (UPB_SETJMP(decoder->base.err) != 0) return NULL; + return upb_MtDecoder_DoBuildMiniTableEnum(decoder, data, len); } -// Per ASCII this will lower-case a letter. If the result is a letter, the -// input was definitely a letter. If the output is not a letter, this may -// have transformed the character unpredictably. -static char upb_ascii_lower(char ch) { return ch | 0x20; } +upb_MiniTableEnum* upb_MiniDescriptor_BuildEnum(const char* data, size_t len, + upb_Arena* arena, + upb_Status* status) { + upb_MdEnumDecoder decoder = { + .base = + { + .end = UPB_PTRADD(data, len), + .status = status, + }, + .arena = arena, + .enum_table = upb_Arena_Malloc(arena, upb_MiniTableEnum_Size(2)), + .enum_value_count = 0, + .enum_data_count = 0, + .enum_data_capacity = 1, + }; -// isalpha() etc. from are locale-dependent, which we don't want. 
-static bool upb_isbetween(uint8_t c, uint8_t low, uint8_t high) { - return low <= c && c <= high; + return upb_MtDecoder_BuildMiniTableEnum(&decoder, data, len); } -static bool upb_isletter(char c) { - char lower = upb_ascii_lower(c); - return upb_isbetween(lower, 'a', 'z') || c == '_'; -} -static bool upb_isalphanum(char c) { - return upb_isletter(c) || upb_isbetween(c, '0', '9'); -} +#include +#include -static bool TryGetChar(const char** src, const char* end, char* ch) { - if (*src == end) return false; - *ch = **src; - *src += 1; - return true; -} -static int TryGetHexDigit(const char** src, const char* end) { - char ch; - if (!TryGetChar(src, end, &ch)) return -1; - if ('0' <= ch && ch <= '9') { - return ch - '0'; - } - ch = upb_ascii_lower(ch); - if ('a' <= ch && ch <= 'f') { - return ch - 'a' + 0xa; - } - *src -= 1; // Char wasn't actually a hex digit. - return -1; -} +// Must be last. -static char upb_DefBuilder_ParseHexEscape(upb_DefBuilder* ctx, - const upb_FieldDef* f, - const char** src, const char* end) { - int hex_digit = TryGetHexDigit(src, end); - if (hex_digit < 0) { - _upb_DefBuilder_Errf( - ctx, "\\x must be followed by at least one hex digit (field='%s')", - upb_FieldDef_FullName(f)); - return 0; - } - unsigned int ret = hex_digit; - while ((hex_digit = TryGetHexDigit(src, end)) >= 0) { - ret = (ret << 4) | hex_digit; - } - if (ret > 0xff) { - _upb_DefBuilder_Errf(ctx, "Value of hex escape in field %s exceeds 8 bits", - upb_FieldDef_FullName(f)); - return 0; - } - return ret; -} - -static char TryGetOctalDigit(const char** src, const char* end) { - char ch; - if (!TryGetChar(src, end, &ch)) return -1; - if ('0' <= ch && ch <= '7') { - return ch - '0'; - } - *src -= 1; // Char wasn't actually an octal digit. 
- return -1; -} - -static char upb_DefBuilder_ParseOctalEscape(upb_DefBuilder* ctx, - const upb_FieldDef* f, - const char** src, const char* end) { - char ch = 0; - for (int i = 0; i < 3; i++) { - char digit; - if ((digit = TryGetOctalDigit(src, end)) >= 0) { - ch = (ch << 3) | digit; - } - } - return ch; -} - -char _upb_DefBuilder_ParseEscape(upb_DefBuilder* ctx, const upb_FieldDef* f, - const char** src, const char* end) { - char ch; - if (!TryGetChar(src, end, &ch)) { - _upb_DefBuilder_Errf(ctx, "unterminated escape sequence in field %s", - upb_FieldDef_FullName(f)); - return 0; - } - switch (ch) { - case 'a': - return '\a'; - case 'b': - return '\b'; - case 'f': - return '\f'; - case 'n': - return '\n'; - case 'r': - return '\r'; - case 't': - return '\t'; - case 'v': - return '\v'; - case '\\': - return '\\'; - case '\'': - return '\''; - case '\"': - return '\"'; - case '?': - return '\?'; - case 'x': - case 'X': - return upb_DefBuilder_ParseHexEscape(ctx, f, src, end); - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - *src -= 1; - return upb_DefBuilder_ParseOctalEscape(ctx, f, src, end); - } - _upb_DefBuilder_Errf(ctx, "Unknown escape sequence: \\%c", ch); -} +// Note: we sort by this number when calculating layout order. +typedef enum { + kUpb_LayoutItemType_OneofCase, // Oneof case. + kUpb_LayoutItemType_OneofField, // Oneof field data. + kUpb_LayoutItemType_Field, // Non-oneof field data. -void _upb_DefBuilder_CheckIdentSlow(upb_DefBuilder* ctx, upb_StringView name, - bool full) { - const char* str = name.data; - const size_t len = name.size; - bool start = true; - for (size_t i = 0; i < len; i++) { - const char c = str[i]; - if (c == '.') { - if (start || !full) { - _upb_DefBuilder_Errf( - ctx, "invalid name: unexpected '.' 
(" UPB_STRINGVIEW_FORMAT ")", - UPB_STRINGVIEW_ARGS(name)); - } - start = true; - } else if (start) { - if (!upb_isletter(c)) { - _upb_DefBuilder_Errf(ctx, - "invalid name: path components must start with a " - "letter (" UPB_STRINGVIEW_FORMAT ")", - UPB_STRINGVIEW_ARGS(name)); - } - start = false; - } else if (!upb_isalphanum(c)) { - _upb_DefBuilder_Errf( - ctx, - "invalid name: non-alphanumeric character (" UPB_STRINGVIEW_FORMAT - ")", - UPB_STRINGVIEW_ARGS(name)); - } - } - if (start) { - _upb_DefBuilder_Errf(ctx, - "invalid name: empty part (" UPB_STRINGVIEW_FORMAT ")", - UPB_STRINGVIEW_ARGS(name)); - } + kUpb_LayoutItemType_Max = kUpb_LayoutItemType_Field, +} upb_LayoutItemType; - // We should never reach this point. - UPB_ASSERT(false); -} +#define kUpb_LayoutItem_IndexSentinel ((uint16_t)-1) +typedef struct { + // Index of the corresponding field. When this is a oneof field, the field's + // offset will be the index of the next field in a linked list. + uint16_t field_index; + uint16_t offset; + upb_FieldRep rep; + upb_LayoutItemType type; +} upb_LayoutItem; -// Must be last. +typedef struct { + upb_LayoutItem* data; + size_t size; + size_t capacity; +} upb_LayoutItemVector; -struct upb_DefPool { - upb_Arena* arena; - upb_strtable syms; // full_name -> packed def ptr - upb_strtable files; // file_name -> (upb_FileDef*) - upb_inttable exts; // (upb_MiniTableExtension*) -> (upb_FieldDef*) - upb_ExtensionRegistry* extreg; +typedef struct { + upb_MdDecoder base; + upb_MiniTable* table; + upb_MiniTableField* fields; upb_MiniTablePlatform platform; - void* scratch_data; - size_t scratch_size; - size_t bytes_loaded; + upb_LayoutItemVector vec; + upb_Arena* arena; +} upb_MtDecoder; + +// In each field's offset, we temporarily store a presence classifier: +enum PresenceClass { + kNoPresence = 0, + kHasbitPresence = 1, + kRequiredPresence = 2, + kOneofBase = 3, + // Negative values refer to a specific oneof with that number. 
Positive + // values >= kOneofBase indicate that this field is in a oneof, and specify + // the next field in this oneof's linked list. }; -void upb_DefPool_Free(upb_DefPool* s) { - upb_Arena_Free(s->arena); - upb_gfree(s->scratch_data); - upb_gfree(s); +static bool upb_MtDecoder_FieldIsPackable(upb_MiniTableField* field) { + return (field->mode & kUpb_FieldMode_Array) && + upb_FieldType_IsPackable(field->UPB_PRIVATE(descriptortype)); } -upb_DefPool* upb_DefPool_New(void) { - upb_DefPool* s = upb_gmalloc(sizeof(*s)); - if (!s) return NULL; - - s->arena = upb_Arena_New(); - s->bytes_loaded = 0; - - s->scratch_size = 240; - s->scratch_data = upb_gmalloc(s->scratch_size); - if (!s->scratch_data) goto err; - - if (!upb_strtable_init(&s->syms, 32, s->arena)) goto err; - if (!upb_strtable_init(&s->files, 4, s->arena)) goto err; - if (!upb_inttable_init(&s->exts, s->arena)) goto err; - - s->extreg = upb_ExtensionRegistry_New(s->arena); - if (!s->extreg) goto err; +typedef struct { + uint16_t submsg_count; + uint16_t subenum_count; +} upb_SubCounts; - s->platform = kUpb_MiniTablePlatform_Native; +static void upb_MiniTable_SetTypeAndSub(upb_MiniTableField* field, + upb_FieldType type, + upb_SubCounts* sub_counts, + uint64_t msg_modifiers, + bool is_proto3_enum) { + if (is_proto3_enum) { + UPB_ASSERT(type == kUpb_FieldType_Enum); + type = kUpb_FieldType_Int32; + field->mode |= kUpb_LabelFlags_IsAlternate; + } else if (type == kUpb_FieldType_String && + !(msg_modifiers & kUpb_MessageModifier_ValidateUtf8)) { + type = kUpb_FieldType_Bytes; + field->mode |= kUpb_LabelFlags_IsAlternate; + } - return s; + field->UPB_PRIVATE(descriptortype) = type; -err: - upb_DefPool_Free(s); - return NULL; -} + if (upb_MtDecoder_FieldIsPackable(field) && + (msg_modifiers & kUpb_MessageModifier_DefaultIsPacked)) { + field->mode |= kUpb_LabelFlags_IsPacked; + } -bool _upb_DefPool_InsertExt(upb_DefPool* s, const upb_MiniTableExtension* ext, - const upb_FieldDef* f) { - return 
upb_inttable_insert(&s->exts, (uintptr_t)ext, upb_value_constptr(f), - s->arena); + if (type == kUpb_FieldType_Message || type == kUpb_FieldType_Group) { + field->UPB_PRIVATE(submsg_index) = sub_counts->submsg_count++; + } else if (type == kUpb_FieldType_Enum) { + // We will need to update this later once we know the total number of + // submsg fields. + field->UPB_PRIVATE(submsg_index) = sub_counts->subenum_count++; + } else { + field->UPB_PRIVATE(submsg_index) = kUpb_NoSub; + } } -bool _upb_DefPool_InsertSym(upb_DefPool* s, upb_StringView sym, upb_value v, - upb_Status* status) { - // TODO: table should support an operation "tryinsert" to avoid the double - // lookup. - if (upb_strtable_lookup2(&s->syms, sym.data, sym.size, NULL)) { - upb_Status_SetErrorFormat(status, "duplicate symbol '%s'", sym.data); - return false; - } - if (!upb_strtable_insert(&s->syms, sym.data, sym.size, v, s->arena)) { - upb_Status_SetErrorMessage(status, "out of memory"); - return false; - } - return true; -} +static const char kUpb_EncodedToType[] = { + [kUpb_EncodedType_Double] = kUpb_FieldType_Double, + [kUpb_EncodedType_Float] = kUpb_FieldType_Float, + [kUpb_EncodedType_Int64] = kUpb_FieldType_Int64, + [kUpb_EncodedType_UInt64] = kUpb_FieldType_UInt64, + [kUpb_EncodedType_Int32] = kUpb_FieldType_Int32, + [kUpb_EncodedType_Fixed64] = kUpb_FieldType_Fixed64, + [kUpb_EncodedType_Fixed32] = kUpb_FieldType_Fixed32, + [kUpb_EncodedType_Bool] = kUpb_FieldType_Bool, + [kUpb_EncodedType_String] = kUpb_FieldType_String, + [kUpb_EncodedType_Group] = kUpb_FieldType_Group, + [kUpb_EncodedType_Message] = kUpb_FieldType_Message, + [kUpb_EncodedType_Bytes] = kUpb_FieldType_Bytes, + [kUpb_EncodedType_UInt32] = kUpb_FieldType_UInt32, + [kUpb_EncodedType_OpenEnum] = kUpb_FieldType_Enum, + [kUpb_EncodedType_SFixed32] = kUpb_FieldType_SFixed32, + [kUpb_EncodedType_SFixed64] = kUpb_FieldType_SFixed64, + [kUpb_EncodedType_SInt32] = kUpb_FieldType_SInt32, + [kUpb_EncodedType_SInt64] = 
kUpb_FieldType_SInt64, + [kUpb_EncodedType_ClosedEnum] = kUpb_FieldType_Enum, +}; -static const void* _upb_DefPool_Unpack(const upb_DefPool* s, const char* sym, - size_t size, upb_deftype_t type) { - upb_value v; - return upb_strtable_lookup2(&s->syms, sym, size, &v) - ? _upb_DefType_Unpack(v, type) - : NULL; -} +static void upb_MiniTable_SetField(upb_MtDecoder* d, uint8_t ch, + upb_MiniTableField* field, + uint64_t msg_modifiers, + upb_SubCounts* sub_counts) { + static const char kUpb_EncodedToFieldRep[] = { + [kUpb_EncodedType_Double] = kUpb_FieldRep_8Byte, + [kUpb_EncodedType_Float] = kUpb_FieldRep_4Byte, + [kUpb_EncodedType_Int64] = kUpb_FieldRep_8Byte, + [kUpb_EncodedType_UInt64] = kUpb_FieldRep_8Byte, + [kUpb_EncodedType_Int32] = kUpb_FieldRep_4Byte, + [kUpb_EncodedType_Fixed64] = kUpb_FieldRep_8Byte, + [kUpb_EncodedType_Fixed32] = kUpb_FieldRep_4Byte, + [kUpb_EncodedType_Bool] = kUpb_FieldRep_1Byte, + [kUpb_EncodedType_String] = kUpb_FieldRep_StringView, + [kUpb_EncodedType_Bytes] = kUpb_FieldRep_StringView, + [kUpb_EncodedType_UInt32] = kUpb_FieldRep_4Byte, + [kUpb_EncodedType_OpenEnum] = kUpb_FieldRep_4Byte, + [kUpb_EncodedType_SFixed32] = kUpb_FieldRep_4Byte, + [kUpb_EncodedType_SFixed64] = kUpb_FieldRep_8Byte, + [kUpb_EncodedType_SInt32] = kUpb_FieldRep_4Byte, + [kUpb_EncodedType_SInt64] = kUpb_FieldRep_8Byte, + [kUpb_EncodedType_ClosedEnum] = kUpb_FieldRep_4Byte, + }; -bool _upb_DefPool_LookupSym(const upb_DefPool* s, const char* sym, size_t size, - upb_value* v) { - return upb_strtable_lookup2(&s->syms, sym, size, v); -} + char pointer_rep = d->platform == kUpb_MiniTablePlatform_32Bit + ? 
kUpb_FieldRep_4Byte + : kUpb_FieldRep_8Byte; -upb_ExtensionRegistry* _upb_DefPool_ExtReg(const upb_DefPool* s) { - return s->extreg; + int8_t type = _upb_FromBase92(ch); + if (ch >= _upb_ToBase92(kUpb_EncodedType_RepeatedBase)) { + type -= kUpb_EncodedType_RepeatedBase; + field->mode = kUpb_FieldMode_Array; + field->mode |= pointer_rep << kUpb_FieldRep_Shift; + field->offset = kNoPresence; + } else { + field->mode = kUpb_FieldMode_Scalar; + field->offset = kHasbitPresence; + if (type == kUpb_EncodedType_Group || type == kUpb_EncodedType_Message) { + field->mode |= pointer_rep << kUpb_FieldRep_Shift; + } else if ((unsigned long)type >= sizeof(kUpb_EncodedToFieldRep)) { + upb_MdDecoder_ErrorJmp(&d->base, "Invalid field type: %d", (int)type); + } else { + field->mode |= kUpb_EncodedToFieldRep[type] << kUpb_FieldRep_Shift; + } + } + if ((unsigned long)type >= sizeof(kUpb_EncodedToType)) { + upb_MdDecoder_ErrorJmp(&d->base, "Invalid field type: %d", (int)type); + } + upb_MiniTable_SetTypeAndSub(field, kUpb_EncodedToType[type], sub_counts, + msg_modifiers, type == kUpb_EncodedType_OpenEnum); } -void** _upb_DefPool_ScratchData(const upb_DefPool* s) { - return (void**)&s->scratch_data; -} +static void upb_MtDecoder_ModifyField(upb_MtDecoder* d, + uint32_t message_modifiers, + uint32_t field_modifiers, + upb_MiniTableField* field) { + if (field_modifiers & kUpb_EncodedFieldModifier_FlipPacked) { + if (!upb_MtDecoder_FieldIsPackable(field)) { + upb_MdDecoder_ErrorJmp(&d->base, + "Cannot flip packed on unpackable field %" PRIu32, + field->number); + } + field->mode ^= kUpb_LabelFlags_IsPacked; + } -size_t* _upb_DefPool_ScratchSize(const upb_DefPool* s) { - return (size_t*)&s->scratch_size; -} + bool singular = field_modifiers & kUpb_EncodedFieldModifier_IsProto3Singular; + bool required = field_modifiers & kUpb_EncodedFieldModifier_IsRequired; -void _upb_DefPool_SetPlatform(upb_DefPool* s, upb_MiniTablePlatform platform) { - assert(upb_strtable_count(&s->files) == 0); - 
s->platform = platform; -} + // Validate. + if ((singular || required) && field->offset != kHasbitPresence) { + upb_MdDecoder_ErrorJmp(&d->base, + "Invalid modifier(s) for repeated field %" PRIu32, + field->number); + } + if (singular && required) { + upb_MdDecoder_ErrorJmp( + &d->base, "Field %" PRIu32 " cannot be both singular and required", + field->number); + } -const upb_MessageDef* upb_DefPool_FindMessageByName(const upb_DefPool* s, - const char* sym) { - return _upb_DefPool_Unpack(s, sym, strlen(sym), UPB_DEFTYPE_MSG); + if (singular) field->offset = kNoPresence; + if (required) { + field->offset = kRequiredPresence; + } } -const upb_MessageDef* upb_DefPool_FindMessageByNameWithSize( - const upb_DefPool* s, const char* sym, size_t len) { - return _upb_DefPool_Unpack(s, sym, len, UPB_DEFTYPE_MSG); +static void upb_MtDecoder_PushItem(upb_MtDecoder* d, upb_LayoutItem item) { + if (d->vec.size == d->vec.capacity) { + size_t new_cap = UPB_MAX(8, d->vec.size * 2); + d->vec.data = realloc(d->vec.data, new_cap * sizeof(*d->vec.data)); + upb_MdDecoder_CheckOutOfMemory(&d->base, d->vec.data); + d->vec.capacity = new_cap; + } + d->vec.data[d->vec.size++] = item; } -const upb_EnumDef* upb_DefPool_FindEnumByName(const upb_DefPool* s, - const char* sym) { - return _upb_DefPool_Unpack(s, sym, strlen(sym), UPB_DEFTYPE_ENUM); -} +static void upb_MtDecoder_PushOneof(upb_MtDecoder* d, upb_LayoutItem item) { + if (item.field_index == kUpb_LayoutItem_IndexSentinel) { + upb_MdDecoder_ErrorJmp(&d->base, "Empty oneof"); + } + item.field_index -= kOneofBase; -const upb_EnumValueDef* upb_DefPool_FindEnumByNameval(const upb_DefPool* s, - const char* sym) { - return _upb_DefPool_Unpack(s, sym, strlen(sym), UPB_DEFTYPE_ENUMVAL); + // Push oneof data. + item.type = kUpb_LayoutItemType_OneofField; + upb_MtDecoder_PushItem(d, item); + + // Push oneof case. + item.rep = kUpb_FieldRep_4Byte; // Field Number. 
+ item.type = kUpb_LayoutItemType_OneofCase; + upb_MtDecoder_PushItem(d, item); } -const upb_FileDef* upb_DefPool_FindFileByName(const upb_DefPool* s, - const char* name) { - upb_value v; - return upb_strtable_lookup(&s->files, name, &v) ? upb_value_getconstptr(v) - : NULL; +size_t upb_MtDecoder_SizeOfRep(upb_FieldRep rep, + upb_MiniTablePlatform platform) { + static const uint8_t kRepToSize32[] = { + [kUpb_FieldRep_1Byte] = 1, + [kUpb_FieldRep_4Byte] = 4, + [kUpb_FieldRep_StringView] = 8, + [kUpb_FieldRep_8Byte] = 8, + }; + static const uint8_t kRepToSize64[] = { + [kUpb_FieldRep_1Byte] = 1, + [kUpb_FieldRep_4Byte] = 4, + [kUpb_FieldRep_StringView] = 16, + [kUpb_FieldRep_8Byte] = 8, + }; + UPB_ASSERT(sizeof(upb_StringView) == + UPB_SIZE(kRepToSize32, kRepToSize64)[kUpb_FieldRep_StringView]); + return platform == kUpb_MiniTablePlatform_32Bit ? kRepToSize32[rep] + : kRepToSize64[rep]; } -const upb_FileDef* upb_DefPool_FindFileByNameWithSize(const upb_DefPool* s, - const char* name, - size_t len) { - upb_value v; - return upb_strtable_lookup2(&s->files, name, len, &v) - ? upb_value_getconstptr(v) - : NULL; +size_t upb_MtDecoder_AlignOfRep(upb_FieldRep rep, + upb_MiniTablePlatform platform) { + static const uint8_t kRepToAlign32[] = { + [kUpb_FieldRep_1Byte] = 1, + [kUpb_FieldRep_4Byte] = 4, + [kUpb_FieldRep_StringView] = 4, + [kUpb_FieldRep_8Byte] = 8, + }; + static const uint8_t kRepToAlign64[] = { + [kUpb_FieldRep_1Byte] = 1, + [kUpb_FieldRep_4Byte] = 4, + [kUpb_FieldRep_StringView] = 8, + [kUpb_FieldRep_8Byte] = 8, + }; + UPB_ASSERT(UPB_ALIGN_OF(upb_StringView) == + UPB_SIZE(kRepToAlign32, kRepToAlign64)[kUpb_FieldRep_StringView]); + return platform == kUpb_MiniTablePlatform_32Bit ? 
kRepToAlign32[rep] + : kRepToAlign64[rep]; } -const upb_FieldDef* upb_DefPool_FindExtensionByNameWithSize( - const upb_DefPool* s, const char* name, size_t size) { - upb_value v; - if (!upb_strtable_lookup2(&s->syms, name, size, &v)) return NULL; +static const char* upb_MtDecoder_DecodeOneofField(upb_MtDecoder* d, + const char* ptr, + char first_ch, + upb_LayoutItem* item) { + uint32_t field_num; + ptr = upb_MdDecoder_DecodeBase92Varint( + &d->base, ptr, first_ch, kUpb_EncodedValue_MinOneofField, + kUpb_EncodedValue_MaxOneofField, &field_num); + upb_MiniTableField* f = + (void*)upb_MiniTable_FindFieldByNumber(d->table, field_num); - switch (_upb_DefType_Type(v)) { - case UPB_DEFTYPE_FIELD: - return _upb_DefType_Unpack(v, UPB_DEFTYPE_FIELD); - case UPB_DEFTYPE_MSG: { - const upb_MessageDef* m = _upb_DefType_Unpack(v, UPB_DEFTYPE_MSG); - return _upb_MessageDef_InMessageSet(m) - ? upb_MessageDef_NestedExtension(m, 0) - : NULL; - } - default: - break; + if (!f) { + upb_MdDecoder_ErrorJmp(&d->base, + "Couldn't add field number %" PRIu32 + " to oneof, no such field number.", + field_num); + } + if (f->offset != kHasbitPresence) { + upb_MdDecoder_ErrorJmp( + &d->base, + "Cannot add repeated, required, or singular field %" PRIu32 + " to oneof.", + field_num); } - return NULL; + // Oneof storage must be large enough to accommodate the largest member. + int rep = f->mode >> kUpb_FieldRep_Shift; + if (upb_MtDecoder_SizeOfRep(rep, d->platform) > + upb_MtDecoder_SizeOfRep(item->rep, d->platform)) { + item->rep = rep; + } + // Prepend this field to the linked list. 
+ f->offset = item->field_index; + item->field_index = (f - d->fields) + kOneofBase; + return ptr; } -const upb_FieldDef* upb_DefPool_FindExtensionByName(const upb_DefPool* s, - const char* sym) { - return upb_DefPool_FindExtensionByNameWithSize(s, sym, strlen(sym)); -} +static const char* upb_MtDecoder_DecodeOneofs(upb_MtDecoder* d, + const char* ptr) { + upb_LayoutItem item = {.rep = 0, + .field_index = kUpb_LayoutItem_IndexSentinel}; + while (ptr < d->base.end) { + char ch = *ptr++; + if (ch == kUpb_EncodedValue_FieldSeparator) { + // Field separator, no action needed. + } else if (ch == kUpb_EncodedValue_OneofSeparator) { + // End of oneof. + upb_MtDecoder_PushOneof(d, item); + item.field_index = kUpb_LayoutItem_IndexSentinel; // Move to next oneof. + } else { + ptr = upb_MtDecoder_DecodeOneofField(d, ptr, ch, &item); + } + } -const upb_ServiceDef* upb_DefPool_FindServiceByName(const upb_DefPool* s, - const char* name) { - return _upb_DefPool_Unpack(s, name, strlen(name), UPB_DEFTYPE_SERVICE); + // Push final oneof. 
+ upb_MtDecoder_PushOneof(d, item); + return ptr; } -const upb_ServiceDef* upb_DefPool_FindServiceByNameWithSize( - const upb_DefPool* s, const char* name, size_t size) { - return _upb_DefPool_Unpack(s, name, size, UPB_DEFTYPE_SERVICE); +static const char* upb_MtDecoder_ParseModifier(upb_MtDecoder* d, + const char* ptr, char first_ch, + upb_MiniTableField* last_field, + uint64_t* msg_modifiers) { + uint32_t mod; + ptr = upb_MdDecoder_DecodeBase92Varint(&d->base, ptr, first_ch, + kUpb_EncodedValue_MinModifier, + kUpb_EncodedValue_MaxModifier, &mod); + if (last_field) { + upb_MtDecoder_ModifyField(d, *msg_modifiers, mod, last_field); + } else { + if (!d->table) { + upb_MdDecoder_ErrorJmp(&d->base, + "Extensions cannot have message modifiers"); + } + *msg_modifiers = mod; + } + + return ptr; } -const upb_FileDef* upb_DefPool_FindFileContainingSymbol(const upb_DefPool* s, - const char* name) { - upb_value v; - // TODO(haberman): non-extension fields and oneofs. - if (upb_strtable_lookup(&s->syms, name, &v)) { - switch (_upb_DefType_Type(v)) { - case UPB_DEFTYPE_EXT: { - const upb_FieldDef* f = _upb_DefType_Unpack(v, UPB_DEFTYPE_EXT); - return upb_FieldDef_File(f); +static void upb_MtDecoder_AllocateSubs(upb_MtDecoder* d, + upb_SubCounts sub_counts) { + uint32_t total_count = sub_counts.submsg_count + sub_counts.subenum_count; + size_t subs_bytes = sizeof(*d->table->subs) * total_count; + upb_MiniTableSub* subs = upb_Arena_Malloc(d->arena, subs_bytes); + upb_MdDecoder_CheckOutOfMemory(&d->base, subs); + uint32_t i = 0; + for (; i < sub_counts.submsg_count; i++) { + subs[i].submsg = &_kUpb_MiniTable_Empty; + } + if (sub_counts.subenum_count) { + upb_MiniTableField* f = d->fields; + upb_MiniTableField* end_f = f + d->table->field_count; + for (; f < end_f; f++) { + if (f->UPB_PRIVATE(descriptortype) == kUpb_FieldType_Enum) { + f->UPB_PRIVATE(submsg_index) += sub_counts.submsg_count; } - case UPB_DEFTYPE_MSG: { - const upb_MessageDef* m = _upb_DefType_Unpack(v, 
UPB_DEFTYPE_MSG); - return upb_MessageDef_File(m); + } + for (; i < sub_counts.submsg_count + sub_counts.subenum_count; i++) { + subs[i].subenum = NULL; + } + } + d->table->subs = subs; +} + +static const char* upb_MtDecoder_Parse(upb_MtDecoder* d, const char* ptr, + size_t len, void* fields, + size_t field_size, uint16_t* field_count, + upb_SubCounts* sub_counts) { + uint64_t msg_modifiers = 0; + uint32_t last_field_number = 0; + upb_MiniTableField* last_field = NULL; + bool need_dense_below = d->table != NULL; + + d->base.end = UPB_PTRADD(ptr, len); + + while (ptr < d->base.end) { + char ch = *ptr++; + if (ch <= kUpb_EncodedValue_MaxField) { + if (!d->table && last_field) { + // For extensions, consume only a single field and then return. + return --ptr; } - case UPB_DEFTYPE_ENUM: { - const upb_EnumDef* e = _upb_DefType_Unpack(v, UPB_DEFTYPE_ENUM); - return upb_EnumDef_File(e); + upb_MiniTableField* field = fields; + *field_count += 1; + fields = (char*)fields + field_size; + field->number = ++last_field_number; + last_field = field; + upb_MiniTable_SetField(d, ch, field, msg_modifiers, sub_counts); + } else if (kUpb_EncodedValue_MinModifier <= ch && + ch <= kUpb_EncodedValue_MaxModifier) { + ptr = upb_MtDecoder_ParseModifier(d, ptr, ch, last_field, &msg_modifiers); + if (msg_modifiers & kUpb_MessageModifier_IsExtendable) { + d->table->ext |= kUpb_ExtMode_Extendable; } - case UPB_DEFTYPE_ENUMVAL: { - const upb_EnumValueDef* ev = - _upb_DefType_Unpack(v, UPB_DEFTYPE_ENUMVAL); - return upb_EnumDef_File(upb_EnumValueDef_Enum(ev)); + } else if (ch == kUpb_EncodedValue_End) { + if (!d->table) { + upb_MdDecoder_ErrorJmp(&d->base, "Extensions cannot have oneofs."); } - case UPB_DEFTYPE_SERVICE: { - const upb_ServiceDef* service = - _upb_DefType_Unpack(v, UPB_DEFTYPE_SERVICE); - return upb_ServiceDef_File(service); + ptr = upb_MtDecoder_DecodeOneofs(d, ptr); + } else if (kUpb_EncodedValue_MinSkip <= ch && + ch <= kUpb_EncodedValue_MaxSkip) { + if (need_dense_below) { + 
d->table->dense_below = d->table->field_count; + need_dense_below = false; } - default: - UPB_UNREACHABLE(); + uint32_t skip; + ptr = upb_MdDecoder_DecodeBase92Varint(&d->base, ptr, ch, + kUpb_EncodedValue_MinSkip, + kUpb_EncodedValue_MaxSkip, &skip); + last_field_number += skip; + last_field_number--; // Next field seen will increment. + } else { + upb_MdDecoder_ErrorJmp(&d->base, "Invalid char: %c", ch); } } - const char* last_dot = strrchr(name, '.'); - if (last_dot) { - const upb_MessageDef* parent = - upb_DefPool_FindMessageByNameWithSize(s, name, last_dot - name); - if (parent) { - const char* shortname = last_dot + 1; - if (upb_MessageDef_FindByNameWithSize(parent, shortname, - strlen(shortname), NULL, NULL)) { - return upb_MessageDef_File(parent); - } - } + if (need_dense_below) { + d->table->dense_below = d->table->field_count; } - return NULL; + return ptr; } -static void remove_filedef(upb_DefPool* s, upb_FileDef* file) { - intptr_t iter = UPB_INTTABLE_BEGIN; - upb_StringView key; - upb_value val; - while (upb_strtable_next2(&s->syms, &key, &val, &iter)) { - const upb_FileDef* f; - switch (_upb_DefType_Type(val)) { - case UPB_DEFTYPE_EXT: - f = upb_FieldDef_File(_upb_DefType_Unpack(val, UPB_DEFTYPE_EXT)); - break; - case UPB_DEFTYPE_MSG: - f = upb_MessageDef_File(_upb_DefType_Unpack(val, UPB_DEFTYPE_MSG)); - break; - case UPB_DEFTYPE_ENUM: - f = upb_EnumDef_File(_upb_DefType_Unpack(val, UPB_DEFTYPE_ENUM)); - break; - case UPB_DEFTYPE_ENUMVAL: - f = upb_EnumDef_File(upb_EnumValueDef_Enum( - _upb_DefType_Unpack(val, UPB_DEFTYPE_ENUMVAL))); - break; - case UPB_DEFTYPE_SERVICE: - f = upb_ServiceDef_File(_upb_DefType_Unpack(val, UPB_DEFTYPE_SERVICE)); - break; - default: - UPB_UNREACHABLE(); - } +static void upb_MtDecoder_ParseMessage(upb_MtDecoder* d, const char* data, + size_t len) { + // Buffer length is an upper bound on the number of fields. We will return + // what we don't use. 
+ d->fields = upb_Arena_Malloc(d->arena, sizeof(*d->fields) * len); + upb_MdDecoder_CheckOutOfMemory(&d->base, d->fields); - if (f == file) upb_strtable_removeiter(&s->syms, &iter); - } + upb_SubCounts sub_counts = {0, 0}; + d->table->field_count = 0; + d->table->fields = d->fields; + upb_MtDecoder_Parse(d, data, len, d->fields, sizeof(*d->fields), + &d->table->field_count, &sub_counts); + + upb_Arena_ShrinkLast(d->arena, d->fields, sizeof(*d->fields) * len, + sizeof(*d->fields) * d->table->field_count); + d->table->fields = d->fields; + upb_MtDecoder_AllocateSubs(d, sub_counts); } -static const upb_FileDef* upb_DefBuilder_AddFileToPool( - upb_DefBuilder* const builder, upb_DefPool* const s, - const UPB_DESC(FileDescriptorProto) * const file_proto, - const upb_StringView name, upb_Status* const status) { - if (UPB_SETJMP(builder->err) != 0) { - UPB_ASSERT(!upb_Status_IsOk(status)); - if (builder->file) { - remove_filedef(s, builder->file); - builder->file = NULL; - } - } else if (!builder->arena || !builder->tmp_arena) { - _upb_DefBuilder_OomErr(builder); - } else { - _upb_FileDef_Create(builder, file_proto); - upb_strtable_insert(&s->files, name.data, name.size, - upb_value_constptr(builder->file), builder->arena); - UPB_ASSERT(upb_Status_IsOk(status)); - upb_Arena_Fuse(s->arena, builder->arena); +int upb_MtDecoder_CompareFields(const void* _a, const void* _b) { + const upb_LayoutItem* a = _a; + const upb_LayoutItem* b = _b; + // Currently we just sort by: + // 1. rep (smallest fields first) + // 2. type (oneof cases first) + // 2. field_index (smallest numbers first) + // The main goal of this is to reduce space lost to padding. + // Later we may have more subtle reasons to prefer a different ordering. 
+ const int rep_bits = upb_Log2Ceiling(kUpb_FieldRep_Max); + const int type_bits = upb_Log2Ceiling(kUpb_LayoutItemType_Max); + const int idx_bits = (sizeof(a->field_index) * 8); + UPB_ASSERT(idx_bits + rep_bits + type_bits < 32); +#define UPB_COMBINE(rep, ty, idx) (((rep << type_bits) | ty) << idx_bits) | idx + uint32_t a_packed = UPB_COMBINE(a->rep, a->type, a->field_index); + uint32_t b_packed = UPB_COMBINE(b->rep, b->type, b->field_index); + assert(a_packed != b_packed); +#undef UPB_COMBINE + return a_packed < b_packed ? -1 : 1; +} + +static bool upb_MtDecoder_SortLayoutItems(upb_MtDecoder* d) { + // Add items for all non-oneof fields (oneofs were already added). + int n = d->table->field_count; + for (int i = 0; i < n; i++) { + upb_MiniTableField* f = &d->fields[i]; + if (f->offset >= kOneofBase) continue; + upb_LayoutItem item = {.field_index = i, + .rep = f->mode >> kUpb_FieldRep_Shift, + .type = kUpb_LayoutItemType_Field}; + upb_MtDecoder_PushItem(d, item); } - if (builder->arena) upb_Arena_Free(builder->arena); - if (builder->tmp_arena) upb_Arena_Free(builder->tmp_arena); - return builder->file; + if (d->vec.size) { + qsort(d->vec.data, d->vec.size, sizeof(*d->vec.data), + upb_MtDecoder_CompareFields); + } + + return true; } -static const upb_FileDef* _upb_DefPool_AddFile( - upb_DefPool* s, const UPB_DESC(FileDescriptorProto) * file_proto, - const upb_MiniTableFile* layout, upb_Status* status) { - const upb_StringView name = UPB_DESC(FileDescriptorProto_name)(file_proto); +static size_t upb_MiniTable_DivideRoundUp(size_t n, size_t d) { + return (n + d - 1) / d; +} - // Determine whether we already know about this file. 
- { - upb_value v; - if (upb_strtable_lookup2(&s->files, name.data, name.size, &v)) { - upb_Status_SetErrorFormat(status, - "duplicate file name " UPB_STRINGVIEW_FORMAT, - UPB_STRINGVIEW_ARGS(name)); - return NULL; +static void upb_MtDecoder_AssignHasbits(upb_MtDecoder* d) { + upb_MiniTable* ret = d->table; + int n = ret->field_count; + int last_hasbit = 0; // 0 cannot be used. + + // First assign required fields, which must have the lowest hasbits. + for (int i = 0; i < n; i++) { + upb_MiniTableField* field = (upb_MiniTableField*)&ret->fields[i]; + if (field->offset == kRequiredPresence) { + field->presence = ++last_hasbit; + } else if (field->offset == kNoPresence) { + field->presence = 0; } } + ret->required_count = last_hasbit; - upb_DefBuilder ctx = { - .symtab = s, - .layout = layout, - .platform = s->platform, - .msg_count = 0, - .enum_count = 0, - .ext_count = 0, - .status = status, - .file = NULL, - .arena = upb_Arena_New(), - .tmp_arena = upb_Arena_New(), - }; + if (ret->required_count > 63) { + upb_MdDecoder_ErrorJmp(&d->base, "Too many required fields"); + } - return upb_DefBuilder_AddFileToPool(&ctx, s, file_proto, name, status); -} + // Next assign non-required hasbit fields. + for (int i = 0; i < n; i++) { + upb_MiniTableField* field = (upb_MiniTableField*)&ret->fields[i]; + if (field->offset == kHasbitPresence) { + field->presence = ++last_hasbit; + } + } -const upb_FileDef* upb_DefPool_AddFile(upb_DefPool* s, - const UPB_DESC(FileDescriptorProto) * - file_proto, - upb_Status* status) { - return _upb_DefPool_AddFile(s, file_proto, NULL, status); + ret->size = last_hasbit ? upb_MiniTable_DivideRoundUp(last_hasbit + 1, 8) : 0; } -bool _upb_DefPool_LoadDefInitEx(upb_DefPool* s, const _upb_DefPool_Init* init, - bool rebuild_minitable) { - /* Since this function should never fail (it would indicate a bug in upb) we - * print errors to stderr instead of returning error status to the user. 
*/ - _upb_DefPool_Init** deps = init->deps; - UPB_DESC(FileDescriptorProto) * file; - upb_Arena* arena; - upb_Status status; - - upb_Status_Clear(&status); - - if (upb_DefPool_FindFileByName(s, init->filename)) { - return true; +size_t upb_MtDecoder_Place(upb_MtDecoder* d, upb_FieldRep rep) { + size_t size = upb_MtDecoder_SizeOfRep(rep, d->platform); + size_t align = upb_MtDecoder_AlignOfRep(rep, d->platform); + size_t ret = UPB_ALIGN_UP(d->table->size, align); + static const size_t max = UINT16_MAX; + size_t new_size = ret + size; + if (new_size > max) { + upb_MdDecoder_ErrorJmp( + &d->base, "Message size exceeded maximum size of %zu bytes", max); } + d->table->size = new_size; + return ret; +} - arena = upb_Arena_New(); +static void upb_MtDecoder_AssignOffsets(upb_MtDecoder* d) { + upb_LayoutItem* end = UPB_PTRADD(d->vec.data, d->vec.size); - for (; *deps; deps++) { - if (!_upb_DefPool_LoadDefInitEx(s, *deps, rebuild_minitable)) goto err; + // Compute offsets. + for (upb_LayoutItem* item = d->vec.data; item < end; item++) { + item->offset = upb_MtDecoder_Place(d, item->rep); } - file = UPB_DESC(FileDescriptorProto_parse_ex)( - init->descriptor.data, init->descriptor.size, NULL, - kUpb_DecodeOption_AliasString, arena); - s->bytes_loaded += init->descriptor.size; - - if (!file) { - upb_Status_SetErrorFormat( - &status, - "Failed to parse compiled-in descriptor for file '%s'. This should " - "never happen.", - init->filename); - goto err; + // Assign oneof case offsets. We must do these first, since assigning + // actual offsets will overwrite the links of the linked list. 
+ for (upb_LayoutItem* item = d->vec.data; item < end; item++) { + if (item->type != kUpb_LayoutItemType_OneofCase) continue; + upb_MiniTableField* f = &d->fields[item->field_index]; + while (true) { + f->presence = ~item->offset; + if (f->offset == kUpb_LayoutItem_IndexSentinel) break; + UPB_ASSERT(f->offset - kOneofBase < d->table->field_count); + f = &d->fields[f->offset - kOneofBase]; + } } - const upb_MiniTableFile* mt = rebuild_minitable ? NULL : init->layout; - if (!_upb_DefPool_AddFile(s, file, mt, &status)) { - goto err; + // Assign offsets. + for (upb_LayoutItem* item = d->vec.data; item < end; item++) { + upb_MiniTableField* f = &d->fields[item->field_index]; + switch (item->type) { + case kUpb_LayoutItemType_OneofField: + while (true) { + uint16_t next_offset = f->offset; + f->offset = item->offset; + if (next_offset == kUpb_LayoutItem_IndexSentinel) break; + f = &d->fields[next_offset - kOneofBase]; + } + break; + case kUpb_LayoutItemType_Field: + f->offset = item->offset; + break; + default: + break; + } } - upb_Arena_Free(arena); - return true; - -err: - fprintf(stderr, - "Error loading compiled-in descriptor for file '%s' (this should " - "never happen): %s\n", - init->filename, upb_Status_ErrorMessage(&status)); - upb_Arena_Free(arena); - return false; + // The fasttable parser (supported on 64-bit only) depends on this being a + // multiple of 8 in order to satisfy UPB_MALLOC_ALIGN, which is also 8. + // + // On 32-bit we could potentially make this smaller, but there is no + // compelling reason to optimize this right now. + d->table->size = UPB_ALIGN_UP(d->table->size, 8); } -size_t _upb_DefPool_BytesLoaded(const upb_DefPool* s) { - return s->bytes_loaded; -} +static void upb_MtDecoder_ValidateEntryField(upb_MtDecoder* d, + const upb_MiniTableField* f, + uint32_t expected_num) { + const char* name = expected_num == 1 ? 
"key" : "val"; + if (f->number != expected_num) { + upb_MdDecoder_ErrorJmp(&d->base, + "map %s did not have expected number (%d vs %d)", + name, expected_num, (int)f->number); + } -upb_Arena* _upb_DefPool_Arena(const upb_DefPool* s) { return s->arena; } + if (upb_IsRepeatedOrMap(f)) { + upb_MdDecoder_ErrorJmp( + &d->base, "map %s cannot be repeated or map, or be in oneof", name); + } -const upb_FieldDef* upb_DefPool_FindExtensionByMiniTable( - const upb_DefPool* s, const upb_MiniTableExtension* ext) { - upb_value v; - bool ok = upb_inttable_lookup(&s->exts, (uintptr_t)ext, &v); - UPB_ASSERT(ok); - return upb_value_getconstptr(v); -} + uint32_t not_ok_types; + if (expected_num == 1) { + not_ok_types = (1 << kUpb_FieldType_Float) | (1 << kUpb_FieldType_Double) | + (1 << kUpb_FieldType_Message) | (1 << kUpb_FieldType_Group) | + (1 << kUpb_FieldType_Bytes) | (1 << kUpb_FieldType_Enum); + } else { + not_ok_types = 1 << kUpb_FieldType_Group; + } -const upb_FieldDef* upb_DefPool_FindExtensionByNumber(const upb_DefPool* s, - const upb_MessageDef* m, - int32_t fieldnum) { - const upb_MiniTable* t = upb_MessageDef_MiniTable(m); - const upb_MiniTableExtension* ext = - upb_ExtensionRegistry_Lookup(s->extreg, t, fieldnum); - return ext ? 
upb_DefPool_FindExtensionByMiniTable(s, ext) : NULL; + if ((1 << upb_MiniTableField_Type(f)) & not_ok_types) { + upb_MdDecoder_ErrorJmp(&d->base, "map %s cannot have type %d", name, + (int)f->UPB_PRIVATE(descriptortype)); + } } -const upb_ExtensionRegistry* upb_DefPool_ExtensionRegistry( - const upb_DefPool* s) { - return s->extreg; -} +static void upb_MtDecoder_ParseMap(upb_MtDecoder* d, const char* data, + size_t len) { + upb_MtDecoder_ParseMessage(d, data, len); + upb_MtDecoder_AssignHasbits(d); -const upb_FieldDef** upb_DefPool_GetAllExtensions(const upb_DefPool* s, - const upb_MessageDef* m, - size_t* count) { - size_t n = 0; - intptr_t iter = UPB_INTTABLE_BEGIN; - uintptr_t key; - upb_value val; - // This is O(all exts) instead of O(exts for m). If we need this to be - // efficient we may need to make extreg into a two-level table, or have a - // second per-message index. - while (upb_inttable_next(&s->exts, &key, &val, &iter)) { - const upb_FieldDef* f = upb_value_getconstptr(val); - if (upb_FieldDef_ContainingType(f) == m) n++; - } - const upb_FieldDef** exts = malloc(n * sizeof(*exts)); - iter = UPB_INTTABLE_BEGIN; - size_t i = 0; - while (upb_inttable_next(&s->exts, &key, &val, &iter)) { - const upb_FieldDef* f = upb_value_getconstptr(val); - if (upb_FieldDef_ContainingType(f) == m) exts[i++] = f; + if (UPB_UNLIKELY(d->table->field_count != 2)) { + upb_MdDecoder_ErrorJmp(&d->base, "%hu fields in map", + d->table->field_count); + UPB_UNREACHABLE(); } - *count = n; - return exts; -} -bool _upb_DefPool_LoadDefInit(upb_DefPool* s, const _upb_DefPool_Init* init) { - return _upb_DefPool_LoadDefInitEx(s, init, false); -} + upb_LayoutItem* end = UPB_PTRADD(d->vec.data, d->vec.size); + for (upb_LayoutItem* item = d->vec.data; item < end; item++) { + if (item->type == kUpb_LayoutItemType_OneofCase) { + upb_MdDecoder_ErrorJmp(&d->base, "Map entry cannot have oneof"); + } + } + upb_MtDecoder_ValidateEntryField(d, &d->table->fields[0], 1); + 
upb_MtDecoder_ValidateEntryField(d, &d->table->fields[1], 2); -// Must be last. + // Map entries have a pre-determined layout, regardless of types. + // NOTE: sync with mini_table/message_internal.h. + const size_t kv_size = d->platform == kUpb_MiniTablePlatform_32Bit ? 8 : 16; + const size_t hasbit_size = 8; + d->fields[0].offset = hasbit_size; + d->fields[1].offset = hasbit_size + kv_size; + d->table->size = UPB_ALIGN_UP(hasbit_size + kv_size + kv_size, 8); -upb_deftype_t _upb_DefType_Type(upb_value v) { - const uintptr_t num = (uintptr_t)upb_value_getconstptr(v); - return num & UPB_DEFTYPE_MASK; + // Map entries have a special bit set to signal it's a map entry, used in + // upb_MiniTable_SetSubMessage() below. + d->table->ext |= kUpb_ExtMode_IsMapEntry; } -upb_value _upb_DefType_Pack(const void* ptr, upb_deftype_t type) { - uintptr_t num = (uintptr_t)ptr; - UPB_ASSERT((num & UPB_DEFTYPE_MASK) == 0); - num |= type; - return upb_value_constptr((const void*)num); -} +static void upb_MtDecoder_ParseMessageSet(upb_MtDecoder* d, const char* data, + size_t len) { + if (len > 0) { + upb_MdDecoder_ErrorJmp(&d->base, "Invalid message set encode length: %zu", + len); + } -const void* _upb_DefType_Unpack(upb_value v, upb_deftype_t type) { - uintptr_t num = (uintptr_t)upb_value_getconstptr(v); - return (num & UPB_DEFTYPE_MASK) == type - ? (const void*)(num & ~UPB_DEFTYPE_MASK) - : NULL; + upb_MiniTable* ret = d->table; + ret->size = 0; + ret->field_count = 0; + ret->ext = kUpb_ExtMode_IsMessageSet; + ret->dense_below = 0; + ret->table_mask = -1; + ret->required_count = 0; } +static upb_MiniTable* upb_MtDecoder_DoBuildMiniTableWithBuf( + upb_MtDecoder* decoder, const char* data, size_t len, void** buf, + size_t* buf_size) { + upb_MdDecoder_CheckOutOfMemory(&decoder->base, decoder->table); -// Must be last. 
+ decoder->table->size = 0; + decoder->table->field_count = 0; + decoder->table->ext = kUpb_ExtMode_NonExtendable; + decoder->table->dense_below = 0; + decoder->table->table_mask = -1; + decoder->table->required_count = 0; -bool _upb_DescState_Grow(upb_DescState* d, upb_Arena* a) { - const size_t oldbufsize = d->bufsize; - const int used = d->ptr - d->buf; + // Strip off and verify the version tag. + if (!len--) goto done; + const char vers = *data++; - if (!d->buf) { - d->buf = upb_Arena_Malloc(a, d->bufsize); - if (!d->buf) return false; - d->ptr = d->buf; - d->e.end = d->buf + d->bufsize; - } + switch (vers) { + case kUpb_EncodedVersion_MapV1: + upb_MtDecoder_ParseMap(decoder, data, len); + break; - if (oldbufsize - used < kUpb_MtDataEncoder_MinSize) { - d->bufsize *= 2; - d->buf = upb_Arena_Realloc(a, d->buf, oldbufsize, d->bufsize); - if (!d->buf) return false; - d->ptr = d->buf + used; - d->e.end = d->buf + d->bufsize; - } + case kUpb_EncodedVersion_MessageV1: + upb_MtDecoder_ParseMessage(decoder, data, len); + upb_MtDecoder_AssignHasbits(decoder); + upb_MtDecoder_SortLayoutItems(decoder); + upb_MtDecoder_AssignOffsets(decoder); + break; - return true; -} + case kUpb_EncodedVersion_MessageSetV1: + upb_MtDecoder_ParseMessageSet(decoder, data, len); + break; + default: + upb_MdDecoder_ErrorJmp(&decoder->base, "Invalid message version: %c", + vers); + } -// Must be last. +done: + *buf = decoder->vec.data; + *buf_size = decoder->vec.capacity * sizeof(*decoder->vec.data); + return decoder->table; +} -struct upb_EnumDef { - const UPB_DESC(EnumOptions) * opts; - const upb_MiniTableEnum* layout; // Only for proto2. - const upb_FileDef* file; - const upb_MessageDef* containing_type; // Could be merged with "file". 
- const char* full_name; - upb_strtable ntoi; - upb_inttable iton; - const upb_EnumValueDef* values; - const upb_EnumReservedRange* res_ranges; - const upb_StringView* res_names; - int value_count; - int res_range_count; - int res_name_count; - int32_t defaultval; - bool is_closed; - bool is_sorted; // Whether all of the values are defined in ascending order. -}; +static upb_MiniTable* upb_MtDecoder_BuildMiniTableWithBuf( + upb_MtDecoder* const decoder, const char* const data, const size_t len, + void** const buf, size_t* const buf_size) { + if (UPB_SETJMP(decoder->base.err) != 0) { + *buf = decoder->vec.data; + *buf_size = decoder->vec.capacity * sizeof(*decoder->vec.data); + return NULL; + } -upb_EnumDef* _upb_EnumDef_At(const upb_EnumDef* e, int i) { - return (upb_EnumDef*)&e[i]; + return upb_MtDecoder_DoBuildMiniTableWithBuf(decoder, data, len, buf, + buf_size); } -const upb_MiniTableEnum* _upb_EnumDef_MiniTable(const upb_EnumDef* e) { - return e->layout; -} +upb_MiniTable* upb_MiniTable_BuildWithBuf(const char* data, size_t len, + upb_MiniTablePlatform platform, + upb_Arena* arena, void** buf, + size_t* buf_size, + upb_Status* status) { + upb_MtDecoder decoder = { + .base = {.status = status}, + .platform = platform, + .vec = + { + .data = *buf, + .capacity = *buf_size / sizeof(*decoder.vec.data), + .size = 0, + }, + .arena = arena, + .table = upb_Arena_Malloc(arena, sizeof(*decoder.table)), + }; -bool _upb_EnumDef_Insert(upb_EnumDef* e, upb_EnumValueDef* v, upb_Arena* a) { - const char* name = upb_EnumValueDef_Name(v); - const upb_value val = upb_value_constptr(v); - bool ok = upb_strtable_insert(&e->ntoi, name, strlen(name), val, a); - if (!ok) return false; + return upb_MtDecoder_BuildMiniTableWithBuf(&decoder, data, len, buf, + buf_size); +} - // Multiple enumerators can have the same number, first one wins. 
- const int number = upb_EnumValueDef_Number(v); - if (!upb_inttable_lookup(&e->iton, number, NULL)) { - return upb_inttable_insert(&e->iton, number, val, a); +static const char* upb_MtDecoder_DoBuildMiniTableExtension( + upb_MtDecoder* decoder, const char* data, size_t len, + upb_MiniTableExtension* ext, const upb_MiniTable* extendee, + upb_MiniTableSub sub) { + // If the string is non-empty then it must begin with a version tag. + if (len) { + if (*data != kUpb_EncodedVersion_ExtensionV1) { + upb_MdDecoder_ErrorJmp(&decoder->base, "Invalid ext version: %c", *data); + } + data++; + len--; } - return true; -} -const UPB_DESC(EnumOptions) * upb_EnumDef_Options(const upb_EnumDef* e) { - return e->opts; -} + uint16_t count = 0; + upb_SubCounts sub_counts = {0, 0}; + const char* ret = upb_MtDecoder_Parse(decoder, data, len, ext, sizeof(*ext), + &count, &sub_counts); + if (!ret || count != 1) return NULL; -bool upb_EnumDef_HasOptions(const upb_EnumDef* e) { - return e->opts != (void*)kUpbDefOptDefault; -} + upb_MiniTableField* f = &ext->field; -const char* upb_EnumDef_FullName(const upb_EnumDef* e) { return e->full_name; } + f->mode |= kUpb_LabelFlags_IsExtension; + f->offset = 0; + f->presence = 0; -const char* upb_EnumDef_Name(const upb_EnumDef* e) { - return _upb_DefBuilder_FullToShort(e->full_name); -} + if (extendee->ext & kUpb_ExtMode_IsMessageSet) { + // Extensions of MessageSet must be messages. + if (!upb_IsSubMessage(f)) return NULL; -const upb_FileDef* upb_EnumDef_File(const upb_EnumDef* e) { return e->file; } + // Extensions of MessageSet must be non-repeating. 
+ if ((f->mode & kUpb_FieldMode_Mask) == kUpb_FieldMode_Array) return NULL; + } -const upb_MessageDef* upb_EnumDef_ContainingType(const upb_EnumDef* e) { - return e->containing_type; -} + ext->extendee = extendee; + ext->sub = sub; -int32_t upb_EnumDef_Default(const upb_EnumDef* e) { - UPB_ASSERT(upb_EnumDef_FindValueByNumber(e, e->defaultval)); - return e->defaultval; + return ret; } -int upb_EnumDef_ReservedRangeCount(const upb_EnumDef* e) { - return e->res_range_count; +static const char* upb_MtDecoder_BuildMiniTableExtension( + upb_MtDecoder* const decoder, const char* const data, const size_t len, + upb_MiniTableExtension* const ext, const upb_MiniTable* const extendee, + const upb_MiniTableSub sub) { + if (UPB_SETJMP(decoder->base.err) != 0) return NULL; + return upb_MtDecoder_DoBuildMiniTableExtension(decoder, data, len, ext, + extendee, sub); } -const upb_EnumReservedRange* upb_EnumDef_ReservedRange(const upb_EnumDef* e, - int i) { - UPB_ASSERT(0 <= i && i < e->res_range_count); - return _upb_EnumReservedRange_At(e->res_ranges, i); -} +const char* _upb_MiniTableExtension_Init(const char* data, size_t len, + upb_MiniTableExtension* ext, + const upb_MiniTable* extendee, + upb_MiniTableSub sub, + upb_MiniTablePlatform platform, + upb_Status* status) { + upb_MtDecoder decoder = { + .base = {.status = status}, + .arena = NULL, + .table = NULL, + .platform = platform, + }; -int upb_EnumDef_ReservedNameCount(const upb_EnumDef* e) { - return e->res_name_count; + return upb_MtDecoder_BuildMiniTableExtension(&decoder, data, len, ext, + extendee, sub); } -upb_StringView upb_EnumDef_ReservedName(const upb_EnumDef* e, int i) { - UPB_ASSERT(0 <= i && i < e->res_name_count); - return e->res_names[i]; -} +upb_MiniTableExtension* _upb_MiniTableExtension_Build( + const char* data, size_t len, const upb_MiniTable* extendee, + upb_MiniTableSub sub, upb_MiniTablePlatform platform, upb_Arena* arena, + upb_Status* status) { + upb_MiniTableExtension* ext = + 
upb_Arena_Malloc(arena, sizeof(upb_MiniTableExtension)); + if (UPB_UNLIKELY(!ext)) return NULL; -int upb_EnumDef_ValueCount(const upb_EnumDef* e) { return e->value_count; } + const char* ptr = _upb_MiniTableExtension_Init(data, len, ext, extendee, sub, + platform, status); + if (UPB_UNLIKELY(!ptr)) return NULL; -const upb_EnumValueDef* upb_EnumDef_FindValueByName(const upb_EnumDef* e, - const char* name) { - return upb_EnumDef_FindValueByNameWithSize(e, name, strlen(name)); + return ext; } -const upb_EnumValueDef* upb_EnumDef_FindValueByNameWithSize( - const upb_EnumDef* e, const char* name, size_t size) { - upb_value v; - return upb_strtable_lookup2(&e->ntoi, name, size, &v) - ? upb_value_getconstptr(v) - : NULL; +upb_MiniTable* _upb_MiniTable_Build(const char* data, size_t len, + upb_MiniTablePlatform platform, + upb_Arena* arena, upb_Status* status) { + void* buf = NULL; + size_t size = 0; + upb_MiniTable* ret = upb_MiniTable_BuildWithBuf(data, len, platform, arena, + &buf, &size, status); + free(buf); + return ret; } -const upb_EnumValueDef* upb_EnumDef_FindValueByNumber(const upb_EnumDef* e, - int32_t num) { - upb_value v; - return upb_inttable_lookup(&e->iton, num, &v) ? upb_value_getconstptr(v) - : NULL; -} - -bool upb_EnumDef_CheckNumber(const upb_EnumDef* e, int32_t num) { - // We could use upb_EnumDef_FindValueByNumber(e, num) != NULL, but we expect - // this to be faster (especially for small numbers). - return upb_MiniTableEnum_CheckValue(e->layout, num); -} - -const upb_EnumValueDef* upb_EnumDef_Value(const upb_EnumDef* e, int i) { - UPB_ASSERT(0 <= i && i < e->value_count); - return _upb_EnumValueDef_At(e->values, i); -} -bool upb_EnumDef_IsClosed(const upb_EnumDef* e) { return e->is_closed; } +// Must be last. 
-bool upb_EnumDef_MiniDescriptorEncode(const upb_EnumDef* e, upb_Arena* a, - upb_StringView* out) { - upb_DescState s; - _upb_DescState_Init(&s); +bool upb_MiniTable_SetSubMessage(upb_MiniTable* table, + upb_MiniTableField* field, + const upb_MiniTable* sub) { + UPB_ASSERT((uintptr_t)table->fields <= (uintptr_t)field && + (uintptr_t)field < + (uintptr_t)(table->fields + table->field_count)); + UPB_ASSERT(sub); - const upb_EnumValueDef** sorted = NULL; - if (!e->is_sorted) { - sorted = _upb_EnumValueDefs_Sorted(e->values, e->value_count, a); - if (!sorted) return false; - } + const bool sub_is_map = sub->ext & kUpb_ExtMode_IsMapEntry; - if (!_upb_DescState_Grow(&s, a)) return false; - s.ptr = upb_MtDataEncoder_StartEnum(&s.e, s.ptr); + switch (field->UPB_PRIVATE(descriptortype)) { + case kUpb_FieldType_Message: + if (sub_is_map) { + const bool table_is_map = table->ext & kUpb_ExtMode_IsMapEntry; + if (UPB_UNLIKELY(table_is_map)) return false; - // Duplicate values are allowed but we only encode each value once. - uint32_t previous = 0; + field->mode = (field->mode & ~kUpb_FieldMode_Mask) | kUpb_FieldMode_Map; + } + break; - for (size_t i = 0; i < e->value_count; i++) { - const uint32_t current = - upb_EnumValueDef_Number(sorted ? sorted[i] : upb_EnumDef_Value(e, i)); - if (i != 0 && previous == current) continue; + case kUpb_FieldType_Group: + if (UPB_UNLIKELY(sub_is_map)) return false; + break; - if (!_upb_DescState_Grow(&s, a)) return false; - s.ptr = upb_MtDataEncoder_PutEnumValue(&s.e, s.ptr, current); - previous = current; + default: + return false; } - if (!_upb_DescState_Grow(&s, a)) return false; - s.ptr = upb_MtDataEncoder_EndEnum(&s.e, s.ptr); + upb_MiniTableSub* table_sub = + (void*)&table->subs[field->UPB_PRIVATE(submsg_index)]; + // TODO(haberman): Add this assert back once YouTube is updated to not call + // this function repeatedly. 
+ // UPB_ASSERT(table_sub->submsg == &_kUpb_MiniTable_Empty); + table_sub->submsg = sub; + return true; +} - // There will always be room for this '\0' in the encoder buffer because - // kUpb_MtDataEncoder_MinSize is overkill for upb_MtDataEncoder_EndEnum(). - UPB_ASSERT(s.ptr < s.buf + s.bufsize); - *s.ptr = '\0'; +bool upb_MiniTable_SetSubEnum(upb_MiniTable* table, upb_MiniTableField* field, + const upb_MiniTableEnum* sub) { + UPB_ASSERT((uintptr_t)table->fields <= (uintptr_t)field && + (uintptr_t)field < + (uintptr_t)(table->fields + table->field_count)); + UPB_ASSERT(sub); - out->data = s.buf; - out->size = s.ptr - s.buf; + upb_MiniTableSub* table_sub = + (void*)&table->subs[field->UPB_PRIVATE(submsg_index)]; + table_sub->subenum = sub; return true; } -static upb_MiniTableEnum* create_enumlayout(upb_DefBuilder* ctx, - const upb_EnumDef* e) { - upb_StringView sv; - bool ok = upb_EnumDef_MiniDescriptorEncode(e, ctx->tmp_arena, &sv); - if (!ok) _upb_DefBuilder_Errf(ctx, "OOM while building enum MiniDescriptor"); +uint32_t upb_MiniTable_GetSubList(const upb_MiniTable* mt, + const upb_MiniTableField** subs) { + uint32_t msg_count = 0; + uint32_t enum_count = 0; - upb_Status status; - upb_MiniTableEnum* layout = - upb_MiniTableEnum_Build(sv.data, sv.size, ctx->arena, &status); - if (!layout) - _upb_DefBuilder_Errf(ctx, "Error building enum MiniTable: %s", status.msg); - return layout; -} + for (int i = 0; i < mt->field_count; i++) { + const upb_MiniTableField* f = &mt->fields[i]; + if (upb_MiniTableField_CType(f) == kUpb_CType_Message) { + *subs = f; + ++subs; + msg_count++; + } + } -static upb_StringView* _upb_EnumReservedNames_New( - upb_DefBuilder* ctx, int n, const upb_StringView* protos) { - upb_StringView* sv = _upb_DefBuilder_Alloc(ctx, sizeof(upb_StringView) * n); - for (size_t i = 0; i < n; i++) { - sv[i].data = - upb_strdup2(protos[i].data, protos[i].size, _upb_DefBuilder_Arena(ctx)); - sv[i].size = protos[i].size; + for (int i = 0; i < mt->field_count; 
i++) { + const upb_MiniTableField* f = &mt->fields[i]; + if (upb_MiniTableField_CType(f) == kUpb_CType_Enum) { + *subs = f; + ++subs; + enum_count++; + } } - return sv; -} -static void create_enumdef(upb_DefBuilder* ctx, const char* prefix, - const UPB_DESC(EnumDescriptorProto) * enum_proto, - upb_EnumDef* e) { - const UPB_DESC(EnumValueDescriptorProto)* const* values; - const UPB_DESC(EnumDescriptorProto_EnumReservedRange)* const* res_ranges; - const upb_StringView* res_names; - upb_StringView name; - size_t n_value, n_res_range, n_res_name; + return (msg_count << 16) | enum_count; +} - // Must happen before _upb_DefBuilder_Add() - e->file = _upb_DefBuilder_File(ctx); +// The list of sub_tables and sub_enums must exactly match the number and order +// of sub-message fields and sub-enum fields given by upb_MiniTable_GetSubList() +// above. +bool upb_MiniTable_Link(upb_MiniTable* mt, const upb_MiniTable** sub_tables, + size_t sub_table_count, + const upb_MiniTableEnum** sub_enums, + size_t sub_enum_count) { + uint32_t msg_count = 0; + uint32_t enum_count = 0; - name = UPB_DESC(EnumDescriptorProto_name)(enum_proto); + for (int i = 0; i < mt->field_count; i++) { + upb_MiniTableField* f = (upb_MiniTableField*)&mt->fields[i]; + if (upb_MiniTableField_CType(f) == kUpb_CType_Message) { + const upb_MiniTable* sub = sub_tables[msg_count++]; + if (msg_count > sub_table_count) return false; + if (sub != NULL) { + if (!upb_MiniTable_SetSubMessage(mt, f, sub)) return false; + } + } + } - e->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); - _upb_DefBuilder_Add(ctx, e->full_name, - _upb_DefType_Pack(e, UPB_DEFTYPE_ENUM)); + for (int i = 0; i < mt->field_count; i++) { + upb_MiniTableField* f = (upb_MiniTableField*)&mt->fields[i]; + if (upb_MiniTableField_IsClosedEnum(f)) { + const upb_MiniTableEnum* sub = sub_enums[enum_count++]; + if (enum_count > sub_enum_count) return false; + if (sub != NULL) { + if (!upb_MiniTable_SetSubEnum(mt, f, sub)) return false; + } + } + } 
- e->is_closed = (!UPB_TREAT_PROTO2_ENUMS_LIKE_PROTO3) && - (upb_FileDef_Syntax(e->file) == kUpb_Syntax_Proto2); + return true; +} - values = UPB_DESC(EnumDescriptorProto_value)(enum_proto, &n_value); - bool ok = upb_strtable_init(&e->ntoi, n_value, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); - ok = upb_inttable_init(&e->iton, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); +// Must be last. - e->defaultval = 0; - e->value_count = n_value; - e->values = - _upb_EnumValueDefs_New(ctx, prefix, n_value, values, e, &e->is_sorted); +typedef struct { + uint64_t present_values_mask; + uint32_t last_written_value; +} upb_MtDataEncoderInternal_EnumState; - if (n_value == 0) { - _upb_DefBuilder_Errf(ctx, "enums must contain at least one value (%s)", - e->full_name); - } +typedef struct { + uint64_t msg_modifiers; + uint32_t last_field_num; + enum { + kUpb_OneofState_NotStarted, + kUpb_OneofState_StartedOneof, + kUpb_OneofState_EmittedOneofField, + } oneof_state; +} upb_MtDataEncoderInternal_MsgState; - res_ranges = - UPB_DESC(EnumDescriptorProto_reserved_range)(enum_proto, &n_res_range); - e->res_range_count = n_res_range; - e->res_ranges = _upb_EnumReservedRanges_New(ctx, n_res_range, res_ranges, e); +typedef struct { + char* buf_start; // Only for checking kUpb_MtDataEncoder_MinSize. 
+ union { + upb_MtDataEncoderInternal_EnumState enum_state; + upb_MtDataEncoderInternal_MsgState msg_state; + } state; +} upb_MtDataEncoderInternal; - res_names = - UPB_DESC(EnumDescriptorProto_reserved_name)(enum_proto, &n_res_name); - e->res_name_count = n_res_name; - e->res_names = _upb_EnumReservedNames_New(ctx, n_res_name, res_names); +static upb_MtDataEncoderInternal* upb_MtDataEncoder_GetInternal( + upb_MtDataEncoder* e, char* buf_start) { + UPB_ASSERT(sizeof(upb_MtDataEncoderInternal) <= sizeof(e->internal)); + upb_MtDataEncoderInternal* ret = (upb_MtDataEncoderInternal*)e->internal; + ret->buf_start = buf_start; + return ret; +} - UPB_DEF_SET_OPTIONS(e->opts, EnumDescriptorProto, EnumOptions, enum_proto); +static char* upb_MtDataEncoder_PutRaw(upb_MtDataEncoder* e, char* ptr, + char ch) { + upb_MtDataEncoderInternal* in = (upb_MtDataEncoderInternal*)e->internal; + UPB_ASSERT(ptr - in->buf_start < kUpb_MtDataEncoder_MinSize); + if (ptr == e->end) return NULL; + *ptr++ = ch; + return ptr; +} - upb_inttable_compact(&e->iton, ctx->arena); +static char* upb_MtDataEncoder_Put(upb_MtDataEncoder* e, char* ptr, char ch) { + return upb_MtDataEncoder_PutRaw(e, ptr, _upb_ToBase92(ch)); +} - if (e->is_closed) { - if (ctx->layout) { - UPB_ASSERT(ctx->enum_count < ctx->layout->enum_count); - e->layout = ctx->layout->enums[ctx->enum_count++]; - } else { - e->layout = create_enumlayout(ctx, e); - } - } else { - e->layout = NULL; - } +static char* upb_MtDataEncoder_PutBase92Varint(upb_MtDataEncoder* e, char* ptr, + uint32_t val, int min, int max) { + int shift = upb_Log2Ceiling(_upb_FromBase92(max) - _upb_FromBase92(min) + 1); + UPB_ASSERT(shift <= 6); + uint32_t mask = (1 << shift) - 1; + do { + uint32_t bits = val & mask; + ptr = upb_MtDataEncoder_Put(e, ptr, bits + _upb_FromBase92(min)); + if (!ptr) return NULL; + val >>= shift; + } while (val); + return ptr; } -upb_EnumDef* _upb_EnumDefs_New( - upb_DefBuilder* ctx, int n, - const UPB_DESC(EnumDescriptorProto) * const* 
protos, - const upb_MessageDef* containing_type) { - _upb_DefType_CheckPadding(sizeof(upb_EnumDef)); +char* upb_MtDataEncoder_PutModifier(upb_MtDataEncoder* e, char* ptr, + uint64_t mod) { + if (mod) { + ptr = upb_MtDataEncoder_PutBase92Varint(e, ptr, mod, + kUpb_EncodedValue_MinModifier, + kUpb_EncodedValue_MaxModifier); + } + return ptr; +} - // If a containing type is defined then get the full name from that. - // Otherwise use the package name from the file def. - const char* name = containing_type ? upb_MessageDef_FullName(containing_type) - : _upb_FileDef_RawPackage(ctx->file); +char* upb_MtDataEncoder_EncodeExtension(upb_MtDataEncoder* e, char* ptr, + upb_FieldType type, uint32_t field_num, + uint64_t field_mod) { + upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); + in->state.msg_state.msg_modifiers = 0; + in->state.msg_state.last_field_num = 0; + in->state.msg_state.oneof_state = kUpb_OneofState_NotStarted; - upb_EnumDef* e = _upb_DefBuilder_Alloc(ctx, sizeof(upb_EnumDef) * n); - for (size_t i = 0; i < n; i++) { - create_enumdef(ctx, name, protos[i], &e[i]); - e[i].containing_type = containing_type; - } - return e; + ptr = upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_ExtensionV1); + if (!ptr) return NULL; + + return upb_MtDataEncoder_PutField(e, ptr, type, field_num, field_mod); } -// #include "upb/reflection/extension_range_internal.h" -// #include "upb/reflection/message_def.h" +char* upb_MtDataEncoder_EncodeMap(upb_MtDataEncoder* e, char* ptr, + upb_FieldType key_type, + upb_FieldType value_type, uint64_t key_mod, + uint64_t value_mod) { + upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); + in->state.msg_state.msg_modifiers = 0; + in->state.msg_state.last_field_num = 0; + in->state.msg_state.oneof_state = kUpb_OneofState_NotStarted; -// Must be last. 
+ ptr = upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_MapV1); + if (!ptr) return NULL; -struct upb_EnumReservedRange { - int32_t start; - int32_t end; -}; + ptr = upb_MtDataEncoder_PutField(e, ptr, key_type, 1, key_mod); + if (!ptr) return NULL; -upb_EnumReservedRange* _upb_EnumReservedRange_At(const upb_EnumReservedRange* r, - int i) { - return (upb_EnumReservedRange*)&r[i]; + return upb_MtDataEncoder_PutField(e, ptr, value_type, 2, value_mod); } -int32_t upb_EnumReservedRange_Start(const upb_EnumReservedRange* r) { - return r->start; -} -int32_t upb_EnumReservedRange_End(const upb_EnumReservedRange* r) { - return r->end; +char* upb_MtDataEncoder_EncodeMessageSet(upb_MtDataEncoder* e, char* ptr) { + (void)upb_MtDataEncoder_GetInternal(e, ptr); + return upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_MessageSetV1); } -upb_EnumReservedRange* _upb_EnumReservedRanges_New( - upb_DefBuilder* ctx, int n, - const UPB_DESC(EnumDescriptorProto_EnumReservedRange) * const* protos, - const upb_EnumDef* e) { - upb_EnumReservedRange* r = - _upb_DefBuilder_Alloc(ctx, sizeof(upb_EnumReservedRange) * n); - - for (int i = 0; i < n; i++) { - const int32_t start = - UPB_DESC(EnumDescriptorProto_EnumReservedRange_start)(protos[i]); - const int32_t end = - UPB_DESC(EnumDescriptorProto_EnumReservedRange_end)(protos[i]); +char* upb_MtDataEncoder_StartMessage(upb_MtDataEncoder* e, char* ptr, + uint64_t msg_mod) { + upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); + in->state.msg_state.msg_modifiers = msg_mod; + in->state.msg_state.last_field_num = 0; + in->state.msg_state.oneof_state = kUpb_OneofState_NotStarted; - // A full validation would also check that each range is disjoint, and that - // none of the fields overlap with the extension ranges, but we are just - // sanity checking here. + ptr = upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_MessageV1); + if (!ptr) return NULL; - // Note: Not a typo! 
Unlike extension ranges and message reserved ranges, - // the end value of an enum reserved range is *inclusive*! - if (end < start) { - _upb_DefBuilder_Errf(ctx, "Reserved range (%d, %d) is invalid, enum=%s\n", - (int)start, (int)end, upb_EnumDef_FullName(e)); - } + return upb_MtDataEncoder_PutModifier(e, ptr, msg_mod); +} - r[i].start = start; - r[i].end = end; +static char* _upb_MtDataEncoder_MaybePutFieldSkip(upb_MtDataEncoder* e, + char* ptr, + uint32_t field_num) { + upb_MtDataEncoderInternal* in = (upb_MtDataEncoderInternal*)e->internal; + if (field_num <= in->state.msg_state.last_field_num) return NULL; + if (in->state.msg_state.last_field_num + 1 != field_num) { + // Put skip. + UPB_ASSERT(field_num > in->state.msg_state.last_field_num); + uint32_t skip = field_num - in->state.msg_state.last_field_num; + ptr = upb_MtDataEncoder_PutBase92Varint( + e, ptr, skip, kUpb_EncodedValue_MinSkip, kUpb_EncodedValue_MaxSkip); + if (!ptr) return NULL; } - - return r; + in->state.msg_state.last_field_num = field_num; + return ptr; } +static char* _upb_MtDataEncoder_PutFieldType(upb_MtDataEncoder* e, char* ptr, + upb_FieldType type, + uint64_t field_mod) { + static const char kUpb_TypeToEncoded[] = { + [kUpb_FieldType_Double] = kUpb_EncodedType_Double, + [kUpb_FieldType_Float] = kUpb_EncodedType_Float, + [kUpb_FieldType_Int64] = kUpb_EncodedType_Int64, + [kUpb_FieldType_UInt64] = kUpb_EncodedType_UInt64, + [kUpb_FieldType_Int32] = kUpb_EncodedType_Int32, + [kUpb_FieldType_Fixed64] = kUpb_EncodedType_Fixed64, + [kUpb_FieldType_Fixed32] = kUpb_EncodedType_Fixed32, + [kUpb_FieldType_Bool] = kUpb_EncodedType_Bool, + [kUpb_FieldType_String] = kUpb_EncodedType_String, + [kUpb_FieldType_Group] = kUpb_EncodedType_Group, + [kUpb_FieldType_Message] = kUpb_EncodedType_Message, + [kUpb_FieldType_Bytes] = kUpb_EncodedType_Bytes, + [kUpb_FieldType_UInt32] = kUpb_EncodedType_UInt32, + [kUpb_FieldType_Enum] = kUpb_EncodedType_OpenEnum, + [kUpb_FieldType_SFixed32] = 
kUpb_EncodedType_SFixed32, + [kUpb_FieldType_SFixed64] = kUpb_EncodedType_SFixed64, + [kUpb_FieldType_SInt32] = kUpb_EncodedType_SInt32, + [kUpb_FieldType_SInt64] = kUpb_EncodedType_SInt64, + }; + + int encoded_type = kUpb_TypeToEncoded[type]; -// Must be last. + if (field_mod & kUpb_FieldModifier_IsClosedEnum) { + UPB_ASSERT(type == kUpb_FieldType_Enum); + encoded_type = kUpb_EncodedType_ClosedEnum; + } -struct upb_EnumValueDef { - const UPB_DESC(EnumValueOptions) * opts; - const upb_EnumDef* parent; - const char* full_name; - int32_t number; -}; + if (field_mod & kUpb_FieldModifier_IsRepeated) { + // Repeated fields shift the type number up (unlike other modifiers which + // are bit flags). + encoded_type += kUpb_EncodedType_RepeatedBase; + } -upb_EnumValueDef* _upb_EnumValueDef_At(const upb_EnumValueDef* v, int i) { - return (upb_EnumValueDef*)&v[i]; + return upb_MtDataEncoder_Put(e, ptr, encoded_type); } -static int _upb_EnumValueDef_Compare(const void* p1, const void* p2) { - const uint32_t v1 = (*(const upb_EnumValueDef**)p1)->number; - const uint32_t v2 = (*(const upb_EnumValueDef**)p2)->number; - return (v1 < v2) ? -1 : (v1 > v2); -} +static char* _upb_MtDataEncoder_MaybePutModifiers(upb_MtDataEncoder* e, + char* ptr, upb_FieldType type, + uint64_t field_mod) { + upb_MtDataEncoderInternal* in = (upb_MtDataEncoderInternal*)e->internal; + uint32_t encoded_modifiers = 0; + if ((field_mod & kUpb_FieldModifier_IsRepeated) && + upb_FieldType_IsPackable(type)) { + bool field_is_packed = field_mod & kUpb_FieldModifier_IsPacked; + bool default_is_packed = in->state.msg_state.msg_modifiers & + kUpb_MessageModifier_DefaultIsPacked; + if (field_is_packed != default_is_packed) { + encoded_modifiers |= kUpb_EncodedFieldModifier_FlipPacked; + } + } -const upb_EnumValueDef** _upb_EnumValueDefs_Sorted(const upb_EnumValueDef* v, - int n, upb_Arena* a) { - // TODO: Try to replace this arena alloc with a persistent scratch buffer. 
- upb_EnumValueDef** out = - (upb_EnumValueDef**)upb_Arena_Malloc(a, n * sizeof(void*)); - if (!out) return NULL; + if (field_mod & kUpb_FieldModifier_IsProto3Singular) { + encoded_modifiers |= kUpb_EncodedFieldModifier_IsProto3Singular; + } - for (int i = 0; i < n; i++) { - out[i] = (upb_EnumValueDef*)&v[i]; + if (field_mod & kUpb_FieldModifier_IsRequired) { + encoded_modifiers |= kUpb_EncodedFieldModifier_IsRequired; } - qsort(out, n, sizeof(void*), _upb_EnumValueDef_Compare); - return (const upb_EnumValueDef**)out; + return upb_MtDataEncoder_PutModifier(e, ptr, encoded_modifiers); } -const UPB_DESC(EnumValueOptions) * - upb_EnumValueDef_Options(const upb_EnumValueDef* v) { - return v->opts; -} +char* upb_MtDataEncoder_PutField(upb_MtDataEncoder* e, char* ptr, + upb_FieldType type, uint32_t field_num, + uint64_t field_mod) { + upb_MtDataEncoder_GetInternal(e, ptr); -bool upb_EnumValueDef_HasOptions(const upb_EnumValueDef* v) { - return v->opts != (void*)kUpbDefOptDefault; -} + ptr = _upb_MtDataEncoder_MaybePutFieldSkip(e, ptr, field_num); + if (!ptr) return NULL; -const upb_EnumDef* upb_EnumValueDef_Enum(const upb_EnumValueDef* v) { - return v->parent; + ptr = _upb_MtDataEncoder_PutFieldType(e, ptr, type, field_mod); + if (!ptr) return NULL; + + return _upb_MtDataEncoder_MaybePutModifiers(e, ptr, type, field_mod); } -const char* upb_EnumValueDef_FullName(const upb_EnumValueDef* v) { - return v->full_name; +char* upb_MtDataEncoder_StartOneof(upb_MtDataEncoder* e, char* ptr) { + upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); + if (in->state.msg_state.oneof_state == kUpb_OneofState_NotStarted) { + ptr = upb_MtDataEncoder_Put(e, ptr, _upb_FromBase92(kUpb_EncodedValue_End)); + } else { + ptr = upb_MtDataEncoder_Put( + e, ptr, _upb_FromBase92(kUpb_EncodedValue_OneofSeparator)); + } + in->state.msg_state.oneof_state = kUpb_OneofState_StartedOneof; + return ptr; } -const char* upb_EnumValueDef_Name(const upb_EnumValueDef* v) { - return 
_upb_DefBuilder_FullToShort(v->full_name); +char* upb_MtDataEncoder_PutOneofField(upb_MtDataEncoder* e, char* ptr, + uint32_t field_num) { + upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); + if (in->state.msg_state.oneof_state == kUpb_OneofState_EmittedOneofField) { + ptr = upb_MtDataEncoder_Put( + e, ptr, _upb_FromBase92(kUpb_EncodedValue_FieldSeparator)); + if (!ptr) return NULL; + } + ptr = upb_MtDataEncoder_PutBase92Varint(e, ptr, field_num, _upb_ToBase92(0), + _upb_ToBase92(63)); + in->state.msg_state.oneof_state = kUpb_OneofState_EmittedOneofField; + return ptr; } -int32_t upb_EnumValueDef_Number(const upb_EnumValueDef* v) { return v->number; } +char* upb_MtDataEncoder_StartEnum(upb_MtDataEncoder* e, char* ptr) { + upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); + in->state.enum_state.present_values_mask = 0; + in->state.enum_state.last_written_value = 0; -uint32_t upb_EnumValueDef_Index(const upb_EnumValueDef* v) { - // Compute index in our parent's array. 
- return v - upb_EnumDef_Value(v->parent, 0); + return upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_EnumV1); } -static void create_enumvaldef(upb_DefBuilder* ctx, const char* prefix, - const UPB_DESC(EnumValueDescriptorProto) * - val_proto, - upb_EnumDef* e, upb_EnumValueDef* v) { - upb_StringView name = UPB_DESC(EnumValueDescriptorProto_name)(val_proto); +static char* upb_MtDataEncoder_FlushDenseEnumMask(upb_MtDataEncoder* e, + char* ptr) { + upb_MtDataEncoderInternal* in = (upb_MtDataEncoderInternal*)e->internal; + ptr = upb_MtDataEncoder_Put(e, ptr, in->state.enum_state.present_values_mask); + in->state.enum_state.present_values_mask = 0; + in->state.enum_state.last_written_value += 5; + return ptr; +} - v->parent = e; // Must happen prior to _upb_DefBuilder_Add() - v->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); - v->number = UPB_DESC(EnumValueDescriptorProto_number)(val_proto); - _upb_DefBuilder_Add(ctx, v->full_name, - _upb_DefType_Pack(v, UPB_DEFTYPE_ENUMVAL)); +char* upb_MtDataEncoder_PutEnumValue(upb_MtDataEncoder* e, char* ptr, + uint32_t val) { + // TODO(b/229641772): optimize this encoding. 
+ upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); + UPB_ASSERT(val >= in->state.enum_state.last_written_value); + uint32_t delta = val - in->state.enum_state.last_written_value; + if (delta >= 5 && in->state.enum_state.present_values_mask) { + ptr = upb_MtDataEncoder_FlushDenseEnumMask(e, ptr); + if (!ptr) { + return NULL; + } + delta -= 5; + } - UPB_DEF_SET_OPTIONS(v->opts, EnumValueDescriptorProto, EnumValueOptions, - val_proto); + if (delta >= 5) { + ptr = upb_MtDataEncoder_PutBase92Varint( + e, ptr, delta, kUpb_EncodedValue_MinSkip, kUpb_EncodedValue_MaxSkip); + in->state.enum_state.last_written_value += delta; + delta = 0; + } - bool ok = _upb_EnumDef_Insert(e, v, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); + UPB_ASSERT((in->state.enum_state.present_values_mask >> delta) == 0); + in->state.enum_state.present_values_mask |= 1ULL << delta; + return ptr; } -// Allocate and initialize an array of |n| enum value defs owned by |e|. -upb_EnumValueDef* _upb_EnumValueDefs_New( - upb_DefBuilder* ctx, const char* prefix, int n, - const UPB_DESC(EnumValueDescriptorProto) * const* protos, upb_EnumDef* e, - bool* is_sorted) { - _upb_DefType_CheckPadding(sizeof(upb_EnumValueDef)); - - upb_EnumValueDef* v = - _upb_DefBuilder_Alloc(ctx, sizeof(upb_EnumValueDef) * n); +char* upb_MtDataEncoder_EndEnum(upb_MtDataEncoder* e, char* ptr) { + upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); + if (!in->state.enum_state.present_values_mask) return ptr; + return upb_MtDataEncoder_FlushDenseEnumMask(e, ptr); +} - *is_sorted = true; - uint32_t previous = 0; - for (size_t i = 0; i < n; i++) { - create_enumvaldef(ctx, prefix, protos[i], e, &v[i]); - const uint32_t current = v[i].number; - if (previous > current) *is_sorted = false; - previous = current; - } +const char _kUpb_ToBase92[] = { + ' ', '!', '#', '$', '%', '&', '(', ')', '*', '+', ',', '-', '.', '/', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', + '>', 
'?', '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', + 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', + 'Z', '[', ']', '^', '_', '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', + 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', + 'w', 'x', 'y', 'z', '{', '|', '}', '~', +}; - if (upb_FileDef_Syntax(ctx->file) == kUpb_Syntax_Proto3 && n > 0 && - v[0].number != 0) { - _upb_DefBuilder_Errf(ctx, - "for proto3, the first enum value must be zero (%s)", - upb_EnumDef_FullName(e)); - } +const int8_t _kUpb_FromBase92[] = { + 0, 1, -1, 2, 3, 4, 5, -1, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, + 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, + 55, 56, 57, -1, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, + 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, +}; - return v; -} // Must be last. -struct upb_ExtensionRange { - const UPB_DESC(ExtensionRangeOptions) * opts; - int32_t start; - int32_t end; +#define EXTREG_KEY_SIZE (sizeof(upb_MiniTable*) + sizeof(uint32_t)) + +struct upb_ExtensionRegistry { + upb_Arena* arena; + upb_strtable exts; // Key is upb_MiniTable* concatenated with fieldnum. 
}; -upb_ExtensionRange* _upb_ExtensionRange_At(const upb_ExtensionRange* r, int i) { - return (upb_ExtensionRange*)&r[i]; +static void extreg_key(char* buf, const upb_MiniTable* l, uint32_t fieldnum) { + memcpy(buf, &l, sizeof(l)); + memcpy(buf + sizeof(l), &fieldnum, sizeof(fieldnum)); } -const UPB_DESC(ExtensionRangeOptions) * - upb_ExtensionRange_Options(const upb_ExtensionRange* r) { - return r->opts; +upb_ExtensionRegistry* upb_ExtensionRegistry_New(upb_Arena* arena) { + upb_ExtensionRegistry* r = upb_Arena_Malloc(arena, sizeof(*r)); + if (!r) return NULL; + r->arena = arena; + if (!upb_strtable_init(&r->exts, 8, arena)) return NULL; + return r; } -bool upb_ExtensionRange_HasOptions(const upb_ExtensionRange* r) { - return r->opts != (void*)kUpbDefOptDefault; +UPB_API bool upb_ExtensionRegistry_Add(upb_ExtensionRegistry* r, + const upb_MiniTableExtension* e) { + char buf[EXTREG_KEY_SIZE]; + extreg_key(buf, e->extendee, e->field.number); + if (upb_strtable_lookup2(&r->exts, buf, EXTREG_KEY_SIZE, NULL)) return false; + return upb_strtable_insert(&r->exts, buf, EXTREG_KEY_SIZE, + upb_value_constptr(e), r->arena); } -int32_t upb_ExtensionRange_Start(const upb_ExtensionRange* r) { - return r->start; +bool upb_ExtensionRegistry_AddArray(upb_ExtensionRegistry* r, + const upb_MiniTableExtension** e, + size_t count) { + const upb_MiniTableExtension** start = e; + const upb_MiniTableExtension** end = UPB_PTRADD(e, count); + for (; e < end; e++) { + if (!upb_ExtensionRegistry_Add(r, *e)) goto failure; + } + return true; + +failure: + // Back out the entries previously added. 
+ for (end = e, e = start; e < end; e++) { + const upb_MiniTableExtension* ext = *e; + char buf[EXTREG_KEY_SIZE]; + extreg_key(buf, ext->extendee, ext->field.number); + upb_strtable_remove2(&r->exts, buf, EXTREG_KEY_SIZE, NULL); + } + return false; } -int32_t upb_ExtensionRange_End(const upb_ExtensionRange* r) { return r->end; } +const upb_MiniTableExtension* upb_ExtensionRegistry_Lookup( + const upb_ExtensionRegistry* r, const upb_MiniTable* t, uint32_t num) { + char buf[EXTREG_KEY_SIZE]; + upb_value v; + extreg_key(buf, t, num); + if (upb_strtable_lookup2(&r->exts, buf, EXTREG_KEY_SIZE, &v)) { + return upb_value_getconstptr(v); + } else { + return NULL; + } +} -upb_ExtensionRange* _upb_ExtensionRanges_New( - upb_DefBuilder* ctx, int n, - const UPB_DESC(DescriptorProto_ExtensionRange) * const* protos, - const upb_MessageDef* m) { - upb_ExtensionRange* r = - _upb_DefBuilder_Alloc(ctx, sizeof(upb_ExtensionRange) * n); - for (int i = 0; i < n; i++) { - const int32_t start = - UPB_DESC(DescriptorProto_ExtensionRange_start)(protos[i]); - const int32_t end = UPB_DESC(DescriptorProto_ExtensionRange_end)(protos[i]); - const int32_t max = UPB_DESC(MessageOptions_message_set_wire_format)( - upb_MessageDef_Options(m)) - ? INT32_MAX - : kUpb_MaxFieldNumber + 1; +#include - // A full validation would also check that each range is disjoint, and that - // none of the fields overlap with the extension ranges, but we are just - // sanity checking here. - if (start < 1 || end <= start || end > max) { - _upb_DefBuilder_Errf(ctx, - "Extension range (%d, %d) is invalid, message=%s\n", - (int)start, (int)end, upb_MessageDef_FullName(m)); + +// Must be last. 
+ +const upb_MiniTableField* upb_MiniTable_FindFieldByNumber( + const upb_MiniTable* t, uint32_t number) { + const size_t i = ((size_t)number) - 1; // 0 wraps to SIZE_MAX + + // Ideal case: index into dense fields + if (i < t->dense_below) { + UPB_ASSERT(t->fields[i].number == number); + return &t->fields[i]; + } + + // Slow case: binary search + int lo = t->dense_below; + int hi = t->field_count - 1; + while (lo <= hi) { + int mid = (lo + hi) / 2; + uint32_t num = t->fields[mid].number; + if (num < number) { + lo = mid + 1; + continue; + } + if (num > number) { + hi = mid - 1; + continue; } + return &t->fields[mid]; + } + return NULL; +} - r[i].start = start; - r[i].end = end; - UPB_DEF_SET_OPTIONS(r[i].opts, DescriptorProto_ExtensionRange, - ExtensionRangeOptions, protos[i]); +static bool upb_MiniTable_Is_Oneof(const upb_MiniTableField* f) { + return f->presence < 0; +} + +const upb_MiniTableField* upb_MiniTable_GetOneof(const upb_MiniTable* m, + const upb_MiniTableField* f) { + if (UPB_UNLIKELY(!upb_MiniTable_Is_Oneof(f))) { + return NULL; + } + const upb_MiniTableField* ptr = &m->fields[0]; + const upb_MiniTableField* end = &m->fields[m->field_count]; + while (++ptr < end) { + if (ptr->presence == (*f).presence) { + return ptr; + } } + return NULL; +} - return r; +bool upb_MiniTable_NextOneofField(const upb_MiniTable* m, + const upb_MiniTableField** f) { + const upb_MiniTableField* ptr = *f; + const upb_MiniTableField* end = &m->fields[m->field_count]; + while (++ptr < end) { + if (ptr->presence == (*f)->presence) { + *f = ptr; + return true; + } + } + return false; } -#include -#include +const struct upb_MiniTable _kUpb_MiniTable_Empty = { + .subs = NULL, + .fields = NULL, + .size = 0, + .field_count = 0, + .ext = kUpb_ExtMode_NonExtendable, + .dense_below = 0, + .table_mask = -1, + .required_count = 0, +}; -// Must be last. -#define UPB_FIELD_TYPE_UNSPECIFIED 0 +#include -typedef struct { - size_t len; - char str[1]; // Null-terminated string data follows. 
-} str_t; -struct upb_FieldDef { - const UPB_DESC(FieldOptions) * opts; - const upb_FileDef* file; - const upb_MessageDef* msgdef; - const char* full_name; - const char* json_name; - union { - int64_t sint; - uint64_t uint; - double dbl; - float flt; - bool boolean; - str_t* str; - void* msg; // Always NULL. - } defaultval; - union { - const upb_OneofDef* oneof; - const upb_MessageDef* extension_scope; - } scope; - union { - const upb_MessageDef* msgdef; - const upb_EnumDef* enumdef; - const UPB_DESC(FieldDescriptorProto) * unresolved; - } sub; - uint32_t number_; - uint16_t index_; - uint16_t layout_index; // Index into msgdef->layout->fields or file->exts - bool has_default; - bool has_json_name; - bool has_presence; - bool is_extension; - bool is_packed; - bool is_proto3_optional; - upb_FieldType type_; - upb_Label label_; -#if UINTPTR_MAX == 0xffffffff - uint32_t padding; // Increase size to a multiple of 8. -#endif -}; +// Must be last. -upb_FieldDef* _upb_FieldDef_At(const upb_FieldDef* f, int i) { - return (upb_FieldDef*)&f[i]; -} +/* The upb core does not generally have a concept of default instances. However + * for descriptor options we make an exception since the max size is known and + * modest (<200 bytes). All types can share a default instance since it is + * initialized to zeroes. + * + * We have to allocate an extra pointer for upb's internal metadata. */ +static const char opt_default_buf[_UPB_MAXOPT_SIZE + sizeof(void*)] = {0}; +const char* kUpbDefOptDefault = &opt_default_buf[sizeof(void*)]; -const UPB_DESC(FieldOptions) * upb_FieldDef_Options(const upb_FieldDef* f) { - return f->opts; -} +const char* _upb_DefBuilder_FullToShort(const char* fullname) { + const char* p; -bool upb_FieldDef_HasOptions(const upb_FieldDef* f) { - return f->opts != (void*)kUpbDefOptDefault; + if (fullname == NULL) { + return NULL; + } else if ((p = strrchr(fullname, '.')) == NULL) { + /* No '.' in the name, return the full string. 
*/ + return fullname; + } else { + /* Return one past the last '.'. */ + return p + 1; + } } -const char* upb_FieldDef_FullName(const upb_FieldDef* f) { - return f->full_name; +void _upb_DefBuilder_FailJmp(upb_DefBuilder* ctx) { UPB_LONGJMP(ctx->err, 1); } + +void _upb_DefBuilder_Errf(upb_DefBuilder* ctx, const char* fmt, ...) { + va_list argp; + va_start(argp, fmt); + upb_Status_VSetErrorFormat(ctx->status, fmt, argp); + va_end(argp); + _upb_DefBuilder_FailJmp(ctx); } -upb_CType upb_FieldDef_CType(const upb_FieldDef* f) { - switch (f->type_) { - case kUpb_FieldType_Double: - return kUpb_CType_Double; - case kUpb_FieldType_Float: - return kUpb_CType_Float; - case kUpb_FieldType_Int64: - case kUpb_FieldType_SInt64: - case kUpb_FieldType_SFixed64: - return kUpb_CType_Int64; - case kUpb_FieldType_Int32: - case kUpb_FieldType_SFixed32: - case kUpb_FieldType_SInt32: - return kUpb_CType_Int32; - case kUpb_FieldType_UInt64: - case kUpb_FieldType_Fixed64: - return kUpb_CType_UInt64; - case kUpb_FieldType_UInt32: - case kUpb_FieldType_Fixed32: - return kUpb_CType_UInt32; - case kUpb_FieldType_Enum: - return kUpb_CType_Enum; - case kUpb_FieldType_Bool: - return kUpb_CType_Bool; - case kUpb_FieldType_String: - return kUpb_CType_String; - case kUpb_FieldType_Bytes: - return kUpb_CType_Bytes; - case kUpb_FieldType_Group: - case kUpb_FieldType_Message: - return kUpb_CType_Message; - } - UPB_UNREACHABLE(); +void _upb_DefBuilder_OomErr(upb_DefBuilder* ctx) { + upb_Status_SetErrorMessage(ctx->status, "out of memory"); + _upb_DefBuilder_FailJmp(ctx); } -upb_FieldType upb_FieldDef_Type(const upb_FieldDef* f) { return f->type_; } +// Verify a relative identifier string. The loop is branchless for speed. 
+static void _upb_DefBuilder_CheckIdentNotFull(upb_DefBuilder* ctx, + upb_StringView name) { + bool good = name.size > 0; -uint32_t upb_FieldDef_Index(const upb_FieldDef* f) { return f->index_; } + for (size_t i = 0; i < name.size; i++) { + const char c = name.data[i]; + const char d = c | 0x20; // force lowercase + const bool is_alpha = (('a' <= d) & (d <= 'z')) | (c == '_'); + const bool is_numer = ('0' <= c) & (c <= '9') & (i != 0); -upb_Label upb_FieldDef_Label(const upb_FieldDef* f) { return f->label_; } + good &= is_alpha | is_numer; + } -uint32_t upb_FieldDef_Number(const upb_FieldDef* f) { return f->number_; } + if (!good) _upb_DefBuilder_CheckIdentSlow(ctx, name, false); +} -bool upb_FieldDef_IsExtension(const upb_FieldDef* f) { return f->is_extension; } +const char* _upb_DefBuilder_MakeFullName(upb_DefBuilder* ctx, + const char* prefix, + upb_StringView name) { + _upb_DefBuilder_CheckIdentNotFull(ctx, name); + if (prefix) { + // ret = prefix + '.' + name; + size_t n = strlen(prefix); + char* ret = _upb_DefBuilder_Alloc(ctx, n + name.size + 2); + strcpy(ret, prefix); + ret[n] = '.'; + memcpy(&ret[n + 1], name.data, name.size); + ret[n + 1 + name.size] = '\0'; + return ret; + } else { + char* ret = upb_strdup2(name.data, name.size, ctx->arena); + if (!ret) _upb_DefBuilder_OomErr(ctx); + return ret; + } +} -bool upb_FieldDef_IsPacked(const upb_FieldDef* f) { return f->is_packed; } +static bool remove_component(char* base, size_t* len) { + if (*len == 0) return false; -const char* upb_FieldDef_Name(const upb_FieldDef* f) { - return _upb_DefBuilder_FullToShort(f->full_name); + for (size_t i = *len - 1; i > 0; i--) { + if (base[i] == '.') { + *len = i; + return true; + } + } + + *len = 0; + return true; } -const char* upb_FieldDef_JsonName(const upb_FieldDef* f) { - return f->json_name; +const void* _upb_DefBuilder_ResolveAny(upb_DefBuilder* ctx, + const char* from_name_dbg, + const char* base, upb_StringView sym, + upb_deftype_t* type) { + if (sym.size == 0) 
goto notfound; + upb_value v; + if (sym.data[0] == '.') { + /* Symbols starting with '.' are absolute, so we do a single lookup. + * Slice to omit the leading '.' */ + if (!_upb_DefPool_LookupSym(ctx->symtab, sym.data + 1, sym.size - 1, &v)) { + goto notfound; + } + } else { + /* Remove components from base until we find an entry or run out. */ + size_t baselen = base ? strlen(base) : 0; + char* tmp = malloc(sym.size + baselen + 1); + while (1) { + char* p = tmp; + if (baselen) { + memcpy(p, base, baselen); + p[baselen] = '.'; + p += baselen + 1; + } + memcpy(p, sym.data, sym.size); + p += sym.size; + if (_upb_DefPool_LookupSym(ctx->symtab, tmp, p - tmp, &v)) { + break; + } + if (!remove_component(tmp, &baselen)) { + free(tmp); + goto notfound; + } + } + free(tmp); + } + + *type = _upb_DefType_Type(v); + return _upb_DefType_Unpack(v, *type); + +notfound: + _upb_DefBuilder_Errf(ctx, "couldn't resolve name '" UPB_STRINGVIEW_FORMAT "'", + UPB_STRINGVIEW_ARGS(sym)); } -bool upb_FieldDef_HasJsonName(const upb_FieldDef* f) { - return f->has_json_name; +const void* _upb_DefBuilder_Resolve(upb_DefBuilder* ctx, + const char* from_name_dbg, const char* base, + upb_StringView sym, upb_deftype_t type) { + upb_deftype_t found_type; + const void* ret = + _upb_DefBuilder_ResolveAny(ctx, from_name_dbg, base, sym, &found_type); + if (ret && found_type != type) { + _upb_DefBuilder_Errf(ctx, + "type mismatch when resolving %s: couldn't find " + "name " UPB_STRINGVIEW_FORMAT " with type=%d", + from_name_dbg, UPB_STRINGVIEW_ARGS(sym), (int)type); + } + return ret; } -const upb_FileDef* upb_FieldDef_File(const upb_FieldDef* f) { return f->file; } +// Per ASCII this will lower-case a letter. If the result is a letter, the +// input was definitely a letter. If the output is not a letter, this may +// have transformed the character unpredictably. 
+static char upb_ascii_lower(char ch) { return ch | 0x20; } -const upb_MessageDef* upb_FieldDef_ContainingType(const upb_FieldDef* f) { - return f->msgdef; +// isalpha() etc. from are locale-dependent, which we don't want. +static bool upb_isbetween(uint8_t c, uint8_t low, uint8_t high) { + return low <= c && c <= high; } -const upb_MessageDef* upb_FieldDef_ExtensionScope(const upb_FieldDef* f) { - return f->is_extension ? f->scope.extension_scope : NULL; +static bool upb_isletter(char c) { + char lower = upb_ascii_lower(c); + return upb_isbetween(lower, 'a', 'z') || c == '_'; } -const upb_OneofDef* upb_FieldDef_ContainingOneof(const upb_FieldDef* f) { - return f->is_extension ? NULL : f->scope.oneof; +static bool upb_isalphanum(char c) { + return upb_isletter(c) || upb_isbetween(c, '0', '9'); } -const upb_OneofDef* upb_FieldDef_RealContainingOneof(const upb_FieldDef* f) { - const upb_OneofDef* oneof = upb_FieldDef_ContainingOneof(f); - if (!oneof || upb_OneofDef_IsSynthetic(oneof)) return NULL; - return oneof; +static bool TryGetChar(const char** src, const char* end, char* ch) { + if (*src == end) return false; + *ch = **src; + *src += 1; + return true; } -upb_MessageValue upb_FieldDef_Default(const upb_FieldDef* f) { - upb_MessageValue ret; - - if (upb_FieldDef_IsRepeated(f) || upb_FieldDef_IsSubMessage(f)) { - return (upb_MessageValue){.msg_val = NULL}; +static int TryGetHexDigit(const char** src, const char* end) { + char ch; + if (!TryGetChar(src, end, &ch)) return -1; + if ('0' <= ch && ch <= '9') { + return ch - '0'; } - - switch (upb_FieldDef_CType(f)) { - case kUpb_CType_Bool: - return (upb_MessageValue){.bool_val = f->defaultval.boolean}; - case kUpb_CType_Int64: - return (upb_MessageValue){.int64_val = f->defaultval.sint}; - case kUpb_CType_UInt64: - return (upb_MessageValue){.uint64_val = f->defaultval.uint}; - case kUpb_CType_Enum: - case kUpb_CType_Int32: - return (upb_MessageValue){.int32_val = (int32_t)f->defaultval.sint}; - case kUpb_CType_UInt32: 
- return (upb_MessageValue){.uint32_val = (uint32_t)f->defaultval.uint}; - case kUpb_CType_Float: - return (upb_MessageValue){.float_val = f->defaultval.flt}; - case kUpb_CType_Double: - return (upb_MessageValue){.double_val = f->defaultval.dbl}; - case kUpb_CType_String: - case kUpb_CType_Bytes: { - str_t* str = f->defaultval.str; - if (str) { - return (upb_MessageValue){ - .str_val = (upb_StringView){.data = str->str, .size = str->len}}; - } else { - return (upb_MessageValue){ - .str_val = (upb_StringView){.data = NULL, .size = 0}}; - } - } - default: - UPB_UNREACHABLE(); + ch = upb_ascii_lower(ch); + if ('a' <= ch && ch <= 'f') { + return ch - 'a' + 0xa; } + *src -= 1; // Char wasn't actually a hex digit. + return -1; +} +static char upb_DefBuilder_ParseHexEscape(upb_DefBuilder* ctx, + const upb_FieldDef* f, + const char** src, const char* end) { + int hex_digit = TryGetHexDigit(src, end); + if (hex_digit < 0) { + _upb_DefBuilder_Errf( + ctx, "\\x must be followed by at least one hex digit (field='%s')", + upb_FieldDef_FullName(f)); + return 0; + } + unsigned int ret = hex_digit; + while ((hex_digit = TryGetHexDigit(src, end)) >= 0) { + ret = (ret << 4) | hex_digit; + } + if (ret > 0xff) { + _upb_DefBuilder_Errf(ctx, "Value of hex escape in field %s exceeds 8 bits", + upb_FieldDef_FullName(f)); + return 0; + } return ret; } -const upb_MessageDef* upb_FieldDef_MessageSubDef(const upb_FieldDef* f) { - return upb_FieldDef_CType(f) == kUpb_CType_Message ? f->sub.msgdef : NULL; +static char TryGetOctalDigit(const char** src, const char* end) { + char ch; + if (!TryGetChar(src, end, &ch)) return -1; + if ('0' <= ch && ch <= '7') { + return ch - '0'; + } + *src -= 1; // Char wasn't actually an octal digit. + return -1; } -const upb_EnumDef* upb_FieldDef_EnumSubDef(const upb_FieldDef* f) { - return upb_FieldDef_CType(f) == kUpb_CType_Enum ? 
f->sub.enumdef : NULL; +static char upb_DefBuilder_ParseOctalEscape(upb_DefBuilder* ctx, + const upb_FieldDef* f, + const char** src, const char* end) { + char ch = 0; + for (int i = 0; i < 3; i++) { + char digit; + if ((digit = TryGetOctalDigit(src, end)) >= 0) { + ch = (ch << 3) | digit; + } + } + return ch; } -const upb_MiniTableField* upb_FieldDef_MiniTable(const upb_FieldDef* f) { - if (upb_FieldDef_IsExtension(f)) { - const upb_FileDef* file = upb_FieldDef_File(f); - return (upb_MiniTableField*)_upb_FileDef_ExtensionMiniTable( - file, f->layout_index); - } else { - const upb_MiniTable* layout = upb_MessageDef_MiniTable(f->msgdef); - return &layout->fields[f->layout_index]; +char _upb_DefBuilder_ParseEscape(upb_DefBuilder* ctx, const upb_FieldDef* f, + const char** src, const char* end) { + char ch; + if (!TryGetChar(src, end, &ch)) { + _upb_DefBuilder_Errf(ctx, "unterminated escape sequence in field %s", + upb_FieldDef_FullName(f)); + return 0; + } + switch (ch) { + case 'a': + return '\a'; + case 'b': + return '\b'; + case 'f': + return '\f'; + case 'n': + return '\n'; + case 'r': + return '\r'; + case 't': + return '\t'; + case 'v': + return '\v'; + case '\\': + return '\\'; + case '\'': + return '\''; + case '\"': + return '\"'; + case '?': + return '\?'; + case 'x': + case 'X': + return upb_DefBuilder_ParseHexEscape(ctx, f, src, end); + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + *src -= 1; + return upb_DefBuilder_ParseOctalEscape(ctx, f, src, end); } + _upb_DefBuilder_Errf(ctx, "Unknown escape sequence: \\%c", ch); } -const upb_MiniTableExtension* _upb_FieldDef_ExtensionMiniTable( - const upb_FieldDef* f) { - UPB_ASSERT(upb_FieldDef_IsExtension(f)); - const upb_FileDef* file = upb_FieldDef_File(f); - return _upb_FileDef_ExtensionMiniTable(file, f->layout_index); -} +void _upb_DefBuilder_CheckIdentSlow(upb_DefBuilder* ctx, upb_StringView name, + bool full) { + const char* str = name.data; + const size_t 
len = name.size; + bool start = true; + for (size_t i = 0; i < len; i++) { + const char c = str[i]; + if (c == '.') { + if (start || !full) { + _upb_DefBuilder_Errf( + ctx, "invalid name: unexpected '.' (" UPB_STRINGVIEW_FORMAT ")", + UPB_STRINGVIEW_ARGS(name)); + } + start = true; + } else if (start) { + if (!upb_isletter(c)) { + _upb_DefBuilder_Errf(ctx, + "invalid name: path components must start with a " + "letter (" UPB_STRINGVIEW_FORMAT ")", + UPB_STRINGVIEW_ARGS(name)); + } + start = false; + } else if (!upb_isalphanum(c)) { + _upb_DefBuilder_Errf( + ctx, + "invalid name: non-alphanumeric character (" UPB_STRINGVIEW_FORMAT + ")", + UPB_STRINGVIEW_ARGS(name)); + } + } + if (start) { + _upb_DefBuilder_Errf(ctx, + "invalid name: empty part (" UPB_STRINGVIEW_FORMAT ")", + UPB_STRINGVIEW_ARGS(name)); + } -bool _upb_FieldDef_IsClosedEnum(const upb_FieldDef* f) { - if (f->type_ != kUpb_FieldType_Enum) return false; - return upb_EnumDef_IsClosed(f->sub.enumdef); + // We should never reach this point. + UPB_ASSERT(false); } -bool _upb_FieldDef_IsProto3Optional(const upb_FieldDef* f) { - return f->is_proto3_optional; -} -int _upb_FieldDef_LayoutIndex(const upb_FieldDef* f) { return f->layout_index; } -uint64_t _upb_FieldDef_Modifiers(const upb_FieldDef* f) { - uint64_t out = f->is_packed ? kUpb_FieldModifier_IsPacked : 0; +// Must be last. 
- switch (f->label_) { - case kUpb_Label_Optional: - if (!upb_FieldDef_HasPresence(f)) { - out |= kUpb_FieldModifier_IsProto3Singular; - } - break; - case kUpb_Label_Repeated: - out |= kUpb_FieldModifier_IsRepeated; - break; - case kUpb_Label_Required: - out |= kUpb_FieldModifier_IsRequired; - break; - } +struct upb_DefPool { + upb_Arena* arena; + upb_strtable syms; // full_name -> packed def ptr + upb_strtable files; // file_name -> (upb_FileDef*) + upb_inttable exts; // (upb_MiniTableExtension*) -> (upb_FieldDef*) + upb_ExtensionRegistry* extreg; + upb_MiniTablePlatform platform; + void* scratch_data; + size_t scratch_size; + size_t bytes_loaded; +}; - if (_upb_FieldDef_IsClosedEnum(f)) { - out |= kUpb_FieldModifier_IsClosedEnum; - } - return out; +void upb_DefPool_Free(upb_DefPool* s) { + upb_Arena_Free(s->arena); + upb_gfree(s->scratch_data); + upb_gfree(s); } -bool upb_FieldDef_HasDefault(const upb_FieldDef* f) { return f->has_default; } -bool upb_FieldDef_HasPresence(const upb_FieldDef* f) { return f->has_presence; } +upb_DefPool* upb_DefPool_New(void) { + upb_DefPool* s = upb_gmalloc(sizeof(*s)); + if (!s) return NULL; -bool upb_FieldDef_HasSubDef(const upb_FieldDef* f) { - return upb_FieldDef_IsSubMessage(f) || - upb_FieldDef_CType(f) == kUpb_CType_Enum; -} + s->arena = upb_Arena_New(); + s->bytes_loaded = 0; -bool upb_FieldDef_IsMap(const upb_FieldDef* f) { - return upb_FieldDef_IsRepeated(f) && upb_FieldDef_IsSubMessage(f) && - upb_MessageDef_IsMapEntry(upb_FieldDef_MessageSubDef(f)); -} + s->scratch_size = 240; + s->scratch_data = upb_gmalloc(s->scratch_size); + if (!s->scratch_data) goto err; -bool upb_FieldDef_IsOptional(const upb_FieldDef* f) { - return upb_FieldDef_Label(f) == kUpb_Label_Optional; -} + if (!upb_strtable_init(&s->syms, 32, s->arena)) goto err; + if (!upb_strtable_init(&s->files, 4, s->arena)) goto err; + if (!upb_inttable_init(&s->exts, s->arena)) goto err; -bool upb_FieldDef_IsPrimitive(const upb_FieldDef* f) { - return 
!upb_FieldDef_IsString(f) && !upb_FieldDef_IsSubMessage(f); -} + s->extreg = upb_ExtensionRegistry_New(s->arena); + if (!s->extreg) goto err; -bool upb_FieldDef_IsRepeated(const upb_FieldDef* f) { - return upb_FieldDef_Label(f) == kUpb_Label_Repeated; -} + s->platform = kUpb_MiniTablePlatform_Native; -bool upb_FieldDef_IsRequired(const upb_FieldDef* f) { - return upb_FieldDef_Label(f) == kUpb_Label_Required; -} + return s; -bool upb_FieldDef_IsString(const upb_FieldDef* f) { - return upb_FieldDef_CType(f) == kUpb_CType_String || - upb_FieldDef_CType(f) == kUpb_CType_Bytes; +err: + upb_DefPool_Free(s); + return NULL; } -bool upb_FieldDef_IsSubMessage(const upb_FieldDef* f) { - return upb_FieldDef_CType(f) == kUpb_CType_Message; -} +bool _upb_DefPool_InsertExt(upb_DefPool* s, const upb_MiniTableExtension* ext, + const upb_FieldDef* f) { + return upb_inttable_insert(&s->exts, (uintptr_t)ext, upb_value_constptr(f), + s->arena); +} -static bool between(int32_t x, int32_t low, int32_t high) { - return x >= low && x <= high; +bool _upb_DefPool_InsertSym(upb_DefPool* s, upb_StringView sym, upb_value v, + upb_Status* status) { + // TODO: table should support an operation "tryinsert" to avoid the double + // lookup. + if (upb_strtable_lookup2(&s->syms, sym.data, sym.size, NULL)) { + upb_Status_SetErrorFormat(status, "duplicate symbol '%s'", sym.data); + return false; + } + if (!upb_strtable_insert(&s->syms, sym.data, sym.size, v, s->arena)) { + upb_Status_SetErrorMessage(status, "out of memory"); + return false; + } + return true; } -bool upb_FieldDef_checklabel(int32_t label) { return between(label, 1, 3); } -bool upb_FieldDef_checktype(int32_t type) { return between(type, 1, 11); } -bool upb_FieldDef_checkintfmt(int32_t fmt) { return between(fmt, 1, 3); } +static const void* _upb_DefPool_Unpack(const upb_DefPool* s, const char* sym, + size_t size, upb_deftype_t type) { + upb_value v; + return upb_strtable_lookup2(&s->syms, sym, size, &v) + ? 
_upb_DefType_Unpack(v, type) + : NULL; +} -bool upb_FieldDef_checkdescriptortype(int32_t type) { - return between(type, 1, 18); +bool _upb_DefPool_LookupSym(const upb_DefPool* s, const char* sym, size_t size, + upb_value* v) { + return upb_strtable_lookup2(&s->syms, sym, size, v); } -static bool streql2(const char* a, size_t n, const char* b) { - return n == strlen(b) && memcmp(a, b, n) == 0; +upb_ExtensionRegistry* _upb_DefPool_ExtReg(const upb_DefPool* s) { + return s->extreg; } -// Implement the transformation as described in the spec: -// 1. upper case all letters after an underscore. -// 2. remove all underscores. -static char* make_json_name(const char* name, size_t size, upb_Arena* a) { - char* out = upb_Arena_Malloc(a, size + 1); // +1 is to add a trailing '\0' - if (out == NULL) return NULL; +void** _upb_DefPool_ScratchData(const upb_DefPool* s) { + return (void**)&s->scratch_data; +} - bool ucase_next = false; - char* des = out; - for (size_t i = 0; i < size; i++) { - if (name[i] == '_') { - ucase_next = true; - } else { - *des++ = ucase_next ? toupper(name[i]) : name[i]; - ucase_next = false; - } - } - *des++ = '\0'; - return out; +size_t* _upb_DefPool_ScratchSize(const upb_DefPool* s) { + return (size_t*)&s->scratch_size; } -static str_t* newstr(upb_DefBuilder* ctx, const char* data, size_t len) { - str_t* ret = _upb_DefBuilder_Alloc(ctx, sizeof(*ret) + len); - if (!ret) _upb_DefBuilder_OomErr(ctx); - ret->len = len; - if (len) memcpy(ret->str, data, len); - ret->str[len] = '\0'; - return ret; +void _upb_DefPool_SetPlatform(upb_DefPool* s, upb_MiniTablePlatform platform) { + assert(upb_strtable_count(&s->files) == 0); + s->platform = platform; } -static str_t* unescape(upb_DefBuilder* ctx, const upb_FieldDef* f, - const char* data, size_t len) { - // Size here is an upper bound; escape sequences could ultimately shrink it. 
- str_t* ret = _upb_DefBuilder_Alloc(ctx, sizeof(*ret) + len); - char* dst = &ret->str[0]; - const char* src = data; - const char* end = data + len; +const upb_MessageDef* upb_DefPool_FindMessageByName(const upb_DefPool* s, + const char* sym) { + return _upb_DefPool_Unpack(s, sym, strlen(sym), UPB_DEFTYPE_MSG); +} - while (src < end) { - if (*src == '\\') { - src++; - *dst++ = _upb_DefBuilder_ParseEscape(ctx, f, &src, end); - } else { - *dst++ = *src++; - } - } +const upb_MessageDef* upb_DefPool_FindMessageByNameWithSize( + const upb_DefPool* s, const char* sym, size_t len) { + return _upb_DefPool_Unpack(s, sym, len, UPB_DEFTYPE_MSG); +} - ret->len = dst - &ret->str[0]; - return ret; +const upb_EnumDef* upb_DefPool_FindEnumByName(const upb_DefPool* s, + const char* sym) { + return _upb_DefPool_Unpack(s, sym, strlen(sym), UPB_DEFTYPE_ENUM); } -static void parse_default(upb_DefBuilder* ctx, const char* str, size_t len, - upb_FieldDef* f) { - char* end; - char nullz[64]; - errno = 0; +const upb_EnumValueDef* upb_DefPool_FindEnumByNameval(const upb_DefPool* s, + const char* sym) { + return _upb_DefPool_Unpack(s, sym, strlen(sym), UPB_DEFTYPE_ENUMVAL); +} - switch (upb_FieldDef_CType(f)) { - case kUpb_CType_Int32: - case kUpb_CType_Int64: - case kUpb_CType_UInt32: - case kUpb_CType_UInt64: - case kUpb_CType_Double: - case kUpb_CType_Float: - // Standard C number parsing functions expect null-terminated strings. - if (len >= sizeof(nullz) - 1) { - _upb_DefBuilder_Errf(ctx, "Default too long: %.*s", (int)len, str); - } - memcpy(nullz, str, len); - nullz[len] = '\0'; - str = nullz; - break; +const upb_FileDef* upb_DefPool_FindFileByName(const upb_DefPool* s, + const char* name) { + upb_value v; + return upb_strtable_lookup(&s->files, name, &v) ? upb_value_getconstptr(v) + : NULL; +} + +const upb_FileDef* upb_DefPool_FindFileByNameWithSize(const upb_DefPool* s, + const char* name, + size_t len) { + upb_value v; + return upb_strtable_lookup2(&s->files, name, len, &v) + ? 
upb_value_getconstptr(v) + : NULL; +} + +const upb_FieldDef* upb_DefPool_FindExtensionByNameWithSize( + const upb_DefPool* s, const char* name, size_t size) { + upb_value v; + if (!upb_strtable_lookup2(&s->syms, name, size, &v)) return NULL; + + switch (_upb_DefType_Type(v)) { + case UPB_DEFTYPE_FIELD: + return _upb_DefType_Unpack(v, UPB_DEFTYPE_FIELD); + case UPB_DEFTYPE_MSG: { + const upb_MessageDef* m = _upb_DefType_Unpack(v, UPB_DEFTYPE_MSG); + return _upb_MessageDef_InMessageSet(m) + ? upb_MessageDef_NestedExtension(m, 0) + : NULL; + } default: break; } - switch (upb_FieldDef_CType(f)) { - case kUpb_CType_Int32: { - long val = strtol(str, &end, 0); - if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end) { - goto invalid; + return NULL; +} + +const upb_FieldDef* upb_DefPool_FindExtensionByName(const upb_DefPool* s, + const char* sym) { + return upb_DefPool_FindExtensionByNameWithSize(s, sym, strlen(sym)); +} + +const upb_ServiceDef* upb_DefPool_FindServiceByName(const upb_DefPool* s, + const char* name) { + return _upb_DefPool_Unpack(s, name, strlen(name), UPB_DEFTYPE_SERVICE); +} + +const upb_ServiceDef* upb_DefPool_FindServiceByNameWithSize( + const upb_DefPool* s, const char* name, size_t size) { + return _upb_DefPool_Unpack(s, name, size, UPB_DEFTYPE_SERVICE); +} + +const upb_FileDef* upb_DefPool_FindFileContainingSymbol(const upb_DefPool* s, + const char* name) { + upb_value v; + // TODO(haberman): non-extension fields and oneofs. 
+ if (upb_strtable_lookup(&s->syms, name, &v)) { + switch (_upb_DefType_Type(v)) { + case UPB_DEFTYPE_EXT: { + const upb_FieldDef* f = _upb_DefType_Unpack(v, UPB_DEFTYPE_EXT); + return upb_FieldDef_File(f); } - f->defaultval.sint = val; - break; - } - case kUpb_CType_Enum: { - const upb_EnumDef* e = f->sub.enumdef; - const upb_EnumValueDef* ev = - upb_EnumDef_FindValueByNameWithSize(e, str, len); - if (!ev) { - goto invalid; + case UPB_DEFTYPE_MSG: { + const upb_MessageDef* m = _upb_DefType_Unpack(v, UPB_DEFTYPE_MSG); + return upb_MessageDef_File(m); } - f->defaultval.sint = upb_EnumValueDef_Number(ev); - break; - } - case kUpb_CType_Int64: { - long long val = strtoll(str, &end, 0); - if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || *end) { - goto invalid; + case UPB_DEFTYPE_ENUM: { + const upb_EnumDef* e = _upb_DefType_Unpack(v, UPB_DEFTYPE_ENUM); + return upb_EnumDef_File(e); } - f->defaultval.sint = val; - break; - } - case kUpb_CType_UInt32: { - unsigned long val = strtoul(str, &end, 0); - if (val > UINT32_MAX || errno == ERANGE || *end) { - goto invalid; + case UPB_DEFTYPE_ENUMVAL: { + const upb_EnumValueDef* ev = + _upb_DefType_Unpack(v, UPB_DEFTYPE_ENUMVAL); + return upb_EnumDef_File(upb_EnumValueDef_Enum(ev)); } - f->defaultval.uint = val; - break; - } - case kUpb_CType_UInt64: { - unsigned long long val = strtoull(str, &end, 0); - if (val > UINT64_MAX || errno == ERANGE || *end) { - goto invalid; + case UPB_DEFTYPE_SERVICE: { + const upb_ServiceDef* service = + _upb_DefType_Unpack(v, UPB_DEFTYPE_SERVICE); + return upb_ServiceDef_File(service); } - f->defaultval.uint = val; - break; + default: + UPB_UNREACHABLE(); } - case kUpb_CType_Double: { - double val = strtod(str, &end); - if (errno == ERANGE || *end) { - goto invalid; + } + + const char* last_dot = strrchr(name, '.'); + if (last_dot) { + const upb_MessageDef* parent = + upb_DefPool_FindMessageByNameWithSize(s, name, last_dot - name); + if (parent) { + const char* shortname = last_dot + 1; 
+ if (upb_MessageDef_FindByNameWithSize(parent, shortname, + strlen(shortname), NULL, NULL)) { + return upb_MessageDef_File(parent); } - f->defaultval.dbl = val; - break; } - case kUpb_CType_Float: { - float val = strtof(str, &end); - if (errno == ERANGE || *end) { - goto invalid; - } - f->defaultval.flt = val; - break; - } - case kUpb_CType_Bool: { - if (streql2(str, len, "false")) { - f->defaultval.boolean = false; - } else if (streql2(str, len, "true")) { - f->defaultval.boolean = true; - } else { - goto invalid; - } - break; - } - case kUpb_CType_String: - f->defaultval.str = newstr(ctx, str, len); - break; - case kUpb_CType_Bytes: - f->defaultval.str = unescape(ctx, f, str, len); - break; - case kUpb_CType_Message: - /* Should not have a default value. */ - _upb_DefBuilder_Errf(ctx, "Message should not have a default (%s)", - upb_FieldDef_FullName(f)); } - return; - -invalid: - _upb_DefBuilder_Errf(ctx, "Invalid default '%.*s' for field %s of type %d", - (int)len, str, upb_FieldDef_FullName(f), - (int)upb_FieldDef_Type(f)); + return NULL; } -static void set_default_default(upb_DefBuilder* ctx, upb_FieldDef* f) { - switch (upb_FieldDef_CType(f)) { - case kUpb_CType_Int32: - case kUpb_CType_Int64: - f->defaultval.sint = 0; - break; - case kUpb_CType_UInt64: - case kUpb_CType_UInt32: - f->defaultval.uint = 0; - break; - case kUpb_CType_Double: - case kUpb_CType_Float: - f->defaultval.dbl = 0; - break; - case kUpb_CType_String: - case kUpb_CType_Bytes: - f->defaultval.str = newstr(ctx, NULL, 0); - break; - case kUpb_CType_Bool: - f->defaultval.boolean = false; - break; - case kUpb_CType_Enum: { - const upb_EnumValueDef* v = upb_EnumDef_Value(f->sub.enumdef, 0); - f->defaultval.sint = upb_EnumValueDef_Number(v); - break; +static void remove_filedef(upb_DefPool* s, upb_FileDef* file) { + intptr_t iter = UPB_INTTABLE_BEGIN; + upb_StringView key; + upb_value val; + while (upb_strtable_next2(&s->syms, &key, &val, &iter)) { + const upb_FileDef* f; + switch 
(_upb_DefType_Type(val)) { + case UPB_DEFTYPE_EXT: + f = upb_FieldDef_File(_upb_DefType_Unpack(val, UPB_DEFTYPE_EXT)); + break; + case UPB_DEFTYPE_MSG: + f = upb_MessageDef_File(_upb_DefType_Unpack(val, UPB_DEFTYPE_MSG)); + break; + case UPB_DEFTYPE_ENUM: + f = upb_EnumDef_File(_upb_DefType_Unpack(val, UPB_DEFTYPE_ENUM)); + break; + case UPB_DEFTYPE_ENUMVAL: + f = upb_EnumDef_File(upb_EnumValueDef_Enum( + _upb_DefType_Unpack(val, UPB_DEFTYPE_ENUMVAL))); + break; + case UPB_DEFTYPE_SERVICE: + f = upb_ServiceDef_File(_upb_DefType_Unpack(val, UPB_DEFTYPE_SERVICE)); + break; + default: + UPB_UNREACHABLE(); } - case kUpb_CType_Message: - break; - } -} - -static void _upb_FieldDef_Create(upb_DefBuilder* ctx, const char* prefix, - const UPB_DESC(FieldDescriptorProto) * - field_proto, - upb_MessageDef* m, upb_FieldDef* f) { - // Must happen before _upb_DefBuilder_Add() - f->file = _upb_DefBuilder_File(ctx); - if (!UPB_DESC(FieldDescriptorProto_has_name)(field_proto)) { - _upb_DefBuilder_Errf(ctx, "field has no name"); + if (f == file) upb_strtable_removeiter(&s->syms, &iter); } +} - const upb_StringView name = UPB_DESC(FieldDescriptorProto_name)(field_proto); - - f->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); - f->label_ = (int)UPB_DESC(FieldDescriptorProto_label)(field_proto); - f->number_ = UPB_DESC(FieldDescriptorProto_number)(field_proto); - f->is_proto3_optional = - UPB_DESC(FieldDescriptorProto_proto3_optional)(field_proto); - f->msgdef = m; - f->scope.oneof = NULL; - - f->has_json_name = UPB_DESC(FieldDescriptorProto_has_json_name)(field_proto); - if (f->has_json_name) { - const upb_StringView sv = - UPB_DESC(FieldDescriptorProto_json_name)(field_proto); - f->json_name = upb_strdup2(sv.data, sv.size, ctx->arena); +static const upb_FileDef* upb_DefBuilder_AddFileToPool( + upb_DefBuilder* const builder, upb_DefPool* const s, + const UPB_DESC(FileDescriptorProto) * const file_proto, + const upb_StringView name, upb_Status* const status) { + if 
(UPB_SETJMP(builder->err) != 0) { + UPB_ASSERT(!upb_Status_IsOk(status)); + if (builder->file) { + remove_filedef(s, builder->file); + builder->file = NULL; + } + } else if (!builder->arena || !builder->tmp_arena) { + _upb_DefBuilder_OomErr(builder); } else { - f->json_name = make_json_name(name.data, name.size, ctx->arena); + _upb_FileDef_Create(builder, file_proto); + upb_strtable_insert(&s->files, name.data, name.size, + upb_value_constptr(builder->file), builder->arena); + UPB_ASSERT(upb_Status_IsOk(status)); + upb_Arena_Fuse(s->arena, builder->arena); } - if (!f->json_name) _upb_DefBuilder_OomErr(ctx); - const bool has_type = UPB_DESC(FieldDescriptorProto_has_type)(field_proto); - const bool has_type_name = - UPB_DESC(FieldDescriptorProto_has_type_name)(field_proto); + if (builder->arena) upb_Arena_Free(builder->arena); + if (builder->tmp_arena) upb_Arena_Free(builder->tmp_arena); + return builder->file; +} - f->type_ = (int)UPB_DESC(FieldDescriptorProto_type)(field_proto); +static const upb_FileDef* _upb_DefPool_AddFile( + upb_DefPool* s, const UPB_DESC(FileDescriptorProto) * file_proto, + const upb_MiniTableFile* layout, upb_Status* status) { + const upb_StringView name = UPB_DESC(FileDescriptorProto_name)(file_proto); - if (has_type) { - switch (f->type_) { - case kUpb_FieldType_Message: - case kUpb_FieldType_Group: - case kUpb_FieldType_Enum: - if (!has_type_name) { - _upb_DefBuilder_Errf(ctx, "field of type %d requires type name (%s)", - (int)f->type_, f->full_name); - } - break; - default: - if (has_type_name) { - _upb_DefBuilder_Errf( - ctx, "invalid type for field with type_name set (%s, %d)", - f->full_name, (int)f->type_); - } + // Determine whether we already know about this file. 
+ { + upb_value v; + if (upb_strtable_lookup2(&s->files, name.data, name.size, &v)) { + upb_Status_SetErrorFormat(status, + "duplicate file name " UPB_STRINGVIEW_FORMAT, + UPB_STRINGVIEW_ARGS(name)); + return NULL; } } - if (!has_type && has_type_name) { - f->type_ = - UPB_FIELD_TYPE_UNSPECIFIED; // We'll assign this in resolve_subdef() - } else { - if (f->type_ < kUpb_FieldType_Double || f->type_ > kUpb_FieldType_SInt64) { - _upb_DefBuilder_Errf(ctx, "invalid type for field %s (%d)", f->full_name, - f->type_); - } - } + upb_DefBuilder ctx = { + .symtab = s, + .layout = layout, + .platform = s->platform, + .msg_count = 0, + .enum_count = 0, + .ext_count = 0, + .status = status, + .file = NULL, + .arena = upb_Arena_New(), + .tmp_arena = upb_Arena_New(), + }; - if (f->label_ < kUpb_Label_Optional || f->label_ > kUpb_Label_Repeated) { - _upb_DefBuilder_Errf(ctx, "invalid label for field %s (%d)", f->full_name, - f->label_); - } + return upb_DefBuilder_AddFileToPool(&ctx, s, file_proto, name, status); +} - /* We can't resolve the subdef or (in the case of extensions) the containing - * message yet, because it may not have been defined yet. We stash a pointer - * to the field_proto until later when we can properly resolve it. */ - f->sub.unresolved = field_proto; +const upb_FileDef* upb_DefPool_AddFile(upb_DefPool* s, + const UPB_DESC(FileDescriptorProto) * + file_proto, + upb_Status* status) { + return _upb_DefPool_AddFile(s, file_proto, NULL, status); +} - if (f->label_ == kUpb_Label_Required && - upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto3) { - _upb_DefBuilder_Errf(ctx, "proto3 fields cannot be required (%s)", - f->full_name); - } +bool _upb_DefPool_LoadDefInitEx(upb_DefPool* s, const _upb_DefPool_Init* init, + bool rebuild_minitable) { + /* Since this function should never fail (it would indicate a bug in upb) we + * print errors to stderr instead of returning error status to the user. 
*/ + _upb_DefPool_Init** deps = init->deps; + UPB_DESC(FileDescriptorProto) * file; + upb_Arena* arena; + upb_Status status; - if (UPB_DESC(FieldDescriptorProto_has_oneof_index)(field_proto)) { - uint32_t oneof_index = - UPB_DESC(FieldDescriptorProto_oneof_index)(field_proto); + upb_Status_Clear(&status); - if (upb_FieldDef_Label(f) != kUpb_Label_Optional) { - _upb_DefBuilder_Errf(ctx, "fields in oneof must have OPTIONAL label (%s)", - f->full_name); - } + if (upb_DefPool_FindFileByName(s, init->filename)) { + return true; + } - if (!m) { - _upb_DefBuilder_Errf(ctx, "oneof field (%s) has no containing msg", - f->full_name); - } + arena = upb_Arena_New(); - if (oneof_index >= upb_MessageDef_OneofCount(m)) { - _upb_DefBuilder_Errf(ctx, "oneof_index out of range (%s)", f->full_name); - } + for (; *deps; deps++) { + if (!_upb_DefPool_LoadDefInitEx(s, *deps, rebuild_minitable)) goto err; + } - upb_OneofDef* oneof = (upb_OneofDef*)upb_MessageDef_Oneof(m, oneof_index); - f->scope.oneof = oneof; + file = UPB_DESC(FileDescriptorProto_parse_ex)( + init->descriptor.data, init->descriptor.size, NULL, + kUpb_DecodeOption_AliasString, arena); + s->bytes_loaded += init->descriptor.size; - _upb_OneofDef_Insert(ctx, oneof, f, name.data, name.size); + if (!file) { + upb_Status_SetErrorFormat( + &status, + "Failed to parse compiled-in descriptor for file '%s'. This should " + "never happen.", + init->filename); + goto err; } - UPB_DEF_SET_OPTIONS(f->opts, FieldDescriptorProto, FieldOptions, field_proto); + const upb_MiniTableFile* mt = rebuild_minitable ? NULL : init->layout; + if (!_upb_DefPool_AddFile(s, file, mt, &status)) { + goto err; + } - if (UPB_DESC(FieldOptions_has_packed)(f->opts)) { - f->is_packed = UPB_DESC(FieldOptions_packed)(f->opts); - } else { - // Repeated fields default to packed for proto3 only. 
- f->is_packed = has_type && upb_FieldDef_IsPrimitive(f) && - f->label_ == kUpb_Label_Repeated && - upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto3; - } + upb_Arena_Free(arena); + return true; - f->has_presence = - (!upb_FieldDef_IsRepeated(f)) && - (f->type_ == kUpb_FieldType_Message || f->type_ == kUpb_FieldType_Group || - upb_FieldDef_ContainingOneof(f) || - (upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto2)); +err: + fprintf(stderr, + "Error loading compiled-in descriptor for file '%s' (this should " + "never happen): %s\n", + init->filename, upb_Status_ErrorMessage(&status)); + upb_Arena_Free(arena); + return false; } -static void _upb_FieldDef_CreateExt(upb_DefBuilder* ctx, const char* prefix, - const UPB_DESC(FieldDescriptorProto) * - field_proto, - upb_MessageDef* m, upb_FieldDef* f) { - f->is_extension = true; - _upb_FieldDef_Create(ctx, prefix, field_proto, m, f); - - if (UPB_DESC(FieldDescriptorProto_has_oneof_index)(field_proto)) { - _upb_DefBuilder_Errf(ctx, "oneof_index provided for extension field (%s)", - f->full_name); - } - - f->scope.extension_scope = m; - _upb_DefBuilder_Add(ctx, f->full_name, _upb_DefType_Pack(f, UPB_DEFTYPE_EXT)); - f->layout_index = ctx->ext_count++; - - if (ctx->layout) { - UPB_ASSERT(_upb_FieldDef_ExtensionMiniTable(f)->field.number == f->number_); - } +size_t _upb_DefPool_BytesLoaded(const upb_DefPool* s) { + return s->bytes_loaded; } -static void _upb_FieldDef_CreateNotExt(upb_DefBuilder* ctx, const char* prefix, - const UPB_DESC(FieldDescriptorProto) * - field_proto, - upb_MessageDef* m, upb_FieldDef* f) { - f->is_extension = false; - _upb_FieldDef_Create(ctx, prefix, field_proto, m, f); - - if (!UPB_DESC(FieldDescriptorProto_has_oneof_index)(field_proto)) { - if (f->is_proto3_optional) { - _upb_DefBuilder_Errf( - ctx, - "non-extension field (%s) with proto3_optional was not in a oneof", - f->full_name); - } - } +upb_Arena* _upb_DefPool_Arena(const upb_DefPool* s) { return s->arena; } - 
_upb_MessageDef_InsertField(ctx, m, f); +const upb_FieldDef* upb_DefPool_FindExtensionByMiniTable( + const upb_DefPool* s, const upb_MiniTableExtension* ext) { + upb_value v; + bool ok = upb_inttable_lookup(&s->exts, (uintptr_t)ext, &v); + UPB_ASSERT(ok); + return upb_value_getconstptr(v); } -upb_FieldDef* _upb_Extensions_New( - upb_DefBuilder* ctx, int n, - const UPB_DESC(FieldDescriptorProto) * const* protos, const char* prefix, - upb_MessageDef* m) { - _upb_DefType_CheckPadding(sizeof(upb_FieldDef)); - upb_FieldDef* defs = - (upb_FieldDef*)_upb_DefBuilder_Alloc(ctx, sizeof(upb_FieldDef) * n); +const upb_FieldDef* upb_DefPool_FindExtensionByNumber(const upb_DefPool* s, + const upb_MessageDef* m, + int32_t fieldnum) { + const upb_MiniTable* t = upb_MessageDef_MiniTable(m); + const upb_MiniTableExtension* ext = + upb_ExtensionRegistry_Lookup(s->extreg, t, fieldnum); + return ext ? upb_DefPool_FindExtensionByMiniTable(s, ext) : NULL; +} - for (int i = 0; i < n; i++) { - upb_FieldDef* f = &defs[i]; +const upb_ExtensionRegistry* upb_DefPool_ExtensionRegistry( + const upb_DefPool* s) { + return s->extreg; +} - _upb_FieldDef_CreateExt(ctx, prefix, protos[i], m, f); - f->index_ = i; +const upb_FieldDef** upb_DefPool_GetAllExtensions(const upb_DefPool* s, + const upb_MessageDef* m, + size_t* count) { + size_t n = 0; + intptr_t iter = UPB_INTTABLE_BEGIN; + uintptr_t key; + upb_value val; + // This is O(all exts) instead of O(exts for m). If we need this to be + // efficient we may need to make extreg into a two-level table, or have a + // second per-message index. 
+ while (upb_inttable_next(&s->exts, &key, &val, &iter)) { + const upb_FieldDef* f = upb_value_getconstptr(val); + if (upb_FieldDef_ContainingType(f) == m) n++; } - - return defs; + const upb_FieldDef** exts = malloc(n * sizeof(*exts)); + iter = UPB_INTTABLE_BEGIN; + size_t i = 0; + while (upb_inttable_next(&s->exts, &key, &val, &iter)) { + const upb_FieldDef* f = upb_value_getconstptr(val); + if (upb_FieldDef_ContainingType(f) == m) exts[i++] = f; + } + *count = n; + return exts; } -upb_FieldDef* _upb_FieldDefs_New( - upb_DefBuilder* ctx, int n, - const UPB_DESC(FieldDescriptorProto) * const* protos, const char* prefix, - upb_MessageDef* m, bool* is_sorted) { - _upb_DefType_CheckPadding(sizeof(upb_FieldDef)); - upb_FieldDef* defs = - (upb_FieldDef*)_upb_DefBuilder_Alloc(ctx, sizeof(upb_FieldDef) * n); - - uint32_t previous = 0; - for (int i = 0; i < n; i++) { - upb_FieldDef* f = &defs[i]; +bool _upb_DefPool_LoadDefInit(upb_DefPool* s, const _upb_DefPool_Init* init) { + return _upb_DefPool_LoadDefInitEx(s, init, false); +} - _upb_FieldDef_CreateNotExt(ctx, prefix, protos[i], m, f); - f->index_ = i; - if (!ctx->layout) { - // Speculate that the def fields are sorted. We will always sort the - // MiniTable fields, so if defs are sorted then indices will match. - // - // If this is incorrect, we will overwrite later. - f->layout_index = i; - } - const uint32_t current = f->number_; - if (previous > current) *is_sorted = false; - previous = current; - } +// Must be last. 
- return defs; +upb_deftype_t _upb_DefType_Type(upb_value v) { + const uintptr_t num = (uintptr_t)upb_value_getconstptr(v); + return num & UPB_DEFTYPE_MASK; } -static void resolve_subdef(upb_DefBuilder* ctx, const char* prefix, - upb_FieldDef* f) { - const UPB_DESC(FieldDescriptorProto)* field_proto = f->sub.unresolved; - upb_StringView name = UPB_DESC(FieldDescriptorProto_type_name)(field_proto); - bool has_name = UPB_DESC(FieldDescriptorProto_has_type_name)(field_proto); - switch ((int)f->type_) { - case UPB_FIELD_TYPE_UNSPECIFIED: { - // Type was not specified and must be inferred. - UPB_ASSERT(has_name); - upb_deftype_t type; - const void* def = - _upb_DefBuilder_ResolveAny(ctx, f->full_name, prefix, name, &type); - switch (type) { - case UPB_DEFTYPE_ENUM: - f->sub.enumdef = def; - f->type_ = kUpb_FieldType_Enum; - if (!UPB_DESC(FieldOptions_has_packed)(f->opts)) { - f->is_packed = f->label_ == kUpb_Label_Repeated && - upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto3; - } - break; - case UPB_DEFTYPE_MSG: - f->sub.msgdef = def; - f->type_ = kUpb_FieldType_Message; // It appears there is no way of - // this being a group. - f->has_presence = !upb_FieldDef_IsRepeated(f); - break; - default: - _upb_DefBuilder_Errf(ctx, "Couldn't resolve type name for field %s", - f->full_name); - } - break; - } - case kUpb_FieldType_Message: - case kUpb_FieldType_Group: - UPB_ASSERT(has_name); - f->sub.msgdef = _upb_DefBuilder_Resolve(ctx, f->full_name, prefix, name, - UPB_DEFTYPE_MSG); - break; - case kUpb_FieldType_Enum: - UPB_ASSERT(has_name); - f->sub.enumdef = _upb_DefBuilder_Resolve(ctx, f->full_name, prefix, name, - UPB_DEFTYPE_ENUM); - break; - default: - // No resolution necessary. 
- break; - } +upb_value _upb_DefType_Pack(const void* ptr, upb_deftype_t type) { + uintptr_t num = (uintptr_t)ptr; + UPB_ASSERT((num & UPB_DEFTYPE_MASK) == 0); + num |= type; + return upb_value_constptr((const void*)num); } -static int _upb_FieldDef_Compare(const void* p1, const void* p2) { - const uint32_t v1 = (*(upb_FieldDef**)p1)->number_; - const uint32_t v2 = (*(upb_FieldDef**)p2)->number_; - return (v1 < v2) ? -1 : (v1 > v2); +const void* _upb_DefType_Unpack(upb_value v, upb_deftype_t type) { + uintptr_t num = (uintptr_t)upb_value_getconstptr(v); + return (num & UPB_DEFTYPE_MASK) == type + ? (const void*)(num & ~UPB_DEFTYPE_MASK) + : NULL; } -// _upb_FieldDefs_Sorted() is mostly a pure function of its inputs, but has one -// critical side effect that we depend on: it sets layout_index appropriately -// for non-sorted lists of fields. -const upb_FieldDef** _upb_FieldDefs_Sorted(const upb_FieldDef* f, int n, - upb_Arena* a) { - // TODO(salo): Replace this arena alloc with a persistent scratch buffer. - upb_FieldDef** out = (upb_FieldDef**)upb_Arena_Malloc(a, n * sizeof(void*)); - if (!out) return NULL; - for (int i = 0; i < n; i++) { - out[i] = (upb_FieldDef*)&f[i]; - } - qsort(out, n, sizeof(void*), _upb_FieldDef_Compare); +// Must be last. 
- for (int i = 0; i < n; i++) { - out[i]->layout_index = i; +bool _upb_DescState_Grow(upb_DescState* d, upb_Arena* a) { + const size_t oldbufsize = d->bufsize; + const int used = d->ptr - d->buf; + + if (!d->buf) { + d->buf = upb_Arena_Malloc(a, d->bufsize); + if (!d->buf) return false; + d->ptr = d->buf; + d->e.end = d->buf + d->bufsize; } - return (const upb_FieldDef**)out; -} -bool upb_FieldDef_MiniDescriptorEncode(const upb_FieldDef* f, upb_Arena* a, - upb_StringView* out) { - UPB_ASSERT(f->is_extension); + if (oldbufsize - used < kUpb_MtDataEncoder_MinSize) { + d->bufsize *= 2; + d->buf = upb_Arena_Realloc(a, d->buf, oldbufsize, d->bufsize); + if (!d->buf) return false; + d->ptr = d->buf + used; + d->e.end = d->buf + d->bufsize; + } - upb_DescState s; - _upb_DescState_Init(&s); + return true; +} - const int number = upb_FieldDef_Number(f); - const uint64_t modifiers = _upb_FieldDef_Modifiers(f); - if (!_upb_DescState_Grow(&s, a)) return false; - s.ptr = upb_MtDataEncoder_EncodeExtension(&s.e, s.ptr, f->type_, number, - modifiers); - *s.ptr = '\0'; - out->data = s.buf; - out->size = s.ptr - s.buf; - return true; -} +// Must be last. -static void resolve_extension(upb_DefBuilder* ctx, const char* prefix, - upb_FieldDef* f, - const UPB_DESC(FieldDescriptorProto) * - field_proto) { - if (!UPB_DESC(FieldDescriptorProto_has_extendee)(field_proto)) { - _upb_DefBuilder_Errf(ctx, "extension for field '%s' had no extendee", - f->full_name); - } - - upb_StringView name = UPB_DESC(FieldDescriptorProto_extendee)(field_proto); - const upb_MessageDef* m = - _upb_DefBuilder_Resolve(ctx, f->full_name, prefix, name, UPB_DEFTYPE_MSG); - f->msgdef = m; +struct upb_EnumDef { + const UPB_DESC(EnumOptions) * opts; + const upb_MiniTableEnum* layout; // Only for proto2. + const upb_FileDef* file; + const upb_MessageDef* containing_type; // Could be merged with "file". 
+ const char* full_name; + upb_strtable ntoi; + upb_inttable iton; + const upb_EnumValueDef* values; + const upb_EnumReservedRange* res_ranges; + const upb_StringView* res_names; + int value_count; + int res_range_count; + int res_name_count; + int32_t defaultval; + bool is_closed; + bool is_sorted; // Whether all of the values are defined in ascending order. +}; - if (!_upb_MessageDef_IsValidExtensionNumber(m, f->number_)) { - _upb_DefBuilder_Errf( - ctx, - "field number %u in extension %s has no extension range in message %s", - (unsigned)f->number_, f->full_name, upb_MessageDef_FullName(m)); - } +upb_EnumDef* _upb_EnumDef_At(const upb_EnumDef* e, int i) { + return (upb_EnumDef*)&e[i]; } -void _upb_FieldDef_BuildMiniTableExtension(upb_DefBuilder* ctx, - const upb_FieldDef* f) { - const upb_MiniTableExtension* ext = _upb_FieldDef_ExtensionMiniTable(f); - - if (ctx->layout) { - UPB_ASSERT(upb_FieldDef_Number(f) == ext->field.number); - } else { - upb_StringView desc; - if (!upb_FieldDef_MiniDescriptorEncode(f, ctx->tmp_arena, &desc)) { - _upb_DefBuilder_OomErr(ctx); - } - - upb_MiniTableExtension* mut_ext = (upb_MiniTableExtension*)ext; - upb_MiniTableSub sub = {NULL}; - if (upb_FieldDef_IsSubMessage(f)) { - sub.submsg = upb_MessageDef_MiniTable(f->sub.msgdef); - } else if (_upb_FieldDef_IsClosedEnum(f)) { - sub.subenum = _upb_EnumDef_MiniTable(f->sub.enumdef); - } - bool ok2 = upb_MiniTableExtension_Init(desc.data, desc.size, mut_ext, - upb_MessageDef_MiniTable(f->msgdef), - sub, ctx->status); - if (!ok2) _upb_DefBuilder_Errf(ctx, "Could not build extension mini table"); - } - - bool ok = _upb_DefPool_InsertExt(ctx->symtab, ext, f); - if (!ok) _upb_DefBuilder_OomErr(ctx); +const upb_MiniTableEnum* _upb_EnumDef_MiniTable(const upb_EnumDef* e) { + return e->layout; } -static void resolve_default(upb_DefBuilder* ctx, upb_FieldDef* f, - const UPB_DESC(FieldDescriptorProto) * - field_proto) { - // Have to delay resolving of the default value until now because of the 
enum - // case, since enum defaults are specified with a label. - if (UPB_DESC(FieldDescriptorProto_has_default_value)(field_proto)) { - upb_StringView defaultval = - UPB_DESC(FieldDescriptorProto_default_value)(field_proto); - - if (upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto3) { - _upb_DefBuilder_Errf(ctx, - "proto3 fields cannot have explicit defaults (%s)", - f->full_name); - } - - if (upb_FieldDef_IsSubMessage(f)) { - _upb_DefBuilder_Errf(ctx, - "message fields cannot have explicit defaults (%s)", - f->full_name); - } +bool _upb_EnumDef_Insert(upb_EnumDef* e, upb_EnumValueDef* v, upb_Arena* a) { + const char* name = upb_EnumValueDef_Name(v); + const upb_value val = upb_value_constptr(v); + bool ok = upb_strtable_insert(&e->ntoi, name, strlen(name), val, a); + if (!ok) return false; - parse_default(ctx, defaultval.data, defaultval.size, f); - f->has_default = true; - } else { - set_default_default(ctx, f); - f->has_default = false; + // Multiple enumerators can have the same number, first one wins. + const int number = upb_EnumValueDef_Number(v); + if (!upb_inttable_lookup(&e->iton, number, NULL)) { + return upb_inttable_insert(&e->iton, number, val, a); } + return true; } -void _upb_FieldDef_Resolve(upb_DefBuilder* ctx, const char* prefix, - upb_FieldDef* f) { - // We have to stash this away since resolve_subdef() may overwrite it. - const UPB_DESC(FieldDescriptorProto)* field_proto = f->sub.unresolved; - - resolve_subdef(ctx, prefix, f); - resolve_default(ctx, f, field_proto); - - if (f->is_extension) { - resolve_extension(ctx, prefix, f, field_proto); - } +const UPB_DESC(EnumOptions) * upb_EnumDef_Options(const upb_EnumDef* e) { + return e->opts; } +bool upb_EnumDef_HasOptions(const upb_EnumDef* e) { + return e->opts != (void*)kUpbDefOptDefault; +} -// Must be last. 
- -struct upb_FileDef { - const UPB_DESC(FileOptions) * opts; - const char* name; - const char* package; - const char* edition; +const char* upb_EnumDef_FullName(const upb_EnumDef* e) { return e->full_name; } - const upb_FileDef** deps; - const int32_t* public_deps; - const int32_t* weak_deps; - const upb_MessageDef* top_lvl_msgs; - const upb_EnumDef* top_lvl_enums; - const upb_FieldDef* top_lvl_exts; - const upb_ServiceDef* services; - const upb_MiniTableExtension** ext_layouts; - const upb_DefPool* symtab; +const char* upb_EnumDef_Name(const upb_EnumDef* e) { + return _upb_DefBuilder_FullToShort(e->full_name); +} - int dep_count; - int public_dep_count; - int weak_dep_count; - int top_lvl_msg_count; - int top_lvl_enum_count; - int top_lvl_ext_count; - int service_count; - int ext_count; // All exts in the file. - upb_Syntax syntax; -}; +const upb_FileDef* upb_EnumDef_File(const upb_EnumDef* e) { return e->file; } -const UPB_DESC(FileOptions) * upb_FileDef_Options(const upb_FileDef* f) { - return f->opts; +const upb_MessageDef* upb_EnumDef_ContainingType(const upb_EnumDef* e) { + return e->containing_type; } -bool upb_FileDef_HasOptions(const upb_FileDef* f) { - return f->opts != (void*)kUpbDefOptDefault; +int32_t upb_EnumDef_Default(const upb_EnumDef* e) { + UPB_ASSERT(upb_EnumDef_FindValueByNumber(e, e->defaultval)); + return e->defaultval; } -const char* upb_FileDef_Name(const upb_FileDef* f) { return f->name; } - -const char* upb_FileDef_Package(const upb_FileDef* f) { - return f->package ? f->package : ""; +int upb_EnumDef_ReservedRangeCount(const upb_EnumDef* e) { + return e->res_range_count; } -const char* upb_FileDef_Edition(const upb_FileDef* f) { - return f->edition ? 
f->edition : ""; +const upb_EnumReservedRange* upb_EnumDef_ReservedRange(const upb_EnumDef* e, + int i) { + UPB_ASSERT(0 <= i && i < e->res_range_count); + return _upb_EnumReservedRange_At(e->res_ranges, i); } -const char* _upb_FileDef_RawPackage(const upb_FileDef* f) { return f->package; } - -upb_Syntax upb_FileDef_Syntax(const upb_FileDef* f) { return f->syntax; } +int upb_EnumDef_ReservedNameCount(const upb_EnumDef* e) { + return e->res_name_count; +} -int upb_FileDef_TopLevelMessageCount(const upb_FileDef* f) { - return f->top_lvl_msg_count; +upb_StringView upb_EnumDef_ReservedName(const upb_EnumDef* e, int i) { + UPB_ASSERT(0 <= i && i < e->res_name_count); + return e->res_names[i]; } -int upb_FileDef_DependencyCount(const upb_FileDef* f) { return f->dep_count; } +int upb_EnumDef_ValueCount(const upb_EnumDef* e) { return e->value_count; } -int upb_FileDef_PublicDependencyCount(const upb_FileDef* f) { - return f->public_dep_count; +const upb_EnumValueDef* upb_EnumDef_FindValueByName(const upb_EnumDef* e, + const char* name) { + return upb_EnumDef_FindValueByNameWithSize(e, name, strlen(name)); } -int upb_FileDef_WeakDependencyCount(const upb_FileDef* f) { - return f->weak_dep_count; +const upb_EnumValueDef* upb_EnumDef_FindValueByNameWithSize( + const upb_EnumDef* e, const char* name, size_t size) { + upb_value v; + return upb_strtable_lookup2(&e->ntoi, name, size, &v) + ? upb_value_getconstptr(v) + : NULL; } -const int32_t* _upb_FileDef_PublicDependencyIndexes(const upb_FileDef* f) { - return f->public_deps; +const upb_EnumValueDef* upb_EnumDef_FindValueByNumber(const upb_EnumDef* e, + int32_t num) { + upb_value v; + return upb_inttable_lookup(&e->iton, num, &v) ? 
upb_value_getconstptr(v) + : NULL; } -const int32_t* _upb_FileDef_WeakDependencyIndexes(const upb_FileDef* f) { - return f->weak_deps; +bool upb_EnumDef_CheckNumber(const upb_EnumDef* e, int32_t num) { + // We could use upb_EnumDef_FindValueByNumber(e, num) != NULL, but we expect + // this to be faster (especially for small numbers). + return upb_MiniTableEnum_CheckValue(e->layout, num); } -int upb_FileDef_TopLevelEnumCount(const upb_FileDef* f) { - return f->top_lvl_enum_count; +const upb_EnumValueDef* upb_EnumDef_Value(const upb_EnumDef* e, int i) { + UPB_ASSERT(0 <= i && i < e->value_count); + return _upb_EnumValueDef_At(e->values, i); } -int upb_FileDef_TopLevelExtensionCount(const upb_FileDef* f) { - return f->top_lvl_ext_count; -} +bool upb_EnumDef_IsClosed(const upb_EnumDef* e) { return e->is_closed; } -int upb_FileDef_ServiceCount(const upb_FileDef* f) { return f->service_count; } +bool upb_EnumDef_MiniDescriptorEncode(const upb_EnumDef* e, upb_Arena* a, + upb_StringView* out) { + upb_DescState s; + _upb_DescState_Init(&s); -const upb_FileDef* upb_FileDef_Dependency(const upb_FileDef* f, int i) { - UPB_ASSERT(0 <= i && i < f->dep_count); - return f->deps[i]; -} + const upb_EnumValueDef** sorted = NULL; + if (!e->is_sorted) { + sorted = _upb_EnumValueDefs_Sorted(e->values, e->value_count, a); + if (!sorted) return false; + } -const upb_FileDef* upb_FileDef_PublicDependency(const upb_FileDef* f, int i) { - UPB_ASSERT(0 <= i && i < f->public_dep_count); - return f->deps[f->public_deps[i]]; -} + if (!_upb_DescState_Grow(&s, a)) return false; + s.ptr = upb_MtDataEncoder_StartEnum(&s.e, s.ptr); -const upb_FileDef* upb_FileDef_WeakDependency(const upb_FileDef* f, int i) { - UPB_ASSERT(0 <= i && i < f->public_dep_count); - return f->deps[f->weak_deps[i]]; -} + // Duplicate values are allowed but we only encode each value once. 
+ uint32_t previous = 0; -const upb_MessageDef* upb_FileDef_TopLevelMessage(const upb_FileDef* f, int i) { - UPB_ASSERT(0 <= i && i < f->top_lvl_msg_count); - return _upb_MessageDef_At(f->top_lvl_msgs, i); -} + for (int i = 0; i < e->value_count; i++) { + const uint32_t current = + upb_EnumValueDef_Number(sorted ? sorted[i] : upb_EnumDef_Value(e, i)); + if (i != 0 && previous == current) continue; -const upb_EnumDef* upb_FileDef_TopLevelEnum(const upb_FileDef* f, int i) { - UPB_ASSERT(0 <= i && i < f->top_lvl_enum_count); - return _upb_EnumDef_At(f->top_lvl_enums, i); -} + if (!_upb_DescState_Grow(&s, a)) return false; + s.ptr = upb_MtDataEncoder_PutEnumValue(&s.e, s.ptr, current); + previous = current; + } -const upb_FieldDef* upb_FileDef_TopLevelExtension(const upb_FileDef* f, int i) { - UPB_ASSERT(0 <= i && i < f->top_lvl_ext_count); - return _upb_FieldDef_At(f->top_lvl_exts, i); -} + if (!_upb_DescState_Grow(&s, a)) return false; + s.ptr = upb_MtDataEncoder_EndEnum(&s.e, s.ptr); -const upb_ServiceDef* upb_FileDef_Service(const upb_FileDef* f, int i) { - UPB_ASSERT(0 <= i && i < f->service_count); - return _upb_ServiceDef_At(f->services, i); + // There will always be room for this '\0' in the encoder buffer because + // kUpb_MtDataEncoder_MinSize is overkill for upb_MtDataEncoder_EndEnum(). 
+ UPB_ASSERT(s.ptr < s.buf + s.bufsize); + *s.ptr = '\0'; + + out->data = s.buf; + out->size = s.ptr - s.buf; + return true; } -const upb_DefPool* upb_FileDef_Pool(const upb_FileDef* f) { return f->symtab; } +static upb_MiniTableEnum* create_enumlayout(upb_DefBuilder* ctx, + const upb_EnumDef* e) { + upb_StringView sv; + bool ok = upb_EnumDef_MiniDescriptorEncode(e, ctx->tmp_arena, &sv); + if (!ok) _upb_DefBuilder_Errf(ctx, "OOM while building enum MiniDescriptor"); -const upb_MiniTableExtension* _upb_FileDef_ExtensionMiniTable( - const upb_FileDef* f, int i) { - return f->ext_layouts[i]; + upb_Status status; + upb_MiniTableEnum* layout = + upb_MiniTableEnum_Build(sv.data, sv.size, ctx->arena, &status); + if (!layout) + _upb_DefBuilder_Errf(ctx, "Error building enum MiniTable: %s", status.msg); + return layout; } -static char* strviewdup(upb_DefBuilder* ctx, upb_StringView view) { - char* ret = upb_strdup2(view.data, view.size, _upb_DefBuilder_Arena(ctx)); - if (!ret) _upb_DefBuilder_OomErr(ctx); - return ret; +static upb_StringView* _upb_EnumReservedNames_New( + upb_DefBuilder* ctx, int n, const upb_StringView* protos) { + upb_StringView* sv = _upb_DefBuilder_Alloc(ctx, sizeof(upb_StringView) * n); + for (int i = 0; i < n; i++) { + sv[i].data = + upb_strdup2(protos[i].data, protos[i].size, _upb_DefBuilder_Arena(ctx)); + sv[i].size = protos[i].size; + } + return sv; } -static bool streql_view(upb_StringView view, const char* b) { - return view.size == strlen(b) && memcmp(view.data, b, view.size) == 0; -} +static void create_enumdef(upb_DefBuilder* ctx, const char* prefix, + const UPB_DESC(EnumDescriptorProto) * enum_proto, + upb_EnumDef* e) { + const UPB_DESC(EnumValueDescriptorProto)* const* values; + const UPB_DESC(EnumDescriptorProto_EnumReservedRange)* const* res_ranges; + const upb_StringView* res_names; + upb_StringView name; + size_t n_value, n_res_range, n_res_name; -static int count_exts_in_msg(const UPB_DESC(DescriptorProto) * msg_proto) { - size_t n; - 
UPB_DESC(DescriptorProto_extension)(msg_proto, &n); - int ext_count = n; + // Must happen before _upb_DefBuilder_Add() + e->file = _upb_DefBuilder_File(ctx); - const UPB_DESC(DescriptorProto)* const* nested_msgs = - UPB_DESC(DescriptorProto_nested_type)(msg_proto, &n); - for (size_t i = 0; i < n; i++) { - ext_count += count_exts_in_msg(nested_msgs[i]); - } + name = UPB_DESC(EnumDescriptorProto_name)(enum_proto); - return ext_count; -} + e->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); + _upb_DefBuilder_Add(ctx, e->full_name, + _upb_DefType_Pack(e, UPB_DEFTYPE_ENUM)); -// Allocate and initialize one file def, and add it to the context object. -void _upb_FileDef_Create(upb_DefBuilder* ctx, - const UPB_DESC(FileDescriptorProto) * file_proto) { - upb_FileDef* file = _upb_DefBuilder_Alloc(ctx, sizeof(upb_FileDef)); - ctx->file = file; + e->is_closed = (!UPB_TREAT_PROTO2_ENUMS_LIKE_PROTO3) && + (upb_FileDef_Syntax(e->file) == kUpb_Syntax_Proto2); - const UPB_DESC(DescriptorProto)* const* msgs; - const UPB_DESC(EnumDescriptorProto)* const* enums; - const UPB_DESC(FieldDescriptorProto)* const* exts; - const UPB_DESC(ServiceDescriptorProto)* const* services; - const upb_StringView* strs; - const int32_t* public_deps; - const int32_t* weak_deps; - size_t n; + values = UPB_DESC(EnumDescriptorProto_value)(enum_proto, &n_value); - file->symtab = ctx->symtab; + bool ok = upb_strtable_init(&e->ntoi, n_value, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); - // Count all extensions in the file, to build a flat array of layouts. - UPB_DESC(FileDescriptorProto_extension)(file_proto, &n); - int ext_count = n; - msgs = UPB_DESC(FileDescriptorProto_message_type)(file_proto, &n); - for (int i = 0; i < n; i++) { - ext_count += count_exts_in_msg(msgs[i]); - } - file->ext_count = ext_count; + ok = upb_inttable_init(&e->iton, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); - if (ctx->layout) { - // We are using the ext layouts that were passed in. 
- file->ext_layouts = ctx->layout->exts; - if (ctx->layout->ext_count != file->ext_count) { - _upb_DefBuilder_Errf(ctx, - "Extension count did not match layout (%d vs %d)", - ctx->layout->ext_count, file->ext_count); - } - } else { - // We are building ext layouts from scratch. - file->ext_layouts = _upb_DefBuilder_Alloc( - ctx, sizeof(*file->ext_layouts) * file->ext_count); - upb_MiniTableExtension* ext = - _upb_DefBuilder_Alloc(ctx, sizeof(*ext) * file->ext_count); - for (int i = 0; i < file->ext_count; i++) { - file->ext_layouts[i] = &ext[i]; - } - } + e->defaultval = 0; + e->value_count = n_value; + e->values = + _upb_EnumValueDefs_New(ctx, prefix, n_value, values, e, &e->is_sorted); - upb_StringView name = UPB_DESC(FileDescriptorProto_name)(file_proto); - file->name = strviewdup(ctx, name); - if (strlen(file->name) != name.size) { - _upb_DefBuilder_Errf(ctx, "File name contained embedded NULL"); + if (n_value == 0) { + _upb_DefBuilder_Errf(ctx, "enums must contain at least one value (%s)", + e->full_name); } - upb_StringView package = UPB_DESC(FileDescriptorProto_package)(file_proto); - - if (package.size) { - _upb_DefBuilder_CheckIdentFull(ctx, package); - file->package = strviewdup(ctx, package); - } else { - file->package = NULL; - } + res_ranges = + UPB_DESC(EnumDescriptorProto_reserved_range)(enum_proto, &n_res_range); + e->res_range_count = n_res_range; + e->res_ranges = _upb_EnumReservedRanges_New(ctx, n_res_range, res_ranges, e); - upb_StringView edition = UPB_DESC(FileDescriptorProto_edition)(file_proto); + res_names = + UPB_DESC(EnumDescriptorProto_reserved_name)(enum_proto, &n_res_name); + e->res_name_count = n_res_name; + e->res_names = _upb_EnumReservedNames_New(ctx, n_res_name, res_names); - if (edition.size == 0) { - file->edition = NULL; - } else { - // TODO(b/267770604): How should we validate this? 
- file->edition = strviewdup(ctx, edition); - if (strlen(file->edition) != edition.size) { - _upb_DefBuilder_Errf(ctx, "Edition name contained embedded NULL"); - } - } + UPB_DEF_SET_OPTIONS(e->opts, EnumDescriptorProto, EnumOptions, enum_proto); - if (UPB_DESC(FileDescriptorProto_has_syntax)(file_proto)) { - upb_StringView syntax = UPB_DESC(FileDescriptorProto_syntax)(file_proto); + upb_inttable_compact(&e->iton, ctx->arena); - if (streql_view(syntax, "proto2")) { - file->syntax = kUpb_Syntax_Proto2; - } else if (streql_view(syntax, "proto3")) { - file->syntax = kUpb_Syntax_Proto3; + if (e->is_closed) { + if (ctx->layout) { + UPB_ASSERT(ctx->enum_count < ctx->layout->enum_count); + e->layout = ctx->layout->enums[ctx->enum_count++]; } else { - _upb_DefBuilder_Errf(ctx, "Invalid syntax '" UPB_STRINGVIEW_FORMAT "'", - UPB_STRINGVIEW_ARGS(syntax)); + e->layout = create_enumlayout(ctx, e); } } else { - file->syntax = kUpb_Syntax_Proto2; + e->layout = NULL; } +} - // Read options. - UPB_DEF_SET_OPTIONS(file->opts, FileDescriptorProto, FileOptions, file_proto); +upb_EnumDef* _upb_EnumDefs_New( + upb_DefBuilder* ctx, int n, + const UPB_DESC(EnumDescriptorProto) * const* protos, + const upb_MessageDef* containing_type) { + _upb_DefType_CheckPadding(sizeof(upb_EnumDef)); - // Verify dependencies. - strs = UPB_DESC(FileDescriptorProto_dependency)(file_proto, &n); - file->dep_count = n; - file->deps = _upb_DefBuilder_Alloc(ctx, sizeof(*file->deps) * n); + // If a containing type is defined then get the full name from that. + // Otherwise use the package name from the file def. + const char* name = containing_type ? 
upb_MessageDef_FullName(containing_type) + : _upb_FileDef_RawPackage(ctx->file); - for (size_t i = 0; i < n; i++) { - upb_StringView str = strs[i]; - file->deps[i] = - upb_DefPool_FindFileByNameWithSize(ctx->symtab, str.data, str.size); - if (!file->deps[i]) { - _upb_DefBuilder_Errf(ctx, - "Depends on file '" UPB_STRINGVIEW_FORMAT - "', but it has not been loaded", - UPB_STRINGVIEW_ARGS(str)); - } + upb_EnumDef* e = _upb_DefBuilder_Alloc(ctx, sizeof(upb_EnumDef) * n); + for (int i = 0; i < n; i++) { + create_enumdef(ctx, name, protos[i], &e[i]); + e[i].containing_type = containing_type; } + return e; +} - public_deps = UPB_DESC(FileDescriptorProto_public_dependency)(file_proto, &n); - file->public_dep_count = n; - file->public_deps = - _upb_DefBuilder_Alloc(ctx, sizeof(*file->public_deps) * n); - int32_t* mutable_public_deps = (int32_t*)file->public_deps; - for (size_t i = 0; i < n; i++) { - if (public_deps[i] >= file->dep_count) { - _upb_DefBuilder_Errf(ctx, "public_dep %d is out of range", - (int)public_deps[i]); - } - mutable_public_deps[i] = public_deps[i]; - } - weak_deps = UPB_DESC(FileDescriptorProto_weak_dependency)(file_proto, &n); - file->weak_dep_count = n; - file->weak_deps = _upb_DefBuilder_Alloc(ctx, sizeof(*file->weak_deps) * n); - int32_t* mutable_weak_deps = (int32_t*)file->weak_deps; - for (size_t i = 0; i < n; i++) { - if (weak_deps[i] >= file->dep_count) { - _upb_DefBuilder_Errf(ctx, "weak_dep %d is out of range", - (int)weak_deps[i]); - } - mutable_weak_deps[i] = weak_deps[i]; - } - // Create enums. - enums = UPB_DESC(FileDescriptorProto_enum_type)(file_proto, &n); - file->top_lvl_enum_count = n; - file->top_lvl_enums = _upb_EnumDefs_New(ctx, n, enums, NULL); +// Must be last. - // Create extensions. 
- exts = UPB_DESC(FileDescriptorProto_extension)(file_proto, &n); - file->top_lvl_ext_count = n; - file->top_lvl_exts = _upb_Extensions_New(ctx, n, exts, file->package, NULL); +struct upb_EnumReservedRange { + int32_t start; + int32_t end; +}; - // Create messages. - msgs = UPB_DESC(FileDescriptorProto_message_type)(file_proto, &n); - file->top_lvl_msg_count = n; - file->top_lvl_msgs = _upb_MessageDefs_New(ctx, n, msgs, NULL); +upb_EnumReservedRange* _upb_EnumReservedRange_At(const upb_EnumReservedRange* r, + int i) { + return (upb_EnumReservedRange*)&r[i]; +} - // Create services. - services = UPB_DESC(FileDescriptorProto_service)(file_proto, &n); - file->service_count = n; - file->services = _upb_ServiceDefs_New(ctx, n, services); - - // Now that all names are in the table, build layouts and resolve refs. +int32_t upb_EnumReservedRange_Start(const upb_EnumReservedRange* r) { + return r->start; +} +int32_t upb_EnumReservedRange_End(const upb_EnumReservedRange* r) { + return r->end; +} - for (int i = 0; i < file->top_lvl_msg_count; i++) { - upb_MessageDef* m = (upb_MessageDef*)upb_FileDef_TopLevelMessage(file, i); - _upb_MessageDef_Resolve(ctx, m); - } +upb_EnumReservedRange* _upb_EnumReservedRanges_New( + upb_DefBuilder* ctx, int n, + const UPB_DESC(EnumDescriptorProto_EnumReservedRange) * const* protos, + const upb_EnumDef* e) { + upb_EnumReservedRange* r = + _upb_DefBuilder_Alloc(ctx, sizeof(upb_EnumReservedRange) * n); - for (int i = 0; i < file->top_lvl_ext_count; i++) { - upb_FieldDef* f = (upb_FieldDef*)upb_FileDef_TopLevelExtension(file, i); - _upb_FieldDef_Resolve(ctx, file->package, f); - } + for (int i = 0; i < n; i++) { + const int32_t start = + UPB_DESC(EnumDescriptorProto_EnumReservedRange_start)(protos[i]); + const int32_t end = + UPB_DESC(EnumDescriptorProto_EnumReservedRange_end)(protos[i]); - for (int i = 0; i < file->top_lvl_msg_count; i++) { - upb_MessageDef* m = (upb_MessageDef*)upb_FileDef_TopLevelMessage(file, i); - 
_upb_MessageDef_CreateMiniTable(ctx, (upb_MessageDef*)m); - } + // A full validation would also check that each range is disjoint, and that + // none of the fields overlap with the extension ranges, but we are just + // sanity checking here. - for (int i = 0; i < file->top_lvl_ext_count; i++) { - upb_FieldDef* f = (upb_FieldDef*)upb_FileDef_TopLevelExtension(file, i); - _upb_FieldDef_BuildMiniTableExtension(ctx, f); - } + // Note: Not a typo! Unlike extension ranges and message reserved ranges, + // the end value of an enum reserved range is *inclusive*! + if (end < start) { + _upb_DefBuilder_Errf(ctx, "Reserved range (%d, %d) is invalid, enum=%s\n", + (int)start, (int)end, upb_EnumDef_FullName(e)); + } - for (int i = 0; i < file->top_lvl_msg_count; i++) { - upb_MessageDef* m = (upb_MessageDef*)upb_FileDef_TopLevelMessage(file, i); - _upb_MessageDef_LinkMiniTable(ctx, m); + r[i].start = start; + r[i].end = end; } - if (file->ext_count) { - bool ok = upb_ExtensionRegistry_AddArray( - _upb_DefPool_ExtReg(ctx->symtab), file->ext_layouts, file->ext_count); - if (!ok) _upb_DefBuilder_OomErr(ctx); - } + return r; } -#include - // Must be last. -bool upb_Message_HasFieldByDef(const upb_Message* msg, const upb_FieldDef* f) { - UPB_ASSERT(upb_FieldDef_HasPresence(f)); - return upb_Message_HasField(msg, upb_FieldDef_MiniTable(f)); -} +struct upb_EnumValueDef { + const UPB_DESC(EnumValueOptions) * opts; + const upb_EnumDef* parent; + const char* full_name; + int32_t number; +}; -const upb_FieldDef* upb_Message_WhichOneof(const upb_Message* msg, - const upb_OneofDef* o) { - const upb_FieldDef* f = upb_OneofDef_Field(o, 0); - if (upb_OneofDef_IsSynthetic(o)) { - UPB_ASSERT(upb_OneofDef_FieldCount(o) == 1); - return upb_Message_HasFieldByDef(msg, f) ? f : NULL; - } else { - const upb_MiniTableField* field = upb_FieldDef_MiniTable(f); - uint32_t oneof_case = upb_Message_WhichOneofFieldNumber(msg, field); - f = oneof_case ? 
upb_OneofDef_LookupNumber(o, oneof_case) : NULL; - UPB_ASSERT((f != NULL) == (oneof_case != 0)); - return f; - } +upb_EnumValueDef* _upb_EnumValueDef_At(const upb_EnumValueDef* v, int i) { + return (upb_EnumValueDef*)&v[i]; } -upb_MessageValue upb_Message_GetFieldByDef(const upb_Message* msg, - const upb_FieldDef* f) { - upb_MessageValue default_val = upb_FieldDef_Default(f); - upb_MessageValue ret; - _upb_Message_GetField(msg, upb_FieldDef_MiniTable(f), &default_val, &ret); - return ret; +static int _upb_EnumValueDef_Compare(const void* p1, const void* p2) { + const uint32_t v1 = (*(const upb_EnumValueDef**)p1)->number; + const uint32_t v2 = (*(const upb_EnumValueDef**)p2)->number; + return (v1 < v2) ? -1 : (v1 > v2); } -upb_MutableMessageValue upb_Message_Mutable(upb_Message* msg, - const upb_FieldDef* f, - upb_Arena* a) { - UPB_ASSERT(upb_FieldDef_IsSubMessage(f) || upb_FieldDef_IsRepeated(f)); - if (upb_FieldDef_HasPresence(f) && !upb_Message_HasFieldByDef(msg, f)) { - // We need to skip the upb_Message_GetFieldByDef() call in this case. - goto make; - } +const upb_EnumValueDef** _upb_EnumValueDefs_Sorted(const upb_EnumValueDef* v, + int n, upb_Arena* a) { + // TODO: Try to replace this arena alloc with a persistent scratch buffer. 
+ upb_EnumValueDef** out = + (upb_EnumValueDef**)upb_Arena_Malloc(a, n * sizeof(void*)); + if (!out) return NULL; - upb_MessageValue val = upb_Message_GetFieldByDef(msg, f); - if (val.array_val) { - return (upb_MutableMessageValue){.array = (upb_Array*)val.array_val}; + for (int i = 0; i < n; i++) { + out[i] = (upb_EnumValueDef*)&v[i]; } + qsort(out, n, sizeof(void*), _upb_EnumValueDef_Compare); - upb_MutableMessageValue ret; -make: - if (!a) return (upb_MutableMessageValue){.array = NULL}; - if (upb_FieldDef_IsMap(f)) { - const upb_MessageDef* entry = upb_FieldDef_MessageSubDef(f); - const upb_FieldDef* key = - upb_MessageDef_FindFieldByNumber(entry, kUpb_MapEntry_KeyFieldNumber); - const upb_FieldDef* value = - upb_MessageDef_FindFieldByNumber(entry, kUpb_MapEntry_ValueFieldNumber); - ret.map = - upb_Map_New(a, upb_FieldDef_CType(key), upb_FieldDef_CType(value)); - } else if (upb_FieldDef_IsRepeated(f)) { - ret.array = upb_Array_New(a, upb_FieldDef_CType(f)); - } else { - UPB_ASSERT(upb_FieldDef_IsSubMessage(f)); - const upb_MessageDef* m = upb_FieldDef_MessageSubDef(f); - ret.msg = upb_Message_New(upb_MessageDef_MiniTable(m), a); - } + return (const upb_EnumValueDef**)out; +} - val.array_val = ret.array; - upb_Message_SetFieldByDef(msg, f, val, a); +const UPB_DESC(EnumValueOptions) * + upb_EnumValueDef_Options(const upb_EnumValueDef* v) { + return v->opts; +} - return ret; +bool upb_EnumValueDef_HasOptions(const upb_EnumValueDef* v) { + return v->opts != (void*)kUpbDefOptDefault; } -bool upb_Message_SetFieldByDef(upb_Message* msg, const upb_FieldDef* f, - upb_MessageValue val, upb_Arena* a) { - return _upb_Message_SetField(msg, upb_FieldDef_MiniTable(f), &val, a); +const upb_EnumDef* upb_EnumValueDef_Enum(const upb_EnumValueDef* v) { + return v->parent; } -void upb_Message_ClearFieldByDef(upb_Message* msg, const upb_FieldDef* f) { - upb_Message_ClearField(msg, upb_FieldDef_MiniTable(f)); +const char* upb_EnumValueDef_FullName(const upb_EnumValueDef* v) { + 
return v->full_name; } -void upb_Message_ClearByDef(upb_Message* msg, const upb_MessageDef* m) { - upb_Message_Clear(msg, upb_MessageDef_MiniTable(m)); +const char* upb_EnumValueDef_Name(const upb_EnumValueDef* v) { + return _upb_DefBuilder_FullToShort(v->full_name); } -bool upb_Message_Next(const upb_Message* msg, const upb_MessageDef* m, - const upb_DefPool* ext_pool, const upb_FieldDef** out_f, - upb_MessageValue* out_val, size_t* iter) { - size_t i = *iter; - size_t n = upb_MessageDef_FieldCount(m); - UPB_UNUSED(ext_pool); +int32_t upb_EnumValueDef_Number(const upb_EnumValueDef* v) { return v->number; } - // Iterate over normal fields, returning the first one that is set. - while (++i < n) { - const upb_FieldDef* f = upb_MessageDef_Field(m, i); - const upb_MiniTableField* field = upb_FieldDef_MiniTable(f); - upb_MessageValue val = upb_Message_GetFieldByDef(msg, f); +uint32_t upb_EnumValueDef_Index(const upb_EnumValueDef* v) { + // Compute index in our parent's array. + return v - upb_EnumDef_Value(v->parent, 0); +} - // Skip field if unset or empty. 
- if (upb_MiniTableField_HasPresence(field)) { - if (!upb_Message_HasFieldByDef(msg, f)) continue; - } else { - switch (upb_FieldMode_Get(field)) { - case kUpb_FieldMode_Map: - if (!val.map_val || upb_Map_Size(val.map_val) == 0) continue; - break; - case kUpb_FieldMode_Array: - if (!val.array_val || upb_Array_Size(val.array_val) == 0) continue; - break; - case kUpb_FieldMode_Scalar: - if (!_upb_MiniTable_ValueIsNonZero(&val, field)) continue; - break; - } - } +static void create_enumvaldef(upb_DefBuilder* ctx, const char* prefix, + const UPB_DESC(EnumValueDescriptorProto) * + val_proto, + upb_EnumDef* e, upb_EnumValueDef* v) { + upb_StringView name = UPB_DESC(EnumValueDescriptorProto_name)(val_proto); - *out_val = val; - *out_f = f; - *iter = i; - return true; + v->parent = e; // Must happen prior to _upb_DefBuilder_Add() + v->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); + v->number = UPB_DESC(EnumValueDescriptorProto_number)(val_proto); + _upb_DefBuilder_Add(ctx, v->full_name, + _upb_DefType_Pack(v, UPB_DEFTYPE_ENUMVAL)); + + UPB_DEF_SET_OPTIONS(v->opts, EnumValueDescriptorProto, EnumValueOptions, + val_proto); + + bool ok = _upb_EnumDef_Insert(e, v, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); +} + +// Allocate and initialize an array of |n| enum value defs owned by |e|. +upb_EnumValueDef* _upb_EnumValueDefs_New( + upb_DefBuilder* ctx, const char* prefix, int n, + const UPB_DESC(EnumValueDescriptorProto) * const* protos, upb_EnumDef* e, + bool* is_sorted) { + _upb_DefType_CheckPadding(sizeof(upb_EnumValueDef)); + + upb_EnumValueDef* v = + _upb_DefBuilder_Alloc(ctx, sizeof(upb_EnumValueDef) * n); + + *is_sorted = true; + uint32_t previous = 0; + for (int i = 0; i < n; i++) { + create_enumvaldef(ctx, prefix, protos[i], e, &v[i]); + + const uint32_t current = v[i].number; + if (previous > current) *is_sorted = false; + previous = current; } - if (ext_pool) { - // Return any extensions that are set. 
- size_t count; - const upb_Message_Extension* ext = _upb_Message_Getexts(msg, &count); - if (i - n < count) { - ext += count - 1 - (i - n); - memcpy(out_val, &ext->data, sizeof(*out_val)); - *out_f = upb_DefPool_FindExtensionByMiniTable(ext_pool, ext->ext); - *iter = i; - return true; - } + if (upb_FileDef_Syntax(ctx->file) == kUpb_Syntax_Proto3 && n > 0 && + v[0].number != 0) { + _upb_DefBuilder_Errf(ctx, + "for proto3, the first enum value must be zero (%s)", + upb_EnumDef_FullName(e)); } - *iter = i; - return false; + return v; } -bool _upb_Message_DiscardUnknown(upb_Message* msg, const upb_MessageDef* m, - int depth) { - size_t iter = kUpb_Message_Begin; - const upb_FieldDef* f; - upb_MessageValue val; - bool ret = true; - if (--depth == 0) return false; - _upb_Message_DiscardUnknown_shallow(msg); +// Must be last. - while (upb_Message_Next(msg, m, NULL /*ext_pool*/, &f, &val, &iter)) { - const upb_MessageDef* subm = upb_FieldDef_MessageSubDef(f); - if (!subm) continue; - if (upb_FieldDef_IsMap(f)) { - const upb_FieldDef* val_f = upb_MessageDef_FindFieldByNumber(subm, 2); - const upb_MessageDef* val_m = upb_FieldDef_MessageSubDef(val_f); - upb_Map* map = (upb_Map*)val.map_val; - size_t iter = kUpb_Map_Begin; - - if (!val_m) continue; - - upb_MessageValue map_key, map_val; - while (upb_Map_Next(map, &map_key, &map_val, &iter)) { - if (!_upb_Message_DiscardUnknown((upb_Message*)map_val.msg_val, val_m, - depth)) { - ret = false; - } - } - } else if (upb_FieldDef_IsRepeated(f)) { - const upb_Array* arr = val.array_val; - size_t i, n = upb_Array_Size(arr); - for (i = 0; i < n; i++) { - upb_MessageValue elem = upb_Array_Get(arr, i); - if (!_upb_Message_DiscardUnknown((upb_Message*)elem.msg_val, subm, - depth)) { - ret = false; - } - } - } else { - if (!_upb_Message_DiscardUnknown((upb_Message*)val.msg_val, subm, - depth)) { - ret = false; - } - } - } +struct upb_ExtensionRange { + const UPB_DESC(ExtensionRangeOptions) * opts; + int32_t start; + int32_t end; +}; - 
return ret; +upb_ExtensionRange* _upb_ExtensionRange_At(const upb_ExtensionRange* r, int i) { + return (upb_ExtensionRange*)&r[i]; } -bool upb_Message_DiscardUnknown(upb_Message* msg, const upb_MessageDef* m, - int maxdepth) { - return _upb_Message_DiscardUnknown(msg, m, maxdepth); +const UPB_DESC(ExtensionRangeOptions) * + upb_ExtensionRange_Options(const upb_ExtensionRange* r) { + return r->opts; } +bool upb_ExtensionRange_HasOptions(const upb_ExtensionRange* r) { + return r->opts != (void*)kUpbDefOptDefault; +} -// Must be last. +int32_t upb_ExtensionRange_Start(const upb_ExtensionRange* r) { + return r->start; +} -struct upb_MessageDef { - const UPB_DESC(MessageOptions) * opts; - const upb_MiniTable* layout; - const upb_FileDef* file; - const upb_MessageDef* containing_type; - const char* full_name; +int32_t upb_ExtensionRange_End(const upb_ExtensionRange* r) { return r->end; } - // Tables for looking up fields by number and name. - upb_inttable itof; - upb_strtable ntof; +upb_ExtensionRange* _upb_ExtensionRanges_New( + upb_DefBuilder* ctx, int n, + const UPB_DESC(DescriptorProto_ExtensionRange) * const* protos, + const upb_MessageDef* m) { + upb_ExtensionRange* r = + _upb_DefBuilder_Alloc(ctx, sizeof(upb_ExtensionRange) * n); - /* All nested defs. - * MEM: We could save some space here by putting nested defs in a contiguous - * region and calculating counts from offsets or vice-versa. 
*/ - const upb_FieldDef* fields; - const upb_OneofDef* oneofs; - const upb_ExtensionRange* ext_ranges; - const upb_StringView* res_names; - const upb_MessageDef* nested_msgs; - const upb_MessageReservedRange* res_ranges; - const upb_EnumDef* nested_enums; - const upb_FieldDef* nested_exts; + for (int i = 0; i < n; i++) { + const int32_t start = + UPB_DESC(DescriptorProto_ExtensionRange_start)(protos[i]); + const int32_t end = UPB_DESC(DescriptorProto_ExtensionRange_end)(protos[i]); + const int32_t max = UPB_DESC(MessageOptions_message_set_wire_format)( + upb_MessageDef_Options(m)) + ? INT32_MAX + : kUpb_MaxFieldNumber + 1; - // TODO(salo): These counters don't need anywhere near 32 bits. - int field_count; - int real_oneof_count; - int oneof_count; - int ext_range_count; - int res_range_count; - int res_name_count; - int nested_msg_count; - int nested_enum_count; - int nested_ext_count; - bool in_message_set; - bool is_sorted; - upb_WellKnown well_known_type; -#if UINTPTR_MAX == 0xffffffff - uint32_t padding; // Increase size to a multiple of 8. -#endif -}; + // A full validation would also check that each range is disjoint, and that + // none of the fields overlap with the extension ranges, but we are just + // sanity checking here. 
+ if (start < 1 || end <= start || end > max) { + _upb_DefBuilder_Errf(ctx, + "Extension range (%d, %d) is invalid, message=%s\n", + (int)start, (int)end, upb_MessageDef_FullName(m)); + } -static void assign_msg_wellknowntype(upb_MessageDef* m) { - const char* name = m->full_name; - if (name == NULL) { - m->well_known_type = kUpb_WellKnown_Unspecified; - return; - } - if (!strcmp(name, "google.protobuf.Any")) { - m->well_known_type = kUpb_WellKnown_Any; - } else if (!strcmp(name, "google.protobuf.FieldMask")) { - m->well_known_type = kUpb_WellKnown_FieldMask; - } else if (!strcmp(name, "google.protobuf.Duration")) { - m->well_known_type = kUpb_WellKnown_Duration; - } else if (!strcmp(name, "google.protobuf.Timestamp")) { - m->well_known_type = kUpb_WellKnown_Timestamp; - } else if (!strcmp(name, "google.protobuf.DoubleValue")) { - m->well_known_type = kUpb_WellKnown_DoubleValue; - } else if (!strcmp(name, "google.protobuf.FloatValue")) { - m->well_known_type = kUpb_WellKnown_FloatValue; - } else if (!strcmp(name, "google.protobuf.Int64Value")) { - m->well_known_type = kUpb_WellKnown_Int64Value; - } else if (!strcmp(name, "google.protobuf.UInt64Value")) { - m->well_known_type = kUpb_WellKnown_UInt64Value; - } else if (!strcmp(name, "google.protobuf.Int32Value")) { - m->well_known_type = kUpb_WellKnown_Int32Value; - } else if (!strcmp(name, "google.protobuf.UInt32Value")) { - m->well_known_type = kUpb_WellKnown_UInt32Value; - } else if (!strcmp(name, "google.protobuf.BoolValue")) { - m->well_known_type = kUpb_WellKnown_BoolValue; - } else if (!strcmp(name, "google.protobuf.StringValue")) { - m->well_known_type = kUpb_WellKnown_StringValue; - } else if (!strcmp(name, "google.protobuf.BytesValue")) { - m->well_known_type = kUpb_WellKnown_BytesValue; - } else if (!strcmp(name, "google.protobuf.Value")) { - m->well_known_type = kUpb_WellKnown_Value; - } else if (!strcmp(name, "google.protobuf.ListValue")) { - m->well_known_type = kUpb_WellKnown_ListValue; - } else if 
(!strcmp(name, "google.protobuf.Struct")) { - m->well_known_type = kUpb_WellKnown_Struct; - } else { - m->well_known_type = kUpb_WellKnown_Unspecified; + r[i].start = start; + r[i].end = end; + UPB_DEF_SET_OPTIONS(r[i].opts, DescriptorProto_ExtensionRange, + ExtensionRangeOptions, protos[i]); } -} -upb_MessageDef* _upb_MessageDef_At(const upb_MessageDef* m, int i) { - return (upb_MessageDef*)&m[i]; + return r; } -bool _upb_MessageDef_IsValidExtensionNumber(const upb_MessageDef* m, int n) { - for (int i = 0; i < m->ext_range_count; i++) { - const upb_ExtensionRange* r = upb_MessageDef_ExtensionRange(m, i); - if (upb_ExtensionRange_Start(r) <= n && n < upb_ExtensionRange_End(r)) { - return true; - } - } - return false; -} -const UPB_DESC(MessageOptions) * - upb_MessageDef_Options(const upb_MessageDef* m) { - return m->opts; -} +#include +#include -bool upb_MessageDef_HasOptions(const upb_MessageDef* m) { - return m->opts != (void*)kUpbDefOptDefault; -} -const char* upb_MessageDef_FullName(const upb_MessageDef* m) { - return m->full_name; -} +// Must be last. -const upb_FileDef* upb_MessageDef_File(const upb_MessageDef* m) { - return m->file; -} +#define UPB_FIELD_TYPE_UNSPECIFIED 0 -const upb_MessageDef* upb_MessageDef_ContainingType(const upb_MessageDef* m) { - return m->containing_type; +typedef struct { + size_t len; + char str[1]; // Null-terminated string data follows. +} str_t; + +struct upb_FieldDef { + const UPB_DESC(FieldOptions) * opts; + const upb_FileDef* file; + const upb_MessageDef* msgdef; + const char* full_name; + const char* json_name; + union { + int64_t sint; + uint64_t uint; + double dbl; + float flt; + bool boolean; + str_t* str; + void* msg; // Always NULL. 
+ } defaultval; + union { + const upb_OneofDef* oneof; + const upb_MessageDef* extension_scope; + } scope; + union { + const upb_MessageDef* msgdef; + const upb_EnumDef* enumdef; + const UPB_DESC(FieldDescriptorProto) * unresolved; + } sub; + uint32_t number_; + uint16_t index_; + uint16_t layout_index; // Index into msgdef->layout->fields or file->exts + bool has_default; + bool has_json_name; + bool has_presence; + bool is_extension; + bool is_packed; + bool is_proto3_optional; + upb_FieldType type_; + upb_Label label_; +#if UINTPTR_MAX == 0xffffffff + uint32_t padding; // Increase size to a multiple of 8. +#endif +}; + +upb_FieldDef* _upb_FieldDef_At(const upb_FieldDef* f, int i) { + return (upb_FieldDef*)&f[i]; } -const char* upb_MessageDef_Name(const upb_MessageDef* m) { - return _upb_DefBuilder_FullToShort(m->full_name); +const UPB_DESC(FieldOptions) * upb_FieldDef_Options(const upb_FieldDef* f) { + return f->opts; } -upb_Syntax upb_MessageDef_Syntax(const upb_MessageDef* m) { - return upb_FileDef_Syntax(m->file); +bool upb_FieldDef_HasOptions(const upb_FieldDef* f) { + return f->opts != (void*)kUpbDefOptDefault; } -const upb_FieldDef* upb_MessageDef_FindFieldByNumber(const upb_MessageDef* m, - uint32_t i) { - upb_value val; - return upb_inttable_lookup(&m->itof, i, &val) ? 
upb_value_getconstptr(val) - : NULL; +const char* upb_FieldDef_FullName(const upb_FieldDef* f) { + return f->full_name; } -const upb_FieldDef* upb_MessageDef_FindFieldByNameWithSize( - const upb_MessageDef* m, const char* name, size_t size) { - upb_value val; - - if (!upb_strtable_lookup2(&m->ntof, name, size, &val)) { - return NULL; +upb_CType upb_FieldDef_CType(const upb_FieldDef* f) { + switch (f->type_) { + case kUpb_FieldType_Double: + return kUpb_CType_Double; + case kUpb_FieldType_Float: + return kUpb_CType_Float; + case kUpb_FieldType_Int64: + case kUpb_FieldType_SInt64: + case kUpb_FieldType_SFixed64: + return kUpb_CType_Int64; + case kUpb_FieldType_Int32: + case kUpb_FieldType_SFixed32: + case kUpb_FieldType_SInt32: + return kUpb_CType_Int32; + case kUpb_FieldType_UInt64: + case kUpb_FieldType_Fixed64: + return kUpb_CType_UInt64; + case kUpb_FieldType_UInt32: + case kUpb_FieldType_Fixed32: + return kUpb_CType_UInt32; + case kUpb_FieldType_Enum: + return kUpb_CType_Enum; + case kUpb_FieldType_Bool: + return kUpb_CType_Bool; + case kUpb_FieldType_String: + return kUpb_CType_String; + case kUpb_FieldType_Bytes: + return kUpb_CType_Bytes; + case kUpb_FieldType_Group: + case kUpb_FieldType_Message: + return kUpb_CType_Message; } - - return _upb_DefType_Unpack(val, UPB_DEFTYPE_FIELD); + UPB_UNREACHABLE(); } -const upb_OneofDef* upb_MessageDef_FindOneofByNameWithSize( - const upb_MessageDef* m, const char* name, size_t size) { - upb_value val; +upb_FieldType upb_FieldDef_Type(const upb_FieldDef* f) { return f->type_; } - if (!upb_strtable_lookup2(&m->ntof, name, size, &val)) { - return NULL; - } +uint32_t upb_FieldDef_Index(const upb_FieldDef* f) { return f->index_; } - return _upb_DefType_Unpack(val, UPB_DEFTYPE_ONEOF); -} +upb_Label upb_FieldDef_Label(const upb_FieldDef* f) { return f->label_; } -bool _upb_MessageDef_Insert(upb_MessageDef* m, const char* name, size_t len, - upb_value v, upb_Arena* a) { - return upb_strtable_insert(&m->ntof, name, len, v, a); 
-} +uint32_t upb_FieldDef_Number(const upb_FieldDef* f) { return f->number_; } -bool upb_MessageDef_FindByNameWithSize(const upb_MessageDef* m, - const char* name, size_t len, - const upb_FieldDef** out_f, - const upb_OneofDef** out_o) { - upb_value val; +bool upb_FieldDef_IsExtension(const upb_FieldDef* f) { return f->is_extension; } - if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) { - return false; - } +bool upb_FieldDef_IsPacked(const upb_FieldDef* f) { return f->is_packed; } - const upb_FieldDef* f = _upb_DefType_Unpack(val, UPB_DEFTYPE_FIELD); - const upb_OneofDef* o = _upb_DefType_Unpack(val, UPB_DEFTYPE_ONEOF); - if (out_f) *out_f = f; - if (out_o) *out_o = o; - return f || o; /* False if this was a JSON name. */ +const char* upb_FieldDef_Name(const upb_FieldDef* f) { + return _upb_DefBuilder_FullToShort(f->full_name); } -const upb_FieldDef* upb_MessageDef_FindByJsonNameWithSize( - const upb_MessageDef* m, const char* name, size_t size) { - upb_value val; - const upb_FieldDef* f; +const char* upb_FieldDef_JsonName(const upb_FieldDef* f) { + return f->json_name; +} - if (!upb_strtable_lookup2(&m->ntof, name, size, &val)) { - return NULL; - } +bool upb_FieldDef_HasJsonName(const upb_FieldDef* f) { + return f->has_json_name; +} - f = _upb_DefType_Unpack(val, UPB_DEFTYPE_FIELD); - if (!f) f = _upb_DefType_Unpack(val, UPB_DEFTYPE_FIELD_JSONNAME); +const upb_FileDef* upb_FieldDef_File(const upb_FieldDef* f) { return f->file; } - return f; +const upb_MessageDef* upb_FieldDef_ContainingType(const upb_FieldDef* f) { + return f->msgdef; } -int upb_MessageDef_ExtensionRangeCount(const upb_MessageDef* m) { - return m->ext_range_count; +const upb_MessageDef* upb_FieldDef_ExtensionScope(const upb_FieldDef* f) { + return f->is_extension ? f->scope.extension_scope : NULL; } -int upb_MessageDef_ReservedRangeCount(const upb_MessageDef* m) { - return m->res_range_count; +const upb_OneofDef* upb_FieldDef_ContainingOneof(const upb_FieldDef* f) { + return f->is_extension ? 
NULL : f->scope.oneof; } -int upb_MessageDef_ReservedNameCount(const upb_MessageDef* m) { - return m->res_name_count; +const upb_OneofDef* upb_FieldDef_RealContainingOneof(const upb_FieldDef* f) { + const upb_OneofDef* oneof = upb_FieldDef_ContainingOneof(f); + if (!oneof || upb_OneofDef_IsSynthetic(oneof)) return NULL; + return oneof; } -int upb_MessageDef_FieldCount(const upb_MessageDef* m) { - return m->field_count; -} +upb_MessageValue upb_FieldDef_Default(const upb_FieldDef* f) { + upb_MessageValue ret; -int upb_MessageDef_OneofCount(const upb_MessageDef* m) { - return m->oneof_count; -} + if (upb_FieldDef_IsRepeated(f) || upb_FieldDef_IsSubMessage(f)) { + return (upb_MessageValue){.msg_val = NULL}; + } -int upb_MessageDef_RealOneofCount(const upb_MessageDef* m) { - return m->real_oneof_count; -} + switch (upb_FieldDef_CType(f)) { + case kUpb_CType_Bool: + return (upb_MessageValue){.bool_val = f->defaultval.boolean}; + case kUpb_CType_Int64: + return (upb_MessageValue){.int64_val = f->defaultval.sint}; + case kUpb_CType_UInt64: + return (upb_MessageValue){.uint64_val = f->defaultval.uint}; + case kUpb_CType_Enum: + case kUpb_CType_Int32: + return (upb_MessageValue){.int32_val = (int32_t)f->defaultval.sint}; + case kUpb_CType_UInt32: + return (upb_MessageValue){.uint32_val = (uint32_t)f->defaultval.uint}; + case kUpb_CType_Float: + return (upb_MessageValue){.float_val = f->defaultval.flt}; + case kUpb_CType_Double: + return (upb_MessageValue){.double_val = f->defaultval.dbl}; + case kUpb_CType_String: + case kUpb_CType_Bytes: { + str_t* str = f->defaultval.str; + if (str) { + return (upb_MessageValue){ + .str_val = (upb_StringView){.data = str->str, .size = str->len}}; + } else { + return (upb_MessageValue){ + .str_val = (upb_StringView){.data = NULL, .size = 0}}; + } + } + default: + UPB_UNREACHABLE(); + } -int upb_MessageDef_NestedMessageCount(const upb_MessageDef* m) { - return m->nested_msg_count; + return ret; } -int upb_MessageDef_NestedEnumCount(const 
upb_MessageDef* m) { - return m->nested_enum_count; +const upb_MessageDef* upb_FieldDef_MessageSubDef(const upb_FieldDef* f) { + return upb_FieldDef_CType(f) == kUpb_CType_Message ? f->sub.msgdef : NULL; } -int upb_MessageDef_NestedExtensionCount(const upb_MessageDef* m) { - return m->nested_ext_count; +const upb_EnumDef* upb_FieldDef_EnumSubDef(const upb_FieldDef* f) { + return upb_FieldDef_CType(f) == kUpb_CType_Enum ? f->sub.enumdef : NULL; } -const upb_MiniTable* upb_MessageDef_MiniTable(const upb_MessageDef* m) { - return m->layout; +const upb_MiniTableField* upb_FieldDef_MiniTable(const upb_FieldDef* f) { + if (upb_FieldDef_IsExtension(f)) { + const upb_FileDef* file = upb_FieldDef_File(f); + return (upb_MiniTableField*)_upb_FileDef_ExtensionMiniTable( + file, f->layout_index); + } else { + const upb_MiniTable* layout = upb_MessageDef_MiniTable(f->msgdef); + return &layout->fields[f->layout_index]; + } } -const upb_ExtensionRange* upb_MessageDef_ExtensionRange(const upb_MessageDef* m, - int i) { - UPB_ASSERT(0 <= i && i < m->ext_range_count); - return _upb_ExtensionRange_At(m->ext_ranges, i); +const upb_MiniTableExtension* _upb_FieldDef_ExtensionMiniTable( + const upb_FieldDef* f) { + UPB_ASSERT(upb_FieldDef_IsExtension(f)); + const upb_FileDef* file = upb_FieldDef_File(f); + return _upb_FileDef_ExtensionMiniTable(file, f->layout_index); } -const upb_MessageReservedRange* upb_MessageDef_ReservedRange( - const upb_MessageDef* m, int i) { - UPB_ASSERT(0 <= i && i < m->res_range_count); - return _upb_MessageReservedRange_At(m->res_ranges, i); +bool _upb_FieldDef_IsClosedEnum(const upb_FieldDef* f) { + if (f->type_ != kUpb_FieldType_Enum) return false; + return upb_EnumDef_IsClosed(f->sub.enumdef); } -upb_StringView upb_MessageDef_ReservedName(const upb_MessageDef* m, int i) { - UPB_ASSERT(0 <= i && i < m->res_name_count); - return m->res_names[i]; +bool _upb_FieldDef_IsProto3Optional(const upb_FieldDef* f) { + return f->is_proto3_optional; } -const upb_FieldDef* 
upb_MessageDef_Field(const upb_MessageDef* m, int i) { - UPB_ASSERT(0 <= i && i < m->field_count); - return _upb_FieldDef_At(m->fields, i); -} +int _upb_FieldDef_LayoutIndex(const upb_FieldDef* f) { return f->layout_index; } -const upb_OneofDef* upb_MessageDef_Oneof(const upb_MessageDef* m, int i) { - UPB_ASSERT(0 <= i && i < m->oneof_count); - return _upb_OneofDef_At(m->oneofs, i); -} +uint64_t _upb_FieldDef_Modifiers(const upb_FieldDef* f) { + uint64_t out = f->is_packed ? kUpb_FieldModifier_IsPacked : 0; -const upb_MessageDef* upb_MessageDef_NestedMessage(const upb_MessageDef* m, - int i) { - UPB_ASSERT(0 <= i && i < m->nested_msg_count); - return &m->nested_msgs[i]; -} + switch (f->label_) { + case kUpb_Label_Optional: + if (!upb_FieldDef_HasPresence(f)) { + out |= kUpb_FieldModifier_IsProto3Singular; + } + break; + case kUpb_Label_Repeated: + out |= kUpb_FieldModifier_IsRepeated; + break; + case kUpb_Label_Required: + out |= kUpb_FieldModifier_IsRequired; + break; + } -const upb_EnumDef* upb_MessageDef_NestedEnum(const upb_MessageDef* m, int i) { - UPB_ASSERT(0 <= i && i < m->nested_enum_count); - return _upb_EnumDef_At(m->nested_enums, i); + if (_upb_FieldDef_IsClosedEnum(f)) { + out |= kUpb_FieldModifier_IsClosedEnum; + } + return out; } -const upb_FieldDef* upb_MessageDef_NestedExtension(const upb_MessageDef* m, - int i) { - UPB_ASSERT(0 <= i && i < m->nested_ext_count); - return _upb_FieldDef_At(m->nested_exts, i); +bool upb_FieldDef_HasDefault(const upb_FieldDef* f) { return f->has_default; } +bool upb_FieldDef_HasPresence(const upb_FieldDef* f) { return f->has_presence; } + +bool upb_FieldDef_HasSubDef(const upb_FieldDef* f) { + return upb_FieldDef_IsSubMessage(f) || + upb_FieldDef_CType(f) == kUpb_CType_Enum; } -upb_WellKnown upb_MessageDef_WellKnownType(const upb_MessageDef* m) { - return m->well_known_type; +bool upb_FieldDef_IsMap(const upb_FieldDef* f) { + return upb_FieldDef_IsRepeated(f) && upb_FieldDef_IsSubMessage(f) && + 
upb_MessageDef_IsMapEntry(upb_FieldDef_MessageSubDef(f)); } -bool _upb_MessageDef_InMessageSet(const upb_MessageDef* m) { - return m->in_message_set; +bool upb_FieldDef_IsOptional(const upb_FieldDef* f) { + return upb_FieldDef_Label(f) == kUpb_Label_Optional; } -const upb_FieldDef* upb_MessageDef_FindFieldByName(const upb_MessageDef* m, - const char* name) { - return upb_MessageDef_FindFieldByNameWithSize(m, name, strlen(name)); +bool upb_FieldDef_IsPrimitive(const upb_FieldDef* f) { + return !upb_FieldDef_IsString(f) && !upb_FieldDef_IsSubMessage(f); } -const upb_OneofDef* upb_MessageDef_FindOneofByName(const upb_MessageDef* m, - const char* name) { - return upb_MessageDef_FindOneofByNameWithSize(m, name, strlen(name)); +bool upb_FieldDef_IsRepeated(const upb_FieldDef* f) { + return upb_FieldDef_Label(f) == kUpb_Label_Repeated; } -bool upb_MessageDef_IsMapEntry(const upb_MessageDef* m) { - return UPB_DESC(MessageOptions_map_entry)(m->opts); +bool upb_FieldDef_IsRequired(const upb_FieldDef* f) { + return upb_FieldDef_Label(f) == kUpb_Label_Required; } -bool upb_MessageDef_IsMessageSet(const upb_MessageDef* m) { - return UPB_DESC(MessageOptions_message_set_wire_format)(m->opts); +bool upb_FieldDef_IsString(const upb_FieldDef* f) { + return upb_FieldDef_CType(f) == kUpb_CType_String || + upb_FieldDef_CType(f) == kUpb_CType_Bytes; } -static upb_MiniTable* _upb_MessageDef_MakeMiniTable(upb_DefBuilder* ctx, - const upb_MessageDef* m) { - upb_StringView desc; - // Note: this will assign layout_index for fields, so upb_FieldDef_MiniTable() - // is safe to call only after this call. 
- bool ok = upb_MessageDef_MiniDescriptorEncode(m, ctx->tmp_arena, &desc); - if (!ok) _upb_DefBuilder_OomErr(ctx); - - void** scratch_data = _upb_DefPool_ScratchData(ctx->symtab); - size_t* scratch_size = _upb_DefPool_ScratchSize(ctx->symtab); - upb_MiniTable* ret = upb_MiniTable_BuildWithBuf( - desc.data, desc.size, ctx->platform, ctx->arena, scratch_data, - scratch_size, ctx->status); - if (!ret) _upb_DefBuilder_FailJmp(ctx); - - return ret; +bool upb_FieldDef_IsSubMessage(const upb_FieldDef* f) { + return upb_FieldDef_CType(f) == kUpb_CType_Message; } -void _upb_MessageDef_Resolve(upb_DefBuilder* ctx, upb_MessageDef* m) { - for (int i = 0; i < m->field_count; i++) { - upb_FieldDef* f = (upb_FieldDef*)upb_MessageDef_Field(m, i); - _upb_FieldDef_Resolve(ctx, m->full_name, f); - } - - m->in_message_set = false; - for (int i = 0; i < upb_MessageDef_NestedExtensionCount(m); i++) { - upb_FieldDef* ext = (upb_FieldDef*)upb_MessageDef_NestedExtension(m, i); - _upb_FieldDef_Resolve(ctx, m->full_name, ext); - if (upb_FieldDef_Type(ext) == kUpb_FieldType_Message && - upb_FieldDef_Label(ext) == kUpb_Label_Optional && - upb_FieldDef_MessageSubDef(ext) == m && - UPB_DESC(MessageOptions_message_set_wire_format)( - upb_MessageDef_Options(upb_FieldDef_ContainingType(ext)))) { - m->in_message_set = true; - } - } - - for (int i = 0; i < upb_MessageDef_NestedMessageCount(m); i++) { - upb_MessageDef* n = (upb_MessageDef*)upb_MessageDef_NestedMessage(m, i); - _upb_MessageDef_Resolve(ctx, n); - } +static bool between(int32_t x, int32_t low, int32_t high) { + return x >= low && x <= high; } -void _upb_MessageDef_InsertField(upb_DefBuilder* ctx, upb_MessageDef* m, - const upb_FieldDef* f) { - const int32_t field_number = upb_FieldDef_Number(f); - - if (field_number <= 0 || field_number > kUpb_MaxFieldNumber) { - _upb_DefBuilder_Errf(ctx, "invalid field number (%u)", field_number); - } - - const char* json_name = upb_FieldDef_JsonName(f); - const char* shortname = upb_FieldDef_Name(f); - 
const size_t shortnamelen = strlen(shortname); +bool upb_FieldDef_checklabel(int32_t label) { return between(label, 1, 3); } +bool upb_FieldDef_checktype(int32_t type) { return between(type, 1, 11); } +bool upb_FieldDef_checkintfmt(int32_t fmt) { return between(fmt, 1, 3); } - upb_value v = upb_value_constptr(f); +bool upb_FieldDef_checkdescriptortype(int32_t type) { + return between(type, 1, 18); +} - upb_value existing_v; - if (upb_strtable_lookup(&m->ntof, shortname, &existing_v)) { - _upb_DefBuilder_Errf(ctx, "duplicate field name (%s)", shortname); - } +static bool streql2(const char* a, size_t n, const char* b) { + return n == strlen(b) && memcmp(a, b, n) == 0; +} - const upb_value field_v = _upb_DefType_Pack(f, UPB_DEFTYPE_FIELD); - bool ok = - _upb_MessageDef_Insert(m, shortname, shortnamelen, field_v, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); +// Implement the transformation as described in the spec: +// 1. upper case all letters after an underscore. +// 2. remove all underscores. +static char* make_json_name(const char* name, size_t size, upb_Arena* a) { + char* out = upb_Arena_Malloc(a, size + 1); // +1 is to add a trailing '\0' + if (out == NULL) return NULL; - if (strcmp(shortname, json_name) != 0) { - if (upb_strtable_lookup(&m->ntof, json_name, &v)) { - _upb_DefBuilder_Errf(ctx, "duplicate json_name (%s)", json_name); + bool ucase_next = false; + char* des = out; + for (size_t i = 0; i < size; i++) { + if (name[i] == '_') { + ucase_next = true; + } else { + *des++ = ucase_next ? 
toupper(name[i]) : name[i]; + ucase_next = false; } - - const size_t json_size = strlen(json_name); - const upb_value json_v = _upb_DefType_Pack(f, UPB_DEFTYPE_FIELD_JSONNAME); - ok = _upb_MessageDef_Insert(m, json_name, json_size, json_v, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); - } - - if (upb_inttable_lookup(&m->itof, field_number, NULL)) { - _upb_DefBuilder_Errf(ctx, "duplicate field number (%u)", field_number); } + *des++ = '\0'; + return out; +} - ok = upb_inttable_insert(&m->itof, field_number, v, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); +static str_t* newstr(upb_DefBuilder* ctx, const char* data, size_t len) { + str_t* ret = _upb_DefBuilder_Alloc(ctx, sizeof(*ret) + len); + if (!ret) _upb_DefBuilder_OomErr(ctx); + ret->len = len; + if (len) memcpy(ret->str, data, len); + ret->str[len] = '\0'; + return ret; } -void _upb_MessageDef_CreateMiniTable(upb_DefBuilder* ctx, upb_MessageDef* m) { - if (ctx->layout == NULL) { - m->layout = _upb_MessageDef_MakeMiniTable(ctx, m); - } else { - UPB_ASSERT(ctx->msg_count < ctx->layout->msg_count); - m->layout = ctx->layout->msgs[ctx->msg_count++]; - UPB_ASSERT(m->field_count == m->layout->field_count); +static str_t* unescape(upb_DefBuilder* ctx, const upb_FieldDef* f, + const char* data, size_t len) { + // Size here is an upper bound; escape sequences could ultimately shrink it. + str_t* ret = _upb_DefBuilder_Alloc(ctx, sizeof(*ret) + len); + char* dst = &ret->str[0]; + const char* src = data; + const char* end = data + len; - // We don't need the result of this call, but it will assign layout_index - // for all the fields in O(n lg n) time. 
- _upb_FieldDefs_Sorted(m->fields, m->field_count, ctx->tmp_arena); + while (src < end) { + if (*src == '\\') { + src++; + *dst++ = _upb_DefBuilder_ParseEscape(ctx, f, &src, end); + } else { + *dst++ = *src++; + } } - for (int i = 0; i < m->nested_msg_count; i++) { - upb_MessageDef* nested = - (upb_MessageDef*)upb_MessageDef_NestedMessage(m, i); - _upb_MessageDef_CreateMiniTable(ctx, nested); - } + ret->len = dst - &ret->str[0]; + return ret; } -void _upb_MessageDef_LinkMiniTable(upb_DefBuilder* ctx, - const upb_MessageDef* m) { - for (int i = 0; i < upb_MessageDef_NestedExtensionCount(m); i++) { - const upb_FieldDef* ext = upb_MessageDef_NestedExtension(m, i); - _upb_FieldDef_BuildMiniTableExtension(ctx, ext); - } +static void parse_default(upb_DefBuilder* ctx, const char* str, size_t len, + upb_FieldDef* f) { + char* end; + char nullz[64]; + errno = 0; - for (int i = 0; i < m->nested_msg_count; i++) { - _upb_MessageDef_LinkMiniTable(ctx, upb_MessageDef_NestedMessage(m, i)); + switch (upb_FieldDef_CType(f)) { + case kUpb_CType_Int32: + case kUpb_CType_Int64: + case kUpb_CType_UInt32: + case kUpb_CType_UInt64: + case kUpb_CType_Double: + case kUpb_CType_Float: + // Standard C number parsing functions expect null-terminated strings. 
+ if (len >= sizeof(nullz) - 1) { + _upb_DefBuilder_Errf(ctx, "Default too long: %.*s", (int)len, str); + } + memcpy(nullz, str, len); + nullz[len] = '\0'; + str = nullz; + break; + default: + break; } - if (ctx->layout) return; - - for (int i = 0; i < m->field_count; i++) { - const upb_FieldDef* f = upb_MessageDef_Field(m, i); - const upb_MessageDef* sub_m = upb_FieldDef_MessageSubDef(f); - const upb_EnumDef* sub_e = upb_FieldDef_EnumSubDef(f); - const int layout_index = _upb_FieldDef_LayoutIndex(f); - upb_MiniTable* mt = (upb_MiniTable*)upb_MessageDef_MiniTable(m); - - UPB_ASSERT(layout_index < m->field_count); - upb_MiniTableField* mt_f = - (upb_MiniTableField*)&m->layout->fields[layout_index]; - if (sub_m) { - if (!mt->subs) { - _upb_DefBuilder_Errf(ctx, "unexpected submsg for (%s)", m->full_name); + switch (upb_FieldDef_CType(f)) { + case kUpb_CType_Int32: { + long val = strtol(str, &end, 0); + if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end) { + goto invalid; } - UPB_ASSERT(mt_f); - UPB_ASSERT(sub_m->layout); - if (UPB_UNLIKELY(!upb_MiniTable_SetSubMessage(mt, mt_f, sub_m->layout))) { - _upb_DefBuilder_Errf(ctx, "invalid submsg for (%s)", m->full_name); + f->defaultval.sint = val; + break; + } + case kUpb_CType_Enum: { + const upb_EnumDef* e = f->sub.enumdef; + const upb_EnumValueDef* ev = + upb_EnumDef_FindValueByNameWithSize(e, str, len); + if (!ev) { + goto invalid; } - } else if (_upb_FieldDef_IsClosedEnum(f)) { - const upb_MiniTableEnum* mt_e = _upb_EnumDef_MiniTable(sub_e); - if (UPB_UNLIKELY(!upb_MiniTable_SetSubEnum(mt, mt_f, mt_e))) { - _upb_DefBuilder_Errf(ctx, "invalid subenum for (%s)", m->full_name); + f->defaultval.sint = upb_EnumValueDef_Number(ev); + break; + } + case kUpb_CType_Int64: { + long long val = strtoll(str, &end, 0); + if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || *end) { + goto invalid; } + f->defaultval.sint = val; + break; } - } - -#ifndef NDEBUG - for (int i = 0; i < m->field_count; i++) { - 
const upb_FieldDef* f = upb_MessageDef_Field(m, i); - const int layout_index = _upb_FieldDef_LayoutIndex(f); - UPB_ASSERT(layout_index < m->layout->field_count); - const upb_MiniTableField* mt_f = &m->layout->fields[layout_index]; - UPB_ASSERT(upb_FieldDef_Type(f) == upb_MiniTableField_Type(mt_f)); - UPB_ASSERT(upb_FieldDef_CType(f) == upb_MiniTableField_CType(mt_f)); - UPB_ASSERT(upb_FieldDef_HasPresence(f) == - upb_MiniTableField_HasPresence(mt_f)); - } -#endif -} - -static uint64_t _upb_MessageDef_Modifiers(const upb_MessageDef* m) { - uint64_t out = 0; - if (upb_FileDef_Syntax(m->file) == kUpb_Syntax_Proto3) { - out |= kUpb_MessageModifier_ValidateUtf8; - out |= kUpb_MessageModifier_DefaultIsPacked; - } - if (m->ext_range_count) { - out |= kUpb_MessageModifier_IsExtendable; - } - return out; -} - -static bool _upb_MessageDef_EncodeMap(upb_DescState* s, const upb_MessageDef* m, - upb_Arena* a) { - if (m->field_count != 2) return false; - - const upb_FieldDef* key_field = upb_MessageDef_Field(m, 0); - const upb_FieldDef* val_field = upb_MessageDef_Field(m, 1); - if (key_field == NULL || val_field == NULL) return false; + case kUpb_CType_UInt32: { + unsigned long val = strtoul(str, &end, 0); + if (val > UINT32_MAX || errno == ERANGE || *end) { + goto invalid; + } + f->defaultval.uint = val; + break; + } + case kUpb_CType_UInt64: { + unsigned long long val = strtoull(str, &end, 0); + if (val > UINT64_MAX || errno == ERANGE || *end) { + goto invalid; + } + f->defaultval.uint = val; + break; + } + case kUpb_CType_Double: { + double val = strtod(str, &end); + if (errno == ERANGE || *end) { + goto invalid; + } + f->defaultval.dbl = val; + break; + } + case kUpb_CType_Float: { + float val = strtof(str, &end); + if (errno == ERANGE || *end) { + goto invalid; + } + f->defaultval.flt = val; + break; + } + case kUpb_CType_Bool: { + if (streql2(str, len, "false")) { + f->defaultval.boolean = false; + } else if (streql2(str, len, "true")) { + f->defaultval.boolean = true; + } 
else { + goto invalid; + } + break; + } + case kUpb_CType_String: + f->defaultval.str = newstr(ctx, str, len); + break; + case kUpb_CType_Bytes: + f->defaultval.str = unescape(ctx, f, str, len); + break; + case kUpb_CType_Message: + /* Should not have a default value. */ + _upb_DefBuilder_Errf(ctx, "Message should not have a default (%s)", + upb_FieldDef_FullName(f)); + } - UPB_ASSERT(_upb_FieldDef_LayoutIndex(key_field) == 0); - UPB_ASSERT(_upb_FieldDef_LayoutIndex(val_field) == 1); + return; - s->ptr = upb_MtDataEncoder_EncodeMap( - &s->e, s->ptr, upb_FieldDef_Type(key_field), upb_FieldDef_Type(val_field), - _upb_FieldDef_Modifiers(key_field), _upb_FieldDef_Modifiers(val_field)); - return true; +invalid: + _upb_DefBuilder_Errf(ctx, "Invalid default '%.*s' for field %s of type %d", + (int)len, str, upb_FieldDef_FullName(f), + (int)upb_FieldDef_Type(f)); } -static bool _upb_MessageDef_EncodeMessage(upb_DescState* s, - const upb_MessageDef* m, - upb_Arena* a) { - const upb_FieldDef** sorted = NULL; - if (!m->is_sorted) { - sorted = _upb_FieldDefs_Sorted(m->fields, m->field_count, a); - if (!sorted) return false; +static void set_default_default(upb_DefBuilder* ctx, upb_FieldDef* f) { + switch (upb_FieldDef_CType(f)) { + case kUpb_CType_Int32: + case kUpb_CType_Int64: + f->defaultval.sint = 0; + break; + case kUpb_CType_UInt64: + case kUpb_CType_UInt32: + f->defaultval.uint = 0; + break; + case kUpb_CType_Double: + case kUpb_CType_Float: + f->defaultval.dbl = 0; + break; + case kUpb_CType_String: + case kUpb_CType_Bytes: + f->defaultval.str = newstr(ctx, NULL, 0); + break; + case kUpb_CType_Bool: + f->defaultval.boolean = false; + break; + case kUpb_CType_Enum: { + const upb_EnumValueDef* v = upb_EnumDef_Value(f->sub.enumdef, 0); + f->defaultval.sint = upb_EnumValueDef_Number(v); + break; + } + case kUpb_CType_Message: + break; } +} - s->ptr = upb_MtDataEncoder_StartMessage(&s->e, s->ptr, - _upb_MessageDef_Modifiers(m)); - - for (int i = 0; i < m->field_count; i++) { 
- const upb_FieldDef* f = sorted ? sorted[i] : upb_MessageDef_Field(m, i); - const upb_FieldType type = upb_FieldDef_Type(f); - const int number = upb_FieldDef_Number(f); - const uint64_t modifiers = _upb_FieldDef_Modifiers(f); +static void _upb_FieldDef_Create(upb_DefBuilder* ctx, const char* prefix, + const UPB_DESC(FieldDescriptorProto) * + field_proto, + upb_MessageDef* m, upb_FieldDef* f) { + // Must happen before _upb_DefBuilder_Add() + f->file = _upb_DefBuilder_File(ctx); - if (!_upb_DescState_Grow(s, a)) return false; - s->ptr = upb_MtDataEncoder_PutField(&s->e, s->ptr, type, number, modifiers); + if (!UPB_DESC(FieldDescriptorProto_has_name)(field_proto)) { + _upb_DefBuilder_Errf(ctx, "field has no name"); } - for (int i = 0; i < m->real_oneof_count; i++) { - if (!_upb_DescState_Grow(s, a)) return false; - s->ptr = upb_MtDataEncoder_StartOneof(&s->e, s->ptr); + const upb_StringView name = UPB_DESC(FieldDescriptorProto_name)(field_proto); - const upb_OneofDef* o = upb_MessageDef_Oneof(m, i); - const int field_count = upb_OneofDef_FieldCount(o); - for (int j = 0; j < field_count; j++) { - const int number = upb_FieldDef_Number(upb_OneofDef_Field(o, j)); + f->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); + f->label_ = (int)UPB_DESC(FieldDescriptorProto_label)(field_proto); + f->number_ = UPB_DESC(FieldDescriptorProto_number)(field_proto); + f->is_proto3_optional = + UPB_DESC(FieldDescriptorProto_proto3_optional)(field_proto); + f->msgdef = m; + f->scope.oneof = NULL; - if (!_upb_DescState_Grow(s, a)) return false; - s->ptr = upb_MtDataEncoder_PutOneofField(&s->e, s->ptr, number); - } + f->has_json_name = UPB_DESC(FieldDescriptorProto_has_json_name)(field_proto); + if (f->has_json_name) { + const upb_StringView sv = + UPB_DESC(FieldDescriptorProto_json_name)(field_proto); + f->json_name = upb_strdup2(sv.data, sv.size, ctx->arena); + } else { + f->json_name = make_json_name(name.data, name.size, ctx->arena); } + if (!f->json_name) 
_upb_DefBuilder_OomErr(ctx); - return true; -} - -static bool _upb_MessageDef_EncodeMessageSet(upb_DescState* s, - const upb_MessageDef* m, - upb_Arena* a) { - s->ptr = upb_MtDataEncoder_EncodeMessageSet(&s->e, s->ptr); - - return true; -} + const bool has_type = UPB_DESC(FieldDescriptorProto_has_type)(field_proto); + const bool has_type_name = + UPB_DESC(FieldDescriptorProto_has_type_name)(field_proto); -bool upb_MessageDef_MiniDescriptorEncode(const upb_MessageDef* m, upb_Arena* a, - upb_StringView* out) { - upb_DescState s; - _upb_DescState_Init(&s); + f->type_ = (int)UPB_DESC(FieldDescriptorProto_type)(field_proto); - if (!_upb_DescState_Grow(&s, a)) return false; + if (has_type) { + switch (f->type_) { + case kUpb_FieldType_Message: + case kUpb_FieldType_Group: + case kUpb_FieldType_Enum: + if (!has_type_name) { + _upb_DefBuilder_Errf(ctx, "field of type %d requires type name (%s)", + (int)f->type_, f->full_name); + } + break; + default: + if (has_type_name) { + _upb_DefBuilder_Errf( + ctx, "invalid type for field with type_name set (%s, %d)", + f->full_name, (int)f->type_); + } + } + } - if (upb_MessageDef_IsMapEntry(m)) { - if (!_upb_MessageDef_EncodeMap(&s, m, a)) return false; - } else if (UPB_DESC(MessageOptions_message_set_wire_format)(m->opts)) { - if (!_upb_MessageDef_EncodeMessageSet(&s, m, a)) return false; + if (!has_type && has_type_name) { + f->type_ = + UPB_FIELD_TYPE_UNSPECIFIED; // We'll assign this in resolve_subdef() } else { - if (!_upb_MessageDef_EncodeMessage(&s, m, a)) return false; + if (f->type_ < kUpb_FieldType_Double || f->type_ > kUpb_FieldType_SInt64) { + _upb_DefBuilder_Errf(ctx, "invalid type for field %s (%d)", f->full_name, + f->type_); + } } - if (!_upb_DescState_Grow(&s, a)) return false; - *s.ptr = '\0'; + if (f->label_ < kUpb_Label_Optional || f->label_ > kUpb_Label_Repeated) { + _upb_DefBuilder_Errf(ctx, "invalid label for field %s (%d)", f->full_name, + f->label_); + } - out->data = s.buf; - out->size = s.ptr - s.buf; - 
return true; -} + /* We can't resolve the subdef or (in the case of extensions) the containing + * message yet, because it may not have been defined yet. We stash a pointer + * to the field_proto until later when we can properly resolve it. */ + f->sub.unresolved = field_proto; -static upb_StringView* _upb_ReservedNames_New(upb_DefBuilder* ctx, int n, - const upb_StringView* protos) { - upb_StringView* sv = _upb_DefBuilder_Alloc(ctx, sizeof(upb_StringView) * n); - for (size_t i = 0; i < n; i++) { - sv[i].data = - upb_strdup2(protos[i].data, protos[i].size, _upb_DefBuilder_Arena(ctx)); - sv[i].size = protos[i].size; + if (f->label_ == kUpb_Label_Required && + upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto3) { + _upb_DefBuilder_Errf(ctx, "proto3 fields cannot be required (%s)", + f->full_name); } - return sv; -} -static void create_msgdef(upb_DefBuilder* ctx, const char* prefix, - const UPB_DESC(DescriptorProto) * msg_proto, - const upb_MessageDef* containing_type, - upb_MessageDef* m) { - const UPB_DESC(OneofDescriptorProto)* const* oneofs; - const UPB_DESC(FieldDescriptorProto)* const* fields; - const UPB_DESC(DescriptorProto_ExtensionRange)* const* ext_ranges; - const UPB_DESC(DescriptorProto_ReservedRange)* const* res_ranges; - const upb_StringView* res_names; - size_t n_oneof, n_field, n_enum, n_ext, n_msg; - size_t n_ext_range, n_res_range, n_res_name; - upb_StringView name; + if (UPB_DESC(FieldDescriptorProto_has_oneof_index)(field_proto)) { + int oneof_index = UPB_DESC(FieldDescriptorProto_oneof_index)(field_proto); - // Must happen before _upb_DefBuilder_Add() - m->file = _upb_DefBuilder_File(ctx); + if (upb_FieldDef_Label(f) != kUpb_Label_Optional) { + _upb_DefBuilder_Errf(ctx, "fields in oneof must have OPTIONAL label (%s)", + f->full_name); + } - m->containing_type = containing_type; - m->is_sorted = true; + if (!m) { + _upb_DefBuilder_Errf(ctx, "oneof field (%s) has no containing msg", + f->full_name); + } - name = 
UPB_DESC(DescriptorProto_name)(msg_proto); + if (oneof_index >= upb_MessageDef_OneofCount(m)) { + _upb_DefBuilder_Errf(ctx, "oneof_index out of range (%s)", f->full_name); + } - m->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); - _upb_DefBuilder_Add(ctx, m->full_name, _upb_DefType_Pack(m, UPB_DEFTYPE_MSG)); - - oneofs = UPB_DESC(DescriptorProto_oneof_decl)(msg_proto, &n_oneof); - fields = UPB_DESC(DescriptorProto_field)(msg_proto, &n_field); - ext_ranges = - UPB_DESC(DescriptorProto_extension_range)(msg_proto, &n_ext_range); - res_ranges = - UPB_DESC(DescriptorProto_reserved_range)(msg_proto, &n_res_range); - res_names = UPB_DESC(DescriptorProto_reserved_name)(msg_proto, &n_res_name); - - bool ok = upb_inttable_init(&m->itof, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); - - ok = upb_strtable_init(&m->ntof, n_oneof + n_field, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); - - UPB_DEF_SET_OPTIONS(m->opts, DescriptorProto, MessageOptions, msg_proto); + upb_OneofDef* oneof = (upb_OneofDef*)upb_MessageDef_Oneof(m, oneof_index); + f->scope.oneof = oneof; - m->oneof_count = n_oneof; - m->oneofs = _upb_OneofDefs_New(ctx, n_oneof, oneofs, m); + _upb_OneofDef_Insert(ctx, oneof, f, name.data, name.size); + } - m->field_count = n_field; - m->fields = - _upb_FieldDefs_New(ctx, n_field, fields, m->full_name, m, &m->is_sorted); + UPB_DEF_SET_OPTIONS(f->opts, FieldDescriptorProto, FieldOptions, field_proto); - // Message Sets may not contain fields. - if (UPB_UNLIKELY(UPB_DESC(MessageOptions_message_set_wire_format)(m->opts))) { - if (UPB_UNLIKELY(n_field > 0)) { - _upb_DefBuilder_Errf(ctx, "invalid message set (%s)", m->full_name); - } + if (UPB_DESC(FieldOptions_has_packed)(f->opts)) { + f->is_packed = UPB_DESC(FieldOptions_packed)(f->opts); + } else { + // Repeated fields default to packed for proto3 only. 
+ f->is_packed = has_type && upb_FieldDef_IsPrimitive(f) && + f->label_ == kUpb_Label_Repeated && + upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto3; } - m->ext_range_count = n_ext_range; - m->ext_ranges = _upb_ExtensionRanges_New(ctx, n_ext_range, ext_ranges, m); + f->has_presence = + (!upb_FieldDef_IsRepeated(f)) && + (f->type_ == kUpb_FieldType_Message || f->type_ == kUpb_FieldType_Group || + upb_FieldDef_ContainingOneof(f) || + (upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto2)); +} - m->res_range_count = n_res_range; - m->res_ranges = - _upb_MessageReservedRanges_New(ctx, n_res_range, res_ranges, m); +static void _upb_FieldDef_CreateExt(upb_DefBuilder* ctx, const char* prefix, + const UPB_DESC(FieldDescriptorProto) * + field_proto, + upb_MessageDef* m, upb_FieldDef* f) { + f->is_extension = true; + _upb_FieldDef_Create(ctx, prefix, field_proto, m, f); - m->res_name_count = n_res_name; - m->res_names = _upb_ReservedNames_New(ctx, n_res_name, res_names); + if (UPB_DESC(FieldDescriptorProto_has_oneof_index)(field_proto)) { + _upb_DefBuilder_Errf(ctx, "oneof_index provided for extension field (%s)", + f->full_name); + } - const size_t synthetic_count = _upb_OneofDefs_Finalize(ctx, m); - m->real_oneof_count = m->oneof_count - synthetic_count; + f->scope.extension_scope = m; + _upb_DefBuilder_Add(ctx, f->full_name, _upb_DefType_Pack(f, UPB_DEFTYPE_EXT)); + f->layout_index = ctx->ext_count++; - assign_msg_wellknowntype(m); - upb_inttable_compact(&m->itof, ctx->arena); + if (ctx->layout) { + UPB_ASSERT(_upb_FieldDef_ExtensionMiniTable(f)->field.number == f->number_); + } +} - const UPB_DESC(EnumDescriptorProto)* const* enums = - UPB_DESC(DescriptorProto_enum_type)(msg_proto, &n_enum); - m->nested_enum_count = n_enum; - m->nested_enums = _upb_EnumDefs_New(ctx, n_enum, enums, m); +static void _upb_FieldDef_CreateNotExt(upb_DefBuilder* ctx, const char* prefix, + const UPB_DESC(FieldDescriptorProto) * + field_proto, + upb_MessageDef* m, upb_FieldDef* f) { + 
f->is_extension = false; + _upb_FieldDef_Create(ctx, prefix, field_proto, m, f); - const UPB_DESC(FieldDescriptorProto)* const* exts = - UPB_DESC(DescriptorProto_extension)(msg_proto, &n_ext); - m->nested_ext_count = n_ext; - m->nested_exts = _upb_Extensions_New(ctx, n_ext, exts, m->full_name, m); + if (!UPB_DESC(FieldDescriptorProto_has_oneof_index)(field_proto)) { + if (f->is_proto3_optional) { + _upb_DefBuilder_Errf( + ctx, + "non-extension field (%s) with proto3_optional was not in a oneof", + f->full_name); + } + } - const UPB_DESC(DescriptorProto)* const* msgs = - UPB_DESC(DescriptorProto_nested_type)(msg_proto, &n_msg); - m->nested_msg_count = n_msg; - m->nested_msgs = _upb_MessageDefs_New(ctx, n_msg, msgs, m); + _upb_MessageDef_InsertField(ctx, m, f); } -// Allocate and initialize an array of |n| message defs. -upb_MessageDef* _upb_MessageDefs_New( - upb_DefBuilder* ctx, int n, const UPB_DESC(DescriptorProto) * const* protos, - const upb_MessageDef* containing_type) { - _upb_DefType_CheckPadding(sizeof(upb_MessageDef)); - - const char* name = containing_type ? 
containing_type->full_name - : _upb_FileDef_RawPackage(ctx->file); +upb_FieldDef* _upb_Extensions_New( + upb_DefBuilder* ctx, int n, + const UPB_DESC(FieldDescriptorProto) * const* protos, const char* prefix, + upb_MessageDef* m) { + _upb_DefType_CheckPadding(sizeof(upb_FieldDef)); + upb_FieldDef* defs = + (upb_FieldDef*)_upb_DefBuilder_Alloc(ctx, sizeof(upb_FieldDef) * n); - upb_MessageDef* m = _upb_DefBuilder_Alloc(ctx, sizeof(upb_MessageDef) * n); for (int i = 0; i < n; i++) { - create_msgdef(ctx, name, protos[i], containing_type, &m[i]); + upb_FieldDef* f = &defs[i]; + + _upb_FieldDef_CreateExt(ctx, prefix, protos[i], m, f); + f->index_ = i; } - return m; + + return defs; } +upb_FieldDef* _upb_FieldDefs_New( + upb_DefBuilder* ctx, int n, + const UPB_DESC(FieldDescriptorProto) * const* protos, const char* prefix, + upb_MessageDef* m, bool* is_sorted) { + _upb_DefType_CheckPadding(sizeof(upb_FieldDef)); + upb_FieldDef* defs = + (upb_FieldDef*)_upb_DefBuilder_Alloc(ctx, sizeof(upb_FieldDef) * n); + + uint32_t previous = 0; + for (int i = 0; i < n; i++) { + upb_FieldDef* f = &defs[i]; -// Must be last. + _upb_FieldDef_CreateNotExt(ctx, prefix, protos[i], m, f); + f->index_ = i; + if (!ctx->layout) { + // Speculate that the def fields are sorted. We will always sort the + // MiniTable fields, so if defs are sorted then indices will match. + // + // If this is incorrect, we will overwrite later. 
+ f->layout_index = i; + } -struct upb_MessageReservedRange { - int32_t start; - int32_t end; -}; + const uint32_t current = f->number_; + if (previous > current) *is_sorted = false; + previous = current; + } -upb_MessageReservedRange* _upb_MessageReservedRange_At( - const upb_MessageReservedRange* r, int i) { - return (upb_MessageReservedRange*)&r[i]; + return defs; } -int32_t upb_MessageReservedRange_Start(const upb_MessageReservedRange* r) { - return r->start; +static void resolve_subdef(upb_DefBuilder* ctx, const char* prefix, + upb_FieldDef* f) { + const UPB_DESC(FieldDescriptorProto)* field_proto = f->sub.unresolved; + upb_StringView name = UPB_DESC(FieldDescriptorProto_type_name)(field_proto); + bool has_name = UPB_DESC(FieldDescriptorProto_has_type_name)(field_proto); + switch ((int)f->type_) { + case UPB_FIELD_TYPE_UNSPECIFIED: { + // Type was not specified and must be inferred. + UPB_ASSERT(has_name); + upb_deftype_t type; + const void* def = + _upb_DefBuilder_ResolveAny(ctx, f->full_name, prefix, name, &type); + switch (type) { + case UPB_DEFTYPE_ENUM: + f->sub.enumdef = def; + f->type_ = kUpb_FieldType_Enum; + if (!UPB_DESC(FieldOptions_has_packed)(f->opts)) { + f->is_packed = f->label_ == kUpb_Label_Repeated && + upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto3; + } + break; + case UPB_DEFTYPE_MSG: + f->sub.msgdef = def; + f->type_ = kUpb_FieldType_Message; // It appears there is no way of + // this being a group. 
+ f->has_presence = !upb_FieldDef_IsRepeated(f); + break; + default: + _upb_DefBuilder_Errf(ctx, "Couldn't resolve type name for field %s", + f->full_name); + } + break; + } + case kUpb_FieldType_Message: + case kUpb_FieldType_Group: + UPB_ASSERT(has_name); + f->sub.msgdef = _upb_DefBuilder_Resolve(ctx, f->full_name, prefix, name, + UPB_DEFTYPE_MSG); + break; + case kUpb_FieldType_Enum: + UPB_ASSERT(has_name); + f->sub.enumdef = _upb_DefBuilder_Resolve(ctx, f->full_name, prefix, name, + UPB_DEFTYPE_ENUM); + break; + default: + // No resolution necessary. + break; + } } -int32_t upb_MessageReservedRange_End(const upb_MessageReservedRange* r) { - return r->end; + +static int _upb_FieldDef_Compare(const void* p1, const void* p2) { + const uint32_t v1 = (*(upb_FieldDef**)p1)->number_; + const uint32_t v2 = (*(upb_FieldDef**)p2)->number_; + return (v1 < v2) ? -1 : (v1 > v2); } -upb_MessageReservedRange* _upb_MessageReservedRanges_New( - upb_DefBuilder* ctx, int n, - const UPB_DESC(DescriptorProto_ReservedRange) * const* protos, - const upb_MessageDef* m) { - upb_MessageReservedRange* r = - _upb_DefBuilder_Alloc(ctx, sizeof(upb_MessageReservedRange) * n); +// _upb_FieldDefs_Sorted() is mostly a pure function of its inputs, but has one +// critical side effect that we depend on: it sets layout_index appropriately +// for non-sorted lists of fields. +const upb_FieldDef** _upb_FieldDefs_Sorted(const upb_FieldDef* f, int n, + upb_Arena* a) { + // TODO(salo): Replace this arena alloc with a persistent scratch buffer. 
+ upb_FieldDef** out = (upb_FieldDef**)upb_Arena_Malloc(a, n * sizeof(void*)); + if (!out) return NULL; for (int i = 0; i < n; i++) { - const int32_t start = - UPB_DESC(DescriptorProto_ReservedRange_start)(protos[i]); - const int32_t end = UPB_DESC(DescriptorProto_ReservedRange_end)(protos[i]); - const int32_t max = kUpb_MaxFieldNumber + 1; - - // A full validation would also check that each range is disjoint, and that - // none of the fields overlap with the extension ranges, but we are just - // sanity checking here. - if (start < 1 || end <= start || end > max) { - _upb_DefBuilder_Errf(ctx, - "Reserved range (%d, %d) is invalid, message=%s\n", - (int)start, (int)end, upb_MessageDef_FullName(m)); - } - - r[i].start = start; - r[i].end = end; + out[i] = (upb_FieldDef*)&f[i]; } + qsort(out, n, sizeof(void*), _upb_FieldDef_Compare); - return r; + for (int i = 0; i < n; i++) { + out[i]->layout_index = i; + } + return (const upb_FieldDef**)out; } +bool upb_FieldDef_MiniDescriptorEncode(const upb_FieldDef* f, upb_Arena* a, + upb_StringView* out) { + UPB_ASSERT(f->is_extension); -// Must be last. 
+ upb_DescState s; + _upb_DescState_Init(&s); -struct upb_MethodDef { - const UPB_DESC(MethodOptions) * opts; - upb_ServiceDef* service; - const char* full_name; - const upb_MessageDef* input_type; - const upb_MessageDef* output_type; - int index; - bool client_streaming; - bool server_streaming; -}; - -upb_MethodDef* _upb_MethodDef_At(const upb_MethodDef* m, int i) { - return (upb_MethodDef*)&m[i]; -} + const int number = upb_FieldDef_Number(f); + const uint64_t modifiers = _upb_FieldDef_Modifiers(f); -const upb_ServiceDef* upb_MethodDef_Service(const upb_MethodDef* m) { - return m->service; -} + if (!_upb_DescState_Grow(&s, a)) return false; + s.ptr = upb_MtDataEncoder_EncodeExtension(&s.e, s.ptr, f->type_, number, + modifiers); + *s.ptr = '\0'; -const UPB_DESC(MethodOptions) * upb_MethodDef_Options(const upb_MethodDef* m) { - return m->opts; + out->data = s.buf; + out->size = s.ptr - s.buf; + return true; } -bool upb_MethodDef_HasOptions(const upb_MethodDef* m) { - return m->opts != (void*)kUpbDefOptDefault; -} +static void resolve_extension(upb_DefBuilder* ctx, const char* prefix, + upb_FieldDef* f, + const UPB_DESC(FieldDescriptorProto) * + field_proto) { + if (!UPB_DESC(FieldDescriptorProto_has_extendee)(field_proto)) { + _upb_DefBuilder_Errf(ctx, "extension for field '%s' had no extendee", + f->full_name); + } -const char* upb_MethodDef_FullName(const upb_MethodDef* m) { - return m->full_name; -} + upb_StringView name = UPB_DESC(FieldDescriptorProto_extendee)(field_proto); + const upb_MessageDef* m = + _upb_DefBuilder_Resolve(ctx, f->full_name, prefix, name, UPB_DEFTYPE_MSG); + f->msgdef = m; -const char* upb_MethodDef_Name(const upb_MethodDef* m) { - return _upb_DefBuilder_FullToShort(m->full_name); + if (!_upb_MessageDef_IsValidExtensionNumber(m, f->number_)) { + _upb_DefBuilder_Errf( + ctx, + "field number %u in extension %s has no extension range in message %s", + (unsigned)f->number_, f->full_name, upb_MessageDef_FullName(m)); + } } -int 
upb_MethodDef_Index(const upb_MethodDef* m) { return m->index; } +void _upb_FieldDef_BuildMiniTableExtension(upb_DefBuilder* ctx, + const upb_FieldDef* f) { + const upb_MiniTableExtension* ext = _upb_FieldDef_ExtensionMiniTable(f); -const upb_MessageDef* upb_MethodDef_InputType(const upb_MethodDef* m) { - return m->input_type; -} + if (ctx->layout) { + UPB_ASSERT(upb_FieldDef_Number(f) == ext->field.number); + } else { + upb_StringView desc; + if (!upb_FieldDef_MiniDescriptorEncode(f, ctx->tmp_arena, &desc)) { + _upb_DefBuilder_OomErr(ctx); + } -const upb_MessageDef* upb_MethodDef_OutputType(const upb_MethodDef* m) { - return m->output_type; -} + upb_MiniTableExtension* mut_ext = (upb_MiniTableExtension*)ext; + upb_MiniTableSub sub = {NULL}; + if (upb_FieldDef_IsSubMessage(f)) { + sub.submsg = upb_MessageDef_MiniTable(f->sub.msgdef); + } else if (_upb_FieldDef_IsClosedEnum(f)) { + sub.subenum = _upb_EnumDef_MiniTable(f->sub.enumdef); + } + bool ok2 = upb_MiniTableExtension_Init(desc.data, desc.size, mut_ext, + upb_MessageDef_MiniTable(f->msgdef), + sub, ctx->status); + if (!ok2) _upb_DefBuilder_Errf(ctx, "Could not build extension mini table"); + } -bool upb_MethodDef_ClientStreaming(const upb_MethodDef* m) { - return m->client_streaming; + bool ok = _upb_DefPool_InsertExt(ctx->symtab, ext, f); + if (!ok) _upb_DefBuilder_OomErr(ctx); } -bool upb_MethodDef_ServerStreaming(const upb_MethodDef* m) { - return m->server_streaming; -} +static void resolve_default(upb_DefBuilder* ctx, upb_FieldDef* f, + const UPB_DESC(FieldDescriptorProto) * + field_proto) { + // Have to delay resolving of the default value until now because of the enum + // case, since enum defaults are specified with a label. 
+ if (UPB_DESC(FieldDescriptorProto_has_default_value)(field_proto)) { + upb_StringView defaultval = + UPB_DESC(FieldDescriptorProto_default_value)(field_proto); -static void create_method(upb_DefBuilder* ctx, - const UPB_DESC(MethodDescriptorProto) * method_proto, - upb_ServiceDef* s, upb_MethodDef* m) { - upb_StringView name = UPB_DESC(MethodDescriptorProto_name)(method_proto); + if (upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto3) { + _upb_DefBuilder_Errf(ctx, + "proto3 fields cannot have explicit defaults (%s)", + f->full_name); + } - m->service = s; - m->full_name = - _upb_DefBuilder_MakeFullName(ctx, upb_ServiceDef_FullName(s), name); - m->client_streaming = - UPB_DESC(MethodDescriptorProto_client_streaming)(method_proto); - m->server_streaming = - UPB_DESC(MethodDescriptorProto_server_streaming)(method_proto); - m->input_type = _upb_DefBuilder_Resolve( - ctx, m->full_name, m->full_name, - UPB_DESC(MethodDescriptorProto_input_type)(method_proto), - UPB_DEFTYPE_MSG); - m->output_type = _upb_DefBuilder_Resolve( - ctx, m->full_name, m->full_name, - UPB_DESC(MethodDescriptorProto_output_type)(method_proto), - UPB_DEFTYPE_MSG); + if (upb_FieldDef_IsSubMessage(f)) { + _upb_DefBuilder_Errf(ctx, + "message fields cannot have explicit defaults (%s)", + f->full_name); + } - UPB_DEF_SET_OPTIONS(m->opts, MethodDescriptorProto, MethodOptions, - method_proto); + parse_default(ctx, defaultval.data, defaultval.size, f); + f->has_default = true; + } else { + set_default_default(ctx, f); + f->has_default = false; + } } -// Allocate and initialize an array of |n| method defs belonging to |s|. 
-upb_MethodDef* _upb_MethodDefs_New( - upb_DefBuilder* ctx, int n, - const UPB_DESC(MethodDescriptorProto) * const* protos, upb_ServiceDef* s) { - upb_MethodDef* m = _upb_DefBuilder_Alloc(ctx, sizeof(upb_MethodDef) * n); - for (int i = 0; i < n; i++) { - create_method(ctx, protos[i], s, &m[i]); - m[i].index = i; +void _upb_FieldDef_Resolve(upb_DefBuilder* ctx, const char* prefix, + upb_FieldDef* f) { + // We have to stash this away since resolve_subdef() may overwrite it. + const UPB_DESC(FieldDescriptorProto)* field_proto = f->sub.unresolved; + + resolve_subdef(ctx, prefix, f); + resolve_default(ctx, f, field_proto); + + if (f->is_extension) { + resolve_extension(ctx, prefix, f, field_proto); } - return m; } -#include -#include -#include // Must be last. -struct upb_OneofDef { - const UPB_DESC(OneofOptions) * opts; - const upb_MessageDef* parent; - const char* full_name; - int field_count; - bool synthetic; - const upb_FieldDef** fields; - upb_strtable ntof; // lookup a field by name - upb_inttable itof; // lookup a field by number (index) -#if UINTPTR_MAX == 0xffffffff - uint32_t padding; // Increase size to a multiple of 8. -#endif +struct upb_FileDef { + const UPB_DESC(FileOptions) * opts; + const char* name; + const char* package; + const char* edition; + + const upb_FileDef** deps; + const int32_t* public_deps; + const int32_t* weak_deps; + const upb_MessageDef* top_lvl_msgs; + const upb_EnumDef* top_lvl_enums; + const upb_FieldDef* top_lvl_exts; + const upb_ServiceDef* services; + const upb_MiniTableExtension** ext_layouts; + const upb_DefPool* symtab; + + int dep_count; + int public_dep_count; + int weak_dep_count; + int top_lvl_msg_count; + int top_lvl_enum_count; + int top_lvl_ext_count; + int service_count; + int ext_count; // All exts in the file. 
+ upb_Syntax syntax; }; -upb_OneofDef* _upb_OneofDef_At(const upb_OneofDef* o, int i) { - return (upb_OneofDef*)&o[i]; +const UPB_DESC(FileOptions) * upb_FileDef_Options(const upb_FileDef* f) { + return f->opts; } -const UPB_DESC(OneofOptions) * upb_OneofDef_Options(const upb_OneofDef* o) { - return o->opts; +bool upb_FileDef_HasOptions(const upb_FileDef* f) { + return f->opts != (void*)kUpbDefOptDefault; } -bool upb_OneofDef_HasOptions(const upb_OneofDef* o) { - return o->opts != (void*)kUpbDefOptDefault; -} +const char* upb_FileDef_Name(const upb_FileDef* f) { return f->name; } -const char* upb_OneofDef_FullName(const upb_OneofDef* o) { - return o->full_name; +const char* upb_FileDef_Package(const upb_FileDef* f) { + return f->package ? f->package : ""; } -const char* upb_OneofDef_Name(const upb_OneofDef* o) { - return _upb_DefBuilder_FullToShort(o->full_name); +const char* upb_FileDef_Edition(const upb_FileDef* f) { + return f->edition ? f->edition : ""; } -const upb_MessageDef* upb_OneofDef_ContainingType(const upb_OneofDef* o) { - return o->parent; -} +const char* _upb_FileDef_RawPackage(const upb_FileDef* f) { return f->package; } -int upb_OneofDef_FieldCount(const upb_OneofDef* o) { return o->field_count; } +upb_Syntax upb_FileDef_Syntax(const upb_FileDef* f) { return f->syntax; } -const upb_FieldDef* upb_OneofDef_Field(const upb_OneofDef* o, int i) { - UPB_ASSERT(i < o->field_count); - return o->fields[i]; +int upb_FileDef_TopLevelMessageCount(const upb_FileDef* f) { + return f->top_lvl_msg_count; } -int upb_OneofDef_numfields(const upb_OneofDef* o) { return o->field_count; } +int upb_FileDef_DependencyCount(const upb_FileDef* f) { return f->dep_count; } -uint32_t upb_OneofDef_Index(const upb_OneofDef* o) { - // Compute index in our parent's array. 
- return o - upb_MessageDef_Oneof(o->parent, 0); +int upb_FileDef_PublicDependencyCount(const upb_FileDef* f) { + return f->public_dep_count; } -bool upb_OneofDef_IsSynthetic(const upb_OneofDef* o) { return o->synthetic; } - -const upb_FieldDef* upb_OneofDef_LookupNameWithSize(const upb_OneofDef* o, - const char* name, - size_t size) { - upb_value val; - return upb_strtable_lookup2(&o->ntof, name, size, &val) - ? upb_value_getptr(val) - : NULL; +int upb_FileDef_WeakDependencyCount(const upb_FileDef* f) { + return f->weak_dep_count; } -const upb_FieldDef* upb_OneofDef_LookupName(const upb_OneofDef* o, - const char* name) { - return upb_OneofDef_LookupNameWithSize(o, name, strlen(name)); +const int32_t* _upb_FileDef_PublicDependencyIndexes(const upb_FileDef* f) { + return f->public_deps; } -const upb_FieldDef* upb_OneofDef_LookupNumber(const upb_OneofDef* o, - uint32_t num) { - upb_value val; - return upb_inttable_lookup(&o->itof, num, &val) ? upb_value_getptr(val) - : NULL; +const int32_t* _upb_FileDef_WeakDependencyIndexes(const upb_FileDef* f) { + return f->weak_deps; } -void _upb_OneofDef_Insert(upb_DefBuilder* ctx, upb_OneofDef* o, - const upb_FieldDef* f, const char* name, - size_t size) { - o->field_count++; - if (_upb_FieldDef_IsProto3Optional(f)) o->synthetic = true; - - const int number = upb_FieldDef_Number(f); - const upb_value v = upb_value_constptr(f); - - // TODO(salo): This lookup is unfortunate because we also perform it when - // inserting into the message's table. Unfortunately that step occurs after - // this one and moving things around could be tricky so let's leave it for - // a future refactoring. - const bool number_exists = upb_inttable_lookup(&o->itof, number, NULL); - if (UPB_UNLIKELY(number_exists)) { - _upb_DefBuilder_Errf(ctx, "oneof fields have the same number (%d)", number); - } - - // TODO(salo): More redundant work happening here. 
- const bool name_exists = upb_strtable_lookup2(&o->ntof, name, size, NULL); - if (UPB_UNLIKELY(name_exists)) { - _upb_DefBuilder_Errf(ctx, "oneof fields have the same name (%.*s)", - (int)size, name); - } - - const bool ok = upb_inttable_insert(&o->itof, number, v, ctx->arena) && - upb_strtable_insert(&o->ntof, name, size, v, ctx->arena); - if (UPB_UNLIKELY(!ok)) { - _upb_DefBuilder_OomErr(ctx); - } +int upb_FileDef_TopLevelEnumCount(const upb_FileDef* f) { + return f->top_lvl_enum_count; } -// Returns the synthetic count. -size_t _upb_OneofDefs_Finalize(upb_DefBuilder* ctx, upb_MessageDef* m) { - int synthetic_count = 0; - - for (int i = 0; i < upb_MessageDef_OneofCount(m); i++) { - upb_OneofDef* o = (upb_OneofDef*)upb_MessageDef_Oneof(m, i); - - if (o->synthetic && o->field_count != 1) { - _upb_DefBuilder_Errf(ctx, - "Synthetic oneofs must have one field, not %d: %s", - o->field_count, upb_OneofDef_Name(o)); - } - - if (o->synthetic) { - synthetic_count++; - } else if (synthetic_count != 0) { - _upb_DefBuilder_Errf( - ctx, "Synthetic oneofs must be after all other oneofs: %s", - upb_OneofDef_Name(o)); - } - - o->fields = - _upb_DefBuilder_Alloc(ctx, sizeof(upb_FieldDef*) * o->field_count); - o->field_count = 0; - } - - for (int i = 0; i < upb_MessageDef_FieldCount(m); i++) { - const upb_FieldDef* f = upb_MessageDef_Field(m, i); - upb_OneofDef* o = (upb_OneofDef*)upb_FieldDef_ContainingOneof(f); - if (o) { - o->fields[o->field_count++] = f; - } - } - - return synthetic_count; +int upb_FileDef_TopLevelExtensionCount(const upb_FileDef* f) { + return f->top_lvl_ext_count; } -static void create_oneofdef(upb_DefBuilder* ctx, upb_MessageDef* m, - const UPB_DESC(OneofDescriptorProto) * oneof_proto, - const upb_OneofDef* _o) { - upb_OneofDef* o = (upb_OneofDef*)_o; - upb_StringView name = UPB_DESC(OneofDescriptorProto_name)(oneof_proto); - - o->parent = m; - o->full_name = - _upb_DefBuilder_MakeFullName(ctx, upb_MessageDef_FullName(m), name); - o->field_count = 0; - 
o->synthetic = false; - - UPB_DEF_SET_OPTIONS(o->opts, OneofDescriptorProto, OneofOptions, oneof_proto); - - if (upb_MessageDef_FindByNameWithSize(m, name.data, name.size, NULL, NULL)) { - _upb_DefBuilder_Errf(ctx, "duplicate oneof name (%s)", o->full_name); - } - - upb_value v = _upb_DefType_Pack(o, UPB_DEFTYPE_ONEOF); - bool ok = _upb_MessageDef_Insert(m, name.data, name.size, v, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); - - ok = upb_inttable_init(&o->itof, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); +int upb_FileDef_ServiceCount(const upb_FileDef* f) { return f->service_count; } - ok = upb_strtable_init(&o->ntof, 4, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); +const upb_FileDef* upb_FileDef_Dependency(const upb_FileDef* f, int i) { + UPB_ASSERT(0 <= i && i < f->dep_count); + return f->deps[i]; } -// Allocate and initialize an array of |n| oneof defs. -upb_OneofDef* _upb_OneofDefs_New( - upb_DefBuilder* ctx, int n, - const UPB_DESC(OneofDescriptorProto) * const* protos, upb_MessageDef* m) { - _upb_DefType_CheckPadding(sizeof(upb_OneofDef)); - - upb_OneofDef* o = _upb_DefBuilder_Alloc(ctx, sizeof(upb_OneofDef) * n); - for (int i = 0; i < n; i++) { - create_oneofdef(ctx, m, protos[i], &o[i]); - } - return o; +const upb_FileDef* upb_FileDef_PublicDependency(const upb_FileDef* f, int i) { + UPB_ASSERT(0 <= i && i < f->public_dep_count); + return f->deps[f->public_deps[i]]; } - -// Must be last. 
- -struct upb_ServiceDef { - const UPB_DESC(ServiceOptions) * opts; - const upb_FileDef* file; - const char* full_name; - upb_MethodDef* methods; - int method_count; - int index; -}; - -upb_ServiceDef* _upb_ServiceDef_At(const upb_ServiceDef* s, int index) { - return (upb_ServiceDef*)&s[index]; +const upb_FileDef* upb_FileDef_WeakDependency(const upb_FileDef* f, int i) { + UPB_ASSERT(0 <= i && i < f->weak_dep_count); + return f->deps[f->weak_deps[i]]; } -const UPB_DESC(ServiceOptions) * - upb_ServiceDef_Options(const upb_ServiceDef* s) { - return s->opts; +const upb_MessageDef* upb_FileDef_TopLevelMessage(const upb_FileDef* f, int i) { + UPB_ASSERT(0 <= i && i < f->top_lvl_msg_count); + return _upb_MessageDef_At(f->top_lvl_msgs, i); } -bool upb_ServiceDef_HasOptions(const upb_ServiceDef* s) { - return s->opts != (void*)kUpbDefOptDefault; +const upb_EnumDef* upb_FileDef_TopLevelEnum(const upb_FileDef* f, int i) { + UPB_ASSERT(0 <= i && i < f->top_lvl_enum_count); + return _upb_EnumDef_At(f->top_lvl_enums, i); } -const char* upb_ServiceDef_FullName(const upb_ServiceDef* s) { - return s->full_name; +const upb_FieldDef* upb_FileDef_TopLevelExtension(const upb_FileDef* f, int i) { + UPB_ASSERT(0 <= i && i < f->top_lvl_ext_count); + return _upb_FieldDef_At(f->top_lvl_exts, i); } -const char* upb_ServiceDef_Name(const upb_ServiceDef* s) { - return _upb_DefBuilder_FullToShort(s->full_name); +const upb_ServiceDef* upb_FileDef_Service(const upb_FileDef* f, int i) { + UPB_ASSERT(0 <= i && i < f->service_count); + return _upb_ServiceDef_At(f->services, i); } -int upb_ServiceDef_Index(const upb_ServiceDef* s) { return s->index; } - -const upb_FileDef* upb_ServiceDef_File(const upb_ServiceDef* s) { - return s->file; -} +const upb_DefPool* upb_FileDef_Pool(const upb_FileDef* f) { return f->symtab; } -int upb_ServiceDef_MethodCount(const upb_ServiceDef* s) { - return s->method_count; +const upb_MiniTableExtension* _upb_FileDef_ExtensionMiniTable( + const upb_FileDef* f, int i) {
+ return f->ext_layouts[i]; } -const upb_MethodDef* upb_ServiceDef_Method(const upb_ServiceDef* s, int i) { - return (i < 0 || i >= s->method_count) ? NULL - : _upb_MethodDef_At(s->methods, i); +static char* strviewdup(upb_DefBuilder* ctx, upb_StringView view) { + char* ret = upb_strdup2(view.data, view.size, _upb_DefBuilder_Arena(ctx)); + if (!ret) _upb_DefBuilder_OomErr(ctx); + return ret; } -const upb_MethodDef* upb_ServiceDef_FindMethodByName(const upb_ServiceDef* s, - const char* name) { - for (int i = 0; i < s->method_count; i++) { - const upb_MethodDef* m = _upb_MethodDef_At(s->methods, i); - if (strcmp(name, upb_MethodDef_Name(m)) == 0) { - return m; - } - } - return NULL; +static bool streql_view(upb_StringView view, const char* b) { + return view.size == strlen(b) && memcmp(view.data, b, view.size) == 0; } -static void create_service(upb_DefBuilder* ctx, - const UPB_DESC(ServiceDescriptorProto) * svc_proto, - upb_ServiceDef* s) { - upb_StringView name; +static int count_exts_in_msg(const UPB_DESC(DescriptorProto) * msg_proto) { size_t n; + UPB_DESC(DescriptorProto_extension)(msg_proto, &n); + int ext_count = n; - // Must happen before _upb_DefBuilder_Add() - s->file = _upb_DefBuilder_File(ctx); + const UPB_DESC(DescriptorProto)* const* nested_msgs = + UPB_DESC(DescriptorProto_nested_type)(msg_proto, &n); + for (size_t i = 0; i < n; i++) { + ext_count += count_exts_in_msg(nested_msgs[i]); + } - name = UPB_DESC(ServiceDescriptorProto_name)(svc_proto); - const char* package = _upb_FileDef_RawPackage(s->file); - s->full_name = _upb_DefBuilder_MakeFullName(ctx, package, name); - _upb_DefBuilder_Add(ctx, s->full_name, - _upb_DefType_Pack(s, UPB_DEFTYPE_SERVICE)); + return ext_count; +} - const UPB_DESC(MethodDescriptorProto)* const* methods = - UPB_DESC(ServiceDescriptorProto_method)(svc_proto, &n); - s->method_count = n; - s->methods = _upb_MethodDefs_New(ctx, n, methods, s); +// Allocate and initialize one file def, and add it to the context object. 
+void _upb_FileDef_Create(upb_DefBuilder* ctx, + const UPB_DESC(FileDescriptorProto) * file_proto) { + upb_FileDef* file = _upb_DefBuilder_Alloc(ctx, sizeof(upb_FileDef)); + ctx->file = file; - UPB_DEF_SET_OPTIONS(s->opts, ServiceDescriptorProto, ServiceOptions, - svc_proto); -} + const UPB_DESC(DescriptorProto)* const* msgs; + const UPB_DESC(EnumDescriptorProto)* const* enums; + const UPB_DESC(FieldDescriptorProto)* const* exts; + const UPB_DESC(ServiceDescriptorProto)* const* services; + const upb_StringView* strs; + const int32_t* public_deps; + const int32_t* weak_deps; + size_t n; -upb_ServiceDef* _upb_ServiceDefs_New( - upb_DefBuilder* ctx, int n, - const UPB_DESC(ServiceDescriptorProto) * const* protos) { - _upb_DefType_CheckPadding(sizeof(upb_ServiceDef)); + file->symtab = ctx->symtab; - upb_ServiceDef* s = _upb_DefBuilder_Alloc(ctx, sizeof(upb_ServiceDef) * n); - for (int i = 0; i < n; i++) { - create_service(ctx, protos[i], &s[i]); - s[i].index = i; + // Count all extensions in the file, to build a flat array of layouts. + UPB_DESC(FileDescriptorProto_extension)(file_proto, &n); + int ext_count = n; + msgs = UPB_DESC(FileDescriptorProto_message_type)(file_proto, &n); + for (size_t i = 0; i < n; i++) { + ext_count += count_exts_in_msg(msgs[i]); } - return s; -} + file->ext_count = ext_count; + if (ctx->layout) { + // We are using the ext layouts that were passed in. + file->ext_layouts = ctx->layout->exts; + if (ctx->layout->ext_count != file->ext_count) { + _upb_DefBuilder_Errf(ctx, + "Extension count did not match layout (%d vs %d)", + ctx->layout->ext_count, file->ext_count); + } + } else { + // We are building ext layouts from scratch. 
+ file->ext_layouts = _upb_DefBuilder_Alloc( + ctx, sizeof(*file->ext_layouts) * file->ext_count); + upb_MiniTableExtension* ext = + _upb_DefBuilder_Alloc(ctx, sizeof(*ext) * file->ext_count); + for (int i = 0; i < file->ext_count; i++) { + file->ext_layouts[i] = &ext[i]; + } + } -#include + upb_StringView name = UPB_DESC(FileDescriptorProto_name)(file_proto); + file->name = strviewdup(ctx, name); + if (strlen(file->name) != name.size) { + _upb_DefBuilder_Errf(ctx, "File name contained embedded NULL"); + } + upb_StringView package = UPB_DESC(FileDescriptorProto_package)(file_proto); -// Must be last. + if (package.size) { + _upb_DefBuilder_CheckIdentFull(ctx, package); + file->package = strviewdup(ctx, package); + } else { + file->package = NULL; + } -// A few fake field types for our tables. -enum { - kUpb_FakeFieldType_FieldNotFound = 0, - kUpb_FakeFieldType_MessageSetItem = 19, -}; + upb_StringView edition = UPB_DESC(FileDescriptorProto_edition)(file_proto); -// DecodeOp: an action to be performed for a wire-type/field-type combination. -enum { - // Special ops: we don't write data to regular fields for these. - kUpb_DecodeOp_UnknownField = -1, - kUpb_DecodeOp_MessageSetItem = -2, + if (edition.size == 0) { + file->edition = NULL; + } else { + // TODO(b/267770604): How should we validate this? + file->edition = strviewdup(ctx, edition); + if (strlen(file->edition) != edition.size) { + _upb_DefBuilder_Errf(ctx, "Edition name contained embedded NULL"); + } + } - // Scalar-only ops. - kUpb_DecodeOp_Scalar1Byte = 0, - kUpb_DecodeOp_Scalar4Byte = 2, - kUpb_DecodeOp_Scalar8Byte = 3, - kUpb_DecodeOp_Enum = 1, + if (UPB_DESC(FileDescriptorProto_has_syntax)(file_proto)) { + upb_StringView syntax = UPB_DESC(FileDescriptorProto_syntax)(file_proto); - // Scalar/repeated ops. 
- kUpb_DecodeOp_String = 4, - kUpb_DecodeOp_Bytes = 5, - kUpb_DecodeOp_SubMessage = 6, + if (streql_view(syntax, "proto2")) { + file->syntax = kUpb_Syntax_Proto2; + } else if (streql_view(syntax, "proto3")) { + file->syntax = kUpb_Syntax_Proto3; + } else { + _upb_DefBuilder_Errf(ctx, "Invalid syntax '" UPB_STRINGVIEW_FORMAT "'", + UPB_STRINGVIEW_ARGS(syntax)); + } + } else { + file->syntax = kUpb_Syntax_Proto2; + } - // Repeated-only ops (also see macros below). - kUpb_DecodeOp_PackedEnum = 13, -}; + // Read options. + UPB_DEF_SET_OPTIONS(file->opts, FileDescriptorProto, FileOptions, file_proto); -// For packed fields it is helpful to be able to recover the lg2 of the data -// size from the op. -#define OP_FIXPCK_LG2(n) (n + 5) /* n in [2, 3] => op in [7, 8] */ -#define OP_VARPCK_LG2(n) (n + 9) /* n in [0, 2, 3] => op in [9, 11, 12] */ + // Verify dependencies. + strs = UPB_DESC(FileDescriptorProto_dependency)(file_proto, &n); + file->dep_count = n; + file->deps = _upb_DefBuilder_Alloc(ctx, sizeof(*file->deps) * n); -typedef union { - bool bool_val; - uint32_t uint32_val; - uint64_t uint64_val; - uint32_t size; -} wireval; + for (size_t i = 0; i < n; i++) { + upb_StringView str = strs[i]; + file->deps[i] = + upb_DefPool_FindFileByNameWithSize(ctx->symtab, str.data, str.size); + if (!file->deps[i]) { + _upb_DefBuilder_Errf(ctx, + "Depends on file '" UPB_STRINGVIEW_FORMAT + "', but it has not been loaded", + UPB_STRINGVIEW_ARGS(str)); + } + } -static const char* _upb_Decoder_DecodeMessage(upb_Decoder* d, const char* ptr, - upb_Message* msg, - const upb_MiniTable* layout); + public_deps = UPB_DESC(FileDescriptorProto_public_dependency)(file_proto, &n); + file->public_dep_count = n; + file->public_deps = + _upb_DefBuilder_Alloc(ctx, sizeof(*file->public_deps) * n); + int32_t* mutable_public_deps = (int32_t*)file->public_deps; + for (size_t i = 0; i < n; i++) { + if (public_deps[i] < 0 || public_deps[i] >= file->dep_count) { + _upb_DefBuilder_Errf(ctx, "public_dep %d is out of range", +
(int)public_deps[i]); + } + mutable_public_deps[i] = public_deps[i]; + } -UPB_NORETURN static void* _upb_Decoder_ErrorJmp(upb_Decoder* d, - upb_DecodeStatus status) { - assert(status != kUpb_DecodeStatus_Ok); - d->status = status; - UPB_LONGJMP(d->err, 1); -} + weak_deps = UPB_DESC(FileDescriptorProto_weak_dependency)(file_proto, &n); + file->weak_dep_count = n; + file->weak_deps = _upb_DefBuilder_Alloc(ctx, sizeof(*file->weak_deps) * n); + int32_t* mutable_weak_deps = (int32_t*)file->weak_deps; + for (size_t i = 0; i < n; i++) { + if (weak_deps[i] < 0 || weak_deps[i] >= file->dep_count) { + _upb_DefBuilder_Errf(ctx, "weak_dep %d is out of range", + (int)weak_deps[i]); + } + mutable_weak_deps[i] = weak_deps[i]; + } -const char* _upb_FastDecoder_ErrorJmp(upb_Decoder* d, int status) { - assert(status != kUpb_DecodeStatus_Ok); - d->status = status; - UPB_LONGJMP(d->err, 1); - return NULL; -} + // Create enums. + enums = UPB_DESC(FileDescriptorProto_enum_type)(file_proto, &n); + file->top_lvl_enum_count = n; + file->top_lvl_enums = _upb_EnumDefs_New(ctx, n, enums, NULL); -static void _upb_Decoder_VerifyUtf8(upb_Decoder* d, const char* buf, int len) { - if (!_upb_Decoder_VerifyUtf8Inline(buf, len)) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); - } -} + // Create extensions. + exts = UPB_DESC(FileDescriptorProto_extension)(file_proto, &n); + file->top_lvl_ext_count = n; + file->top_lvl_exts = _upb_Extensions_New(ctx, n, exts, file->package, NULL); -static bool _upb_Decoder_Reserve(upb_Decoder* d, upb_Array* arr, size_t elem) { - bool need_realloc = arr->capacity - arr->size < elem; - if (need_realloc && !_upb_array_realloc(arr, arr->size + elem, &d->arena)) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + // Create messages. + msgs = UPB_DESC(FileDescriptorProto_message_type)(file_proto, &n); + file->top_lvl_msg_count = n; + file->top_lvl_msgs = _upb_MessageDefs_New(ctx, n, msgs, NULL); + + // Create services.
+ services = UPB_DESC(FileDescriptorProto_service)(file_proto, &n); + file->service_count = n; + file->services = _upb_ServiceDefs_New(ctx, n, services); + + // Now that all names are in the table, build layouts and resolve refs. + + for (int i = 0; i < file->top_lvl_msg_count; i++) { + upb_MessageDef* m = (upb_MessageDef*)upb_FileDef_TopLevelMessage(file, i); + _upb_MessageDef_Resolve(ctx, m); } - return need_realloc; -} -typedef struct { - const char* ptr; - uint64_t val; -} _upb_DecodeLongVarintReturn; + for (int i = 0; i < file->top_lvl_ext_count; i++) { + upb_FieldDef* f = (upb_FieldDef*)upb_FileDef_TopLevelExtension(file, i); + _upb_FieldDef_Resolve(ctx, file->package, f); + } -UPB_NOINLINE -static _upb_DecodeLongVarintReturn _upb_Decoder_DecodeLongVarint( - const char* ptr, uint64_t val) { - _upb_DecodeLongVarintReturn ret = {NULL, 0}; - uint64_t byte; - int i; - for (i = 1; i < 10; i++) { - byte = (uint8_t)ptr[i]; - val += (byte - 1) << (i * 7); - if (!(byte & 0x80)) { - ret.ptr = ptr + i + 1; - ret.val = val; - return ret; - } + for (int i = 0; i < file->top_lvl_msg_count; i++) { + upb_MessageDef* m = (upb_MessageDef*)upb_FileDef_TopLevelMessage(file, i); + _upb_MessageDef_CreateMiniTable(ctx, (upb_MessageDef*)m); } - return ret; -} -UPB_FORCEINLINE -static const char* _upb_Decoder_DecodeVarint(upb_Decoder* d, const char* ptr, - uint64_t* val) { - uint64_t byte = (uint8_t)*ptr; - if (UPB_LIKELY((byte & 0x80) == 0)) { - *val = byte; - return ptr + 1; - } else { - _upb_DecodeLongVarintReturn res = _upb_Decoder_DecodeLongVarint(ptr, byte); - if (!res.ptr) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); - *val = res.val; - return res.ptr; + for (int i = 0; i < file->top_lvl_ext_count; i++) { + upb_FieldDef* f = (upb_FieldDef*)upb_FileDef_TopLevelExtension(file, i); + _upb_FieldDef_BuildMiniTableExtension(ctx, f); } -} -UPB_FORCEINLINE -static const char* _upb_Decoder_DecodeTag(upb_Decoder* d, const char* ptr, - uint32_t* val) { - uint64_t byte = 
(uint8_t)*ptr; - if (UPB_LIKELY((byte & 0x80) == 0)) { - *val = byte; - return ptr + 1; - } else { - const char* start = ptr; - _upb_DecodeLongVarintReturn res = _upb_Decoder_DecodeLongVarint(ptr, byte); - if (!res.ptr || res.ptr - start > 5 || res.val > UINT32_MAX) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); - } - *val = res.val; - return res.ptr; + for (int i = 0; i < file->top_lvl_msg_count; i++) { + upb_MessageDef* m = (upb_MessageDef*)upb_FileDef_TopLevelMessage(file, i); + _upb_MessageDef_LinkMiniTable(ctx, m); } -} -UPB_FORCEINLINE -static const char* upb_Decoder_DecodeSize(upb_Decoder* d, const char* ptr, - uint32_t* size) { - uint64_t size64; - ptr = _upb_Decoder_DecodeVarint(d, ptr, &size64); - if (size64 >= INT32_MAX || - !upb_EpsCopyInputStream_CheckSize(&d->input, ptr, (int)size64)) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); + if (file->ext_count) { + bool ok = upb_ExtensionRegistry_AddArray( + _upb_DefPool_ExtReg(ctx->symtab), file->ext_layouts, file->ext_count); + if (!ok) _upb_DefBuilder_OomErr(ctx); } - *size = size64; - return ptr; } -static void _upb_Decoder_MungeInt32(wireval* val) { - if (!_upb_IsLittleEndian()) { - /* The next stage will memcpy(dst, &val, 4) */ - val->uint32_val = val->uint64_val; - } + +#include + + +// Must be last. 
+ +bool upb_Message_HasFieldByDef(const upb_Message* msg, const upb_FieldDef* f) { + UPB_ASSERT(upb_FieldDef_HasPresence(f)); + return upb_Message_HasField(msg, upb_FieldDef_MiniTable(f)); } -static void _upb_Decoder_Munge(int type, wireval* val) { - switch (type) { - case kUpb_FieldType_Bool: - val->bool_val = val->uint64_val != 0; - break; - case kUpb_FieldType_SInt32: { - uint32_t n = val->uint64_val; - val->uint32_val = (n >> 1) ^ -(int32_t)(n & 1); - break; - } - case kUpb_FieldType_SInt64: { - uint64_t n = val->uint64_val; - val->uint64_val = (n >> 1) ^ -(int64_t)(n & 1); - break; - } - case kUpb_FieldType_Int32: - case kUpb_FieldType_UInt32: - case kUpb_FieldType_Enum: - _upb_Decoder_MungeInt32(val); - break; +const upb_FieldDef* upb_Message_WhichOneof(const upb_Message* msg, + const upb_OneofDef* o) { + const upb_FieldDef* f = upb_OneofDef_Field(o, 0); + if (upb_OneofDef_IsSynthetic(o)) { + UPB_ASSERT(upb_OneofDef_FieldCount(o) == 1); + return upb_Message_HasFieldByDef(msg, f) ? f : NULL; + } else { + const upb_MiniTableField* field = upb_FieldDef_MiniTable(f); + uint32_t oneof_case = upb_Message_WhichOneofFieldNumber(msg, field); + f = oneof_case ? upb_OneofDef_LookupNumber(o, oneof_case) : NULL; + UPB_ASSERT((f != NULL) == (oneof_case != 0)); + return f; } } -static upb_Message* _upb_Decoder_NewSubMessage(upb_Decoder* d, - const upb_MiniTableSub* subs, - const upb_MiniTableField* field, - upb_TaggedMessagePtr* target) { - const upb_MiniTable* subl = subs[field->UPB_PRIVATE(submsg_index)].submsg; - UPB_ASSERT(subl); - upb_Message* msg = _upb_Message_New(subl, &d->arena); - if (!msg) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - - // Extensions should not be unlinked. A message extension should not be - // registered until its sub-message type is available to be linked. 
- bool is_empty = subl == &_kUpb_MiniTable_Empty; - bool is_extension = field->mode & kUpb_LabelFlags_IsExtension; - UPB_ASSERT(!(is_empty && is_extension)); +upb_MessageValue upb_Message_GetFieldByDef(const upb_Message* msg, + const upb_FieldDef* f) { + upb_MessageValue default_val = upb_FieldDef_Default(f); + upb_MessageValue ret; + _upb_Message_GetField(msg, upb_FieldDef_MiniTable(f), &default_val, &ret); + return ret; +} - if (is_empty && !(d->options & kUpb_DecodeOption_ExperimentalAllowUnlinked)) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_UnlinkedSubMessage); +upb_MutableMessageValue upb_Message_Mutable(upb_Message* msg, + const upb_FieldDef* f, + upb_Arena* a) { + UPB_ASSERT(upb_FieldDef_IsSubMessage(f) || upb_FieldDef_IsRepeated(f)); + if (upb_FieldDef_HasPresence(f) && !upb_Message_HasFieldByDef(msg, f)) { + // We need to skip the upb_Message_GetFieldByDef() call in this case. + goto make; } - upb_TaggedMessagePtr tagged = _upb_TaggedMessagePtr_Pack(msg, is_empty); - memcpy(target, &tagged, sizeof(tagged)); - return msg; -} - -static upb_Message* _upb_Decoder_ReuseSubMessage( - upb_Decoder* d, const upb_MiniTableSub* subs, - const upb_MiniTableField* field, upb_TaggedMessagePtr* target) { - upb_TaggedMessagePtr tagged = *target; - const upb_MiniTable* subl = subs[field->UPB_PRIVATE(submsg_index)].submsg; - UPB_ASSERT(subl); - if (!upb_TaggedMessagePtr_IsEmpty(tagged) || subl == &_kUpb_MiniTable_Empty) { - return _upb_TaggedMessagePtr_GetMessage(tagged); + upb_MessageValue val = upb_Message_GetFieldByDef(msg, f); + if (val.array_val) { + return (upb_MutableMessageValue){.array = (upb_Array*)val.array_val}; } - // We found an empty message from a previous parse that was performed before - // this field was linked. But it is linked now, so we want to allocate a new - // message of the correct type and promote data into it before continuing. 
- upb_Message* existing = _upb_TaggedMessagePtr_GetEmptyMessage(tagged); - upb_Message* promoted = _upb_Decoder_NewSubMessage(d, subs, field, target); - size_t size; - const char* unknown = upb_Message_GetUnknown(existing, &size); - upb_DecodeStatus status = upb_Decode(unknown, size, promoted, subl, d->extreg, - d->options, &d->arena); - if (status != kUpb_DecodeStatus_Ok) _upb_Decoder_ErrorJmp(d, status); - return promoted; -} + upb_MutableMessageValue ret; +make: + if (!a) return (upb_MutableMessageValue){.array = NULL}; + if (upb_FieldDef_IsMap(f)) { + const upb_MessageDef* entry = upb_FieldDef_MessageSubDef(f); + const upb_FieldDef* key = + upb_MessageDef_FindFieldByNumber(entry, kUpb_MapEntry_KeyFieldNumber); + const upb_FieldDef* value = + upb_MessageDef_FindFieldByNumber(entry, kUpb_MapEntry_ValueFieldNumber); + ret.map = + upb_Map_New(a, upb_FieldDef_CType(key), upb_FieldDef_CType(value)); + } else if (upb_FieldDef_IsRepeated(f)) { + ret.array = upb_Array_New(a, upb_FieldDef_CType(f)); + } else { + UPB_ASSERT(upb_FieldDef_IsSubMessage(f)); + const upb_MessageDef* m = upb_FieldDef_MessageSubDef(f); + ret.msg = upb_Message_New(upb_MessageDef_MiniTable(m), a); + } -static const char* _upb_Decoder_ReadString(upb_Decoder* d, const char* ptr, - int size, upb_StringView* str) { - const char* str_ptr = ptr; - ptr = upb_EpsCopyInputStream_ReadString(&d->input, &str_ptr, size, &d->arena); - if (!ptr) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - str->data = str_ptr; - str->size = size; - return ptr; -} + val.array_val = ret.array; + upb_Message_SetFieldByDef(msg, f, val, a); -UPB_FORCEINLINE -static const char* _upb_Decoder_RecurseSubMessage(upb_Decoder* d, - const char* ptr, - upb_Message* submsg, - const upb_MiniTable* subl, - uint32_t expected_end_group) { - if (--d->depth < 0) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_MaxDepthExceeded); - } - ptr = _upb_Decoder_DecodeMessage(d, ptr, submsg, subl); - d->depth++; - if (d->end_group != 
expected_end_group) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); - } - return ptr; + return ret; } -UPB_FORCEINLINE -static const char* _upb_Decoder_DecodeSubMessage( - upb_Decoder* d, const char* ptr, upb_Message* submsg, - const upb_MiniTableSub* subs, const upb_MiniTableField* field, int size) { - int saved_delta = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, size); - const upb_MiniTable* subl = subs[field->UPB_PRIVATE(submsg_index)].submsg; - UPB_ASSERT(subl); - ptr = _upb_Decoder_RecurseSubMessage(d, ptr, submsg, subl, DECODE_NOGROUP); - upb_EpsCopyInputStream_PopLimit(&d->input, ptr, saved_delta); - return ptr; +bool upb_Message_SetFieldByDef(upb_Message* msg, const upb_FieldDef* f, + upb_MessageValue val, upb_Arena* a) { + return _upb_Message_SetField(msg, upb_FieldDef_MiniTable(f), &val, a); } -UPB_FORCEINLINE -static const char* _upb_Decoder_DecodeGroup(upb_Decoder* d, const char* ptr, - upb_Message* submsg, - const upb_MiniTable* subl, - uint32_t number) { - if (_upb_Decoder_IsDone(d, &ptr)) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); - } - ptr = _upb_Decoder_RecurseSubMessage(d, ptr, submsg, subl, number); - d->end_group = DECODE_NOGROUP; - return ptr; +void upb_Message_ClearFieldByDef(upb_Message* msg, const upb_FieldDef* f) { + upb_Message_ClearField(msg, upb_FieldDef_MiniTable(f)); } -UPB_FORCEINLINE -static const char* _upb_Decoder_DecodeUnknownGroup(upb_Decoder* d, - const char* ptr, - uint32_t number) { - return _upb_Decoder_DecodeGroup(d, ptr, NULL, NULL, number); +void upb_Message_ClearByDef(upb_Message* msg, const upb_MessageDef* m) { + upb_Message_Clear(msg, upb_MessageDef_MiniTable(m)); } -UPB_FORCEINLINE -static const char* _upb_Decoder_DecodeKnownGroup( - upb_Decoder* d, const char* ptr, upb_Message* submsg, - const upb_MiniTableSub* subs, const upb_MiniTableField* field) { - const upb_MiniTable* subl = subs[field->UPB_PRIVATE(submsg_index)].submsg; - UPB_ASSERT(subl); - return _upb_Decoder_DecodeGroup(d, 
ptr, submsg, subl, field->number); -} +bool upb_Message_Next(const upb_Message* msg, const upb_MessageDef* m, + const upb_DefPool* ext_pool, const upb_FieldDef** out_f, + upb_MessageValue* out_val, size_t* iter) { + size_t i = *iter; + size_t n = upb_MessageDef_FieldCount(m); + UPB_UNUSED(ext_pool); -static char* upb_Decoder_EncodeVarint32(uint32_t val, char* ptr) { - do { - uint8_t byte = val & 0x7fU; - val >>= 7; - if (val) byte |= 0x80U; - *(ptr++) = byte; - } while (val); - return ptr; -} + // Iterate over normal fields, returning the first one that is set. + while (++i < n) { + const upb_FieldDef* f = upb_MessageDef_Field(m, i); + const upb_MiniTableField* field = upb_FieldDef_MiniTable(f); + upb_MessageValue val = upb_Message_GetFieldByDef(msg, f); -static void _upb_Decoder_AddUnknownVarints(upb_Decoder* d, upb_Message* msg, - uint32_t val1, uint32_t val2) { - char buf[20]; - char* end = buf; - end = upb_Decoder_EncodeVarint32(val1, end); - end = upb_Decoder_EncodeVarint32(val2, end); + // Skip field if unset or empty. 
+ if (upb_MiniTableField_HasPresence(field)) { + if (!upb_Message_HasFieldByDef(msg, f)) continue; + } else { + switch (upb_FieldMode_Get(field)) { + case kUpb_FieldMode_Map: + if (!val.map_val || upb_Map_Size(val.map_val) == 0) continue; + break; + case kUpb_FieldMode_Array: + if (!val.array_val || upb_Array_Size(val.array_val) == 0) continue; + break; + case kUpb_FieldMode_Scalar: + if (!_upb_MiniTable_ValueIsNonZero(&val, field)) continue; + break; + } + } - if (!_upb_Message_AddUnknown(msg, buf, end - buf, &d->arena)) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + *out_val = val; + *out_f = f; + *iter = i; + return true; } -} -UPB_NOINLINE -static bool _upb_Decoder_CheckEnumSlow(upb_Decoder* d, const char* ptr, - upb_Message* msg, - const upb_MiniTableEnum* e, - const upb_MiniTableField* field, - uint32_t v) { - if (_upb_MiniTable_CheckEnumValueSlow(e, v)) return true; + if (ext_pool) { + // Return any extensions that are set. + size_t count; + const upb_Message_Extension* ext = _upb_Message_Getexts(msg, &count); + if (i - n < count) { + ext += count - 1 - (i - n); + memcpy(out_val, &ext->data, sizeof(*out_val)); + *out_f = upb_DefPool_FindExtensionByMiniTable(ext_pool, ext->ext); + *iter = i; + return true; + } + } - // Unrecognized enum goes into unknown fields. - // For packed fields the tag could be arbitrarily far in the past, so we - // just re-encode the tag and value here. - uint32_t tag = ((uint32_t)field->number << 3) | kUpb_WireType_Varint; - upb_Message* unknown_msg = - field->mode & kUpb_LabelFlags_IsExtension ? 
d->unknown_msg : msg; - _upb_Decoder_AddUnknownVarints(d, unknown_msg, tag, v); + *iter = i; return false; } -UPB_FORCEINLINE -static bool _upb_Decoder_CheckEnum(upb_Decoder* d, const char* ptr, - upb_Message* msg, const upb_MiniTableEnum* e, - const upb_MiniTableField* field, - wireval* val) { - uint32_t v = val->uint32_val; +bool _upb_Message_DiscardUnknown(upb_Message* msg, const upb_MessageDef* m, + int depth) { + size_t iter = kUpb_Message_Begin; + const upb_FieldDef* f; + upb_MessageValue val; + bool ret = true; - _kUpb_FastEnumCheck_Status status = _upb_MiniTable_CheckEnumValueFast(e, v); - if (UPB_LIKELY(status == _kUpb_FastEnumCheck_ValueIsInEnum)) return true; - return _upb_Decoder_CheckEnumSlow(d, ptr, msg, e, field, v); -} + if (--depth == 0) return false; -UPB_NOINLINE -static const char* _upb_Decoder_DecodeEnumArray(upb_Decoder* d, const char* ptr, - upb_Message* msg, - upb_Array* arr, - const upb_MiniTableSub* subs, - const upb_MiniTableField* field, - wireval* val) { - const upb_MiniTableEnum* e = subs[field->UPB_PRIVATE(submsg_index)].subenum; - if (!_upb_Decoder_CheckEnum(d, ptr, msg, e, field, val)) return ptr; - void* mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->size * 4, void); - arr->size++; - memcpy(mem, val, 4); - return ptr; -} + _upb_Message_DiscardUnknown_shallow(msg); -UPB_FORCEINLINE -static const char* _upb_Decoder_DecodeFixedPacked( - upb_Decoder* d, const char* ptr, upb_Array* arr, wireval* val, - const upb_MiniTableField* field, int lg2) { - int mask = (1 << lg2) - 1; - size_t count = val->size >> lg2; - if ((val->size & mask) != 0) { - // Length isn't a round multiple of elem size. - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); - } - _upb_Decoder_Reserve(d, arr, count); - void* mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << lg2, void); - arr->size += count; - // Note: if/when the decoder supports multi-buffer input, we will need to - // handle buffer seams here. 
- if (_upb_IsLittleEndian()) { - ptr = upb_EpsCopyInputStream_Copy(&d->input, ptr, mem, val->size); - } else { - int delta = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, val->size); - char* dst = mem; - while (!_upb_Decoder_IsDone(d, &ptr)) { - if (lg2 == 2) { - ptr = upb_WireReader_ReadFixed32(ptr, dst); - dst += 4; - } else { - UPB_ASSERT(lg2 == 3); - ptr = upb_WireReader_ReadFixed64(ptr, dst); - dst += 8; + while (upb_Message_Next(msg, m, NULL /*ext_pool*/, &f, &val, &iter)) { + const upb_MessageDef* subm = upb_FieldDef_MessageSubDef(f); + if (!subm) continue; + if (upb_FieldDef_IsMap(f)) { + const upb_FieldDef* val_f = upb_MessageDef_FindFieldByNumber(subm, 2); + const upb_MessageDef* val_m = upb_FieldDef_MessageSubDef(val_f); + upb_Map* map = (upb_Map*)val.map_val; + size_t iter = kUpb_Map_Begin; + + if (!val_m) continue; + + upb_MessageValue map_key, map_val; + while (upb_Map_Next(map, &map_key, &map_val, &iter)) { + if (!_upb_Message_DiscardUnknown((upb_Message*)map_val.msg_val, val_m, + depth)) { + ret = false; + } + } + } else if (upb_FieldDef_IsRepeated(f)) { + const upb_Array* arr = val.array_val; + size_t i, n = upb_Array_Size(arr); + for (i = 0; i < n; i++) { + upb_MessageValue elem = upb_Array_Get(arr, i); + if (!_upb_Message_DiscardUnknown((upb_Message*)elem.msg_val, subm, + depth)) { + ret = false; + } + } + } else { + if (!_upb_Message_DiscardUnknown((upb_Message*)val.msg_val, subm, + depth)) { + ret = false; } } - upb_EpsCopyInputStream_PopLimit(&d->input, ptr, delta); } - return ptr; + return ret; } -UPB_FORCEINLINE -static const char* _upb_Decoder_DecodeVarintPacked( - upb_Decoder* d, const char* ptr, upb_Array* arr, wireval* val, - const upb_MiniTableField* field, int lg2) { - int scale = 1 << lg2; - int saved_limit = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, val->size); - char* out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << lg2, void); - while (!_upb_Decoder_IsDone(d, &ptr)) { - wireval elem; - ptr = _upb_Decoder_DecodeVarint(d, 
ptr, &elem.uint64_val); - _upb_Decoder_Munge(field->UPB_PRIVATE(descriptortype), &elem); - if (_upb_Decoder_Reserve(d, arr, 1)) { - out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << lg2, void); - } - arr->size++; - memcpy(out, &elem, scale); - out += scale; - } - upb_EpsCopyInputStream_PopLimit(&d->input, ptr, saved_limit); - return ptr; +bool upb_Message_DiscardUnknown(upb_Message* msg, const upb_MessageDef* m, + int maxdepth) { + return _upb_Message_DiscardUnknown(msg, m, maxdepth); } -UPB_NOINLINE -static const char* _upb_Decoder_DecodeEnumPacked( - upb_Decoder* d, const char* ptr, upb_Message* msg, upb_Array* arr, - const upb_MiniTableSub* subs, const upb_MiniTableField* field, - wireval* val) { - const upb_MiniTableEnum* e = subs[field->UPB_PRIVATE(submsg_index)].subenum; - int saved_limit = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, val->size); - char* out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size * 4, void); - while (!_upb_Decoder_IsDone(d, &ptr)) { - wireval elem; - ptr = _upb_Decoder_DecodeVarint(d, ptr, &elem.uint64_val); - _upb_Decoder_MungeInt32(&elem); - if (!_upb_Decoder_CheckEnum(d, ptr, msg, e, field, &elem)) { - continue; - } - if (_upb_Decoder_Reserve(d, arr, 1)) { - out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size * 4, void); - } - arr->size++; - memcpy(out, &elem, 4); - out += 4; - } - upb_EpsCopyInputStream_PopLimit(&d->input, ptr, saved_limit); - return ptr; -} -upb_Array* _upb_Decoder_CreateArray(upb_Decoder* d, - const upb_MiniTableField* field) { - /* Maps descriptor type -> elem_size_lg2. 
*/ - static const uint8_t kElemSizeLg2[] = { - [0] = -1, // invalid descriptor type - [kUpb_FieldType_Double] = 3, - [kUpb_FieldType_Float] = 2, - [kUpb_FieldType_Int64] = 3, - [kUpb_FieldType_UInt64] = 3, - [kUpb_FieldType_Int32] = 2, - [kUpb_FieldType_Fixed64] = 3, - [kUpb_FieldType_Fixed32] = 2, - [kUpb_FieldType_Bool] = 0, - [kUpb_FieldType_String] = UPB_SIZE(3, 4), - [kUpb_FieldType_Group] = UPB_SIZE(2, 3), - [kUpb_FieldType_Message] = UPB_SIZE(2, 3), - [kUpb_FieldType_Bytes] = UPB_SIZE(3, 4), - [kUpb_FieldType_UInt32] = 2, - [kUpb_FieldType_Enum] = 2, - [kUpb_FieldType_SFixed32] = 2, - [kUpb_FieldType_SFixed64] = 3, - [kUpb_FieldType_SInt32] = 2, - [kUpb_FieldType_SInt64] = 3, - }; - size_t lg2 = kElemSizeLg2[field->UPB_PRIVATE(descriptortype)]; - upb_Array* ret = _upb_Array_New(&d->arena, 4, lg2); - if (!ret) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - return ret; -} +// Must be last. -static const char* _upb_Decoder_DecodeToArray(upb_Decoder* d, const char* ptr, - upb_Message* msg, - const upb_MiniTableSub* subs, - const upb_MiniTableField* field, - wireval* val, int op) { - upb_Array** arrp = UPB_PTR_AT(msg, field->offset, void); - upb_Array* arr = *arrp; - void* mem; +struct upb_MessageDef { + const UPB_DESC(MessageOptions) * opts; + const upb_MiniTable* layout; + const upb_FileDef* file; + const upb_MessageDef* containing_type; + const char* full_name; - if (arr) { - _upb_Decoder_Reserve(d, arr, 1); + // Tables for looking up fields by number and name. + upb_inttable itof; + upb_strtable ntof; + + /* All nested defs. + * MEM: We could save some space here by putting nested defs in a contiguous + * region and calculating counts from offsets or vice-versa. 
*/ + const upb_FieldDef* fields; + const upb_OneofDef* oneofs; + const upb_ExtensionRange* ext_ranges; + const upb_StringView* res_names; + const upb_MessageDef* nested_msgs; + const upb_MessageReservedRange* res_ranges; + const upb_EnumDef* nested_enums; + const upb_FieldDef* nested_exts; + + // TODO(salo): These counters don't need anywhere near 32 bits. + int field_count; + int real_oneof_count; + int oneof_count; + int ext_range_count; + int res_range_count; + int res_name_count; + int nested_msg_count; + int nested_enum_count; + int nested_ext_count; + bool in_message_set; + bool is_sorted; + upb_WellKnown well_known_type; +#if UINTPTR_MAX == 0xffffffff + uint32_t padding; // Increase size to a multiple of 8. +#endif +}; + +static void assign_msg_wellknowntype(upb_MessageDef* m) { + const char* name = m->full_name; + if (name == NULL) { + m->well_known_type = kUpb_WellKnown_Unspecified; + return; + } + if (!strcmp(name, "google.protobuf.Any")) { + m->well_known_type = kUpb_WellKnown_Any; + } else if (!strcmp(name, "google.protobuf.FieldMask")) { + m->well_known_type = kUpb_WellKnown_FieldMask; + } else if (!strcmp(name, "google.protobuf.Duration")) { + m->well_known_type = kUpb_WellKnown_Duration; + } else if (!strcmp(name, "google.protobuf.Timestamp")) { + m->well_known_type = kUpb_WellKnown_Timestamp; + } else if (!strcmp(name, "google.protobuf.DoubleValue")) { + m->well_known_type = kUpb_WellKnown_DoubleValue; + } else if (!strcmp(name, "google.protobuf.FloatValue")) { + m->well_known_type = kUpb_WellKnown_FloatValue; + } else if (!strcmp(name, "google.protobuf.Int64Value")) { + m->well_known_type = kUpb_WellKnown_Int64Value; + } else if (!strcmp(name, "google.protobuf.UInt64Value")) { + m->well_known_type = kUpb_WellKnown_UInt64Value; + } else if (!strcmp(name, "google.protobuf.Int32Value")) { + m->well_known_type = kUpb_WellKnown_Int32Value; + } else if (!strcmp(name, "google.protobuf.UInt32Value")) { + m->well_known_type = kUpb_WellKnown_UInt32Value; + } 
else if (!strcmp(name, "google.protobuf.BoolValue")) { + m->well_known_type = kUpb_WellKnown_BoolValue; + } else if (!strcmp(name, "google.protobuf.StringValue")) { + m->well_known_type = kUpb_WellKnown_StringValue; + } else if (!strcmp(name, "google.protobuf.BytesValue")) { + m->well_known_type = kUpb_WellKnown_BytesValue; + } else if (!strcmp(name, "google.protobuf.Value")) { + m->well_known_type = kUpb_WellKnown_Value; + } else if (!strcmp(name, "google.protobuf.ListValue")) { + m->well_known_type = kUpb_WellKnown_ListValue; + } else if (!strcmp(name, "google.protobuf.Struct")) { + m->well_known_type = kUpb_WellKnown_Struct; } else { - arr = _upb_Decoder_CreateArray(d, field); - *arrp = arr; + m->well_known_type = kUpb_WellKnown_Unspecified; } +} - switch (op) { - case kUpb_DecodeOp_Scalar1Byte: - case kUpb_DecodeOp_Scalar4Byte: - case kUpb_DecodeOp_Scalar8Byte: - /* Append scalar value. */ - mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << op, void); - arr->size++; - memcpy(mem, val, 1 << op); - return ptr; - case kUpb_DecodeOp_String: - _upb_Decoder_VerifyUtf8(d, ptr, val->size); - /* Fallthrough. */ - case kUpb_DecodeOp_Bytes: { - /* Append bytes. */ - upb_StringView* str = (upb_StringView*)_upb_array_ptr(arr) + arr->size; - arr->size++; - return _upb_Decoder_ReadString(d, ptr, val->size, str); - } - case kUpb_DecodeOp_SubMessage: { - /* Append submessage / group. 
*/ - upb_TaggedMessagePtr* target = UPB_PTR_AT( - _upb_array_ptr(arr), arr->size * sizeof(void*), upb_TaggedMessagePtr); - upb_Message* submsg = _upb_Decoder_NewSubMessage(d, subs, field, target); - arr->size++; - if (UPB_UNLIKELY(field->UPB_PRIVATE(descriptortype) == - kUpb_FieldType_Group)) { - return _upb_Decoder_DecodeKnownGroup(d, ptr, submsg, subs, field); - } else { - return _upb_Decoder_DecodeSubMessage(d, ptr, submsg, subs, field, - val->size); - } +upb_MessageDef* _upb_MessageDef_At(const upb_MessageDef* m, int i) { + return (upb_MessageDef*)&m[i]; +} + +bool _upb_MessageDef_IsValidExtensionNumber(const upb_MessageDef* m, int n) { + for (int i = 0; i < m->ext_range_count; i++) { + const upb_ExtensionRange* r = upb_MessageDef_ExtensionRange(m, i); + if (upb_ExtensionRange_Start(r) <= n && n < upb_ExtensionRange_End(r)) { + return true; } - case OP_FIXPCK_LG2(2): - case OP_FIXPCK_LG2(3): - return _upb_Decoder_DecodeFixedPacked(d, ptr, arr, val, field, - op - OP_FIXPCK_LG2(0)); - case OP_VARPCK_LG2(0): - case OP_VARPCK_LG2(2): - case OP_VARPCK_LG2(3): - return _upb_Decoder_DecodeVarintPacked(d, ptr, arr, val, field, - op - OP_VARPCK_LG2(0)); - case kUpb_DecodeOp_Enum: - return _upb_Decoder_DecodeEnumArray(d, ptr, msg, arr, subs, field, val); - case kUpb_DecodeOp_PackedEnum: - return _upb_Decoder_DecodeEnumPacked(d, ptr, msg, arr, subs, field, val); - default: - UPB_UNREACHABLE(); } + return false; } -upb_Map* _upb_Decoder_CreateMap(upb_Decoder* d, const upb_MiniTable* entry) { - /* Maps descriptor type -> upb map size. 
*/ - static const uint8_t kSizeInMap[] = { - [0] = -1, // invalid descriptor type */ - [kUpb_FieldType_Double] = 8, - [kUpb_FieldType_Float] = 4, - [kUpb_FieldType_Int64] = 8, - [kUpb_FieldType_UInt64] = 8, - [kUpb_FieldType_Int32] = 4, - [kUpb_FieldType_Fixed64] = 8, - [kUpb_FieldType_Fixed32] = 4, - [kUpb_FieldType_Bool] = 1, - [kUpb_FieldType_String] = UPB_MAPTYPE_STRING, - [kUpb_FieldType_Group] = sizeof(void*), - [kUpb_FieldType_Message] = sizeof(void*), - [kUpb_FieldType_Bytes] = UPB_MAPTYPE_STRING, - [kUpb_FieldType_UInt32] = 4, - [kUpb_FieldType_Enum] = 4, - [kUpb_FieldType_SFixed32] = 4, - [kUpb_FieldType_SFixed64] = 8, - [kUpb_FieldType_SInt32] = 4, - [kUpb_FieldType_SInt64] = 8, - }; +const UPB_DESC(MessageOptions) * + upb_MessageDef_Options(const upb_MessageDef* m) { + return m->opts; +} - const upb_MiniTableField* key_field = &entry->fields[0]; - const upb_MiniTableField* val_field = &entry->fields[1]; - char key_size = kSizeInMap[key_field->UPB_PRIVATE(descriptortype)]; - char val_size = kSizeInMap[val_field->UPB_PRIVATE(descriptortype)]; - UPB_ASSERT(key_field->offset == offsetof(upb_MapEntryData, k)); - UPB_ASSERT(val_field->offset == offsetof(upb_MapEntryData, v)); - upb_Map* ret = _upb_Map_New(&d->arena, key_size, val_size); - if (!ret) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - return ret; +bool upb_MessageDef_HasOptions(const upb_MessageDef* m) { + return m->opts != (void*)kUpbDefOptDefault; } -static const char* _upb_Decoder_DecodeToMap(upb_Decoder* d, const char* ptr, - upb_Message* msg, - const upb_MiniTableSub* subs, - const upb_MiniTableField* field, - wireval* val) { - upb_Map** map_p = UPB_PTR_AT(msg, field->offset, upb_Map*); - upb_Map* map = *map_p; - upb_MapEntry ent; - UPB_ASSERT(upb_MiniTableField_Type(field) == kUpb_FieldType_Message); - const upb_MiniTable* entry = subs[field->UPB_PRIVATE(submsg_index)].submsg; +const char* upb_MessageDef_FullName(const upb_MessageDef* m) { + return m->full_name; +} - 
UPB_ASSERT(entry); - UPB_ASSERT(entry->field_count == 2); - UPB_ASSERT(!upb_IsRepeatedOrMap(&entry->fields[0])); - UPB_ASSERT(!upb_IsRepeatedOrMap(&entry->fields[1])); +const upb_FileDef* upb_MessageDef_File(const upb_MessageDef* m) { + return m->file; +} - if (!map) { - map = _upb_Decoder_CreateMap(d, entry); - *map_p = map; - } +const upb_MessageDef* upb_MessageDef_ContainingType(const upb_MessageDef* m) { + return m->containing_type; +} - // Parse map entry. - memset(&ent, 0, sizeof(ent)); +const char* upb_MessageDef_Name(const upb_MessageDef* m) { + return _upb_DefBuilder_FullToShort(m->full_name); +} - if (entry->fields[1].UPB_PRIVATE(descriptortype) == kUpb_FieldType_Message || - entry->fields[1].UPB_PRIVATE(descriptortype) == kUpb_FieldType_Group) { - // Create proactively to handle the case where it doesn't appear. - upb_TaggedMessagePtr msg; - _upb_Decoder_NewSubMessage(d, entry->subs, &entry->fields[1], &msg); - ent.data.v.val = upb_value_uintptr(msg); - } +upb_Syntax upb_MessageDef_Syntax(const upb_MessageDef* m) { + return upb_FileDef_Syntax(m->file); +} - ptr = - _upb_Decoder_DecodeSubMessage(d, ptr, &ent.data, subs, field, val->size); - // check if ent had any unknown fields - size_t size; - upb_Message_GetUnknown(&ent.data, &size); - if (size != 0) { - char* buf; - size_t size; - uint32_t tag = ((uint32_t)field->number << 3) | kUpb_WireType_Delimited; - upb_EncodeStatus status = - upb_Encode(&ent.data, entry, 0, &d->arena, &buf, &size); - if (status != kUpb_EncodeStatus_Ok) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - } - _upb_Decoder_AddUnknownVarints(d, msg, tag, size); - if (!_upb_Message_AddUnknown(msg, buf, size, &d->arena)) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - } - } else { - if (_upb_Map_Insert(map, &ent.data.k, map->key_size, &ent.data.v, - map->val_size, - &d->arena) == kUpb_MapInsertStatus_OutOfMemory) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - } - } - return ptr; +const 
upb_FieldDef* upb_MessageDef_FindFieldByNumber(const upb_MessageDef* m, + uint32_t i) { + upb_value val; + return upb_inttable_lookup(&m->itof, i, &val) ? upb_value_getconstptr(val) + : NULL; } -static const char* _upb_Decoder_DecodeToSubMessage( - upb_Decoder* d, const char* ptr, upb_Message* msg, - const upb_MiniTableSub* subs, const upb_MiniTableField* field, wireval* val, - int op) { - void* mem = UPB_PTR_AT(msg, field->offset, void); - int type = field->UPB_PRIVATE(descriptortype); +const upb_FieldDef* upb_MessageDef_FindFieldByNameWithSize( + const upb_MessageDef* m, const char* name, size_t size) { + upb_value val; - if (UPB_UNLIKELY(op == kUpb_DecodeOp_Enum) && - !_upb_Decoder_CheckEnum(d, ptr, msg, - subs[field->UPB_PRIVATE(submsg_index)].subenum, - field, val)) { - return ptr; + if (!upb_strtable_lookup2(&m->ntof, name, size, &val)) { + return NULL; } - /* Set presence if necessary. */ - if (field->presence > 0) { - _upb_sethas_field(msg, field); - } else if (field->presence < 0) { - /* Oneof case */ - uint32_t* oneof_case = _upb_oneofcase_field(msg, field); - if (op == kUpb_DecodeOp_SubMessage && *oneof_case != field->number) { - memset(mem, 0, sizeof(void*)); - } - *oneof_case = field->number; - } + return _upb_DefType_Unpack(val, UPB_DEFTYPE_FIELD); +} - /* Store into message. */ - switch (op) { - case kUpb_DecodeOp_SubMessage: { - upb_TaggedMessagePtr* submsgp = mem; - upb_Message* submsg; - if (*submsgp) { - submsg = _upb_Decoder_ReuseSubMessage(d, subs, field, submsgp); - } else { - submsg = _upb_Decoder_NewSubMessage(d, subs, field, submsgp); - } - if (UPB_UNLIKELY(type == kUpb_FieldType_Group)) { - ptr = _upb_Decoder_DecodeKnownGroup(d, ptr, submsg, subs, field); - } else { - ptr = _upb_Decoder_DecodeSubMessage(d, ptr, submsg, subs, field, - val->size); - } - break; - } - case kUpb_DecodeOp_String: - _upb_Decoder_VerifyUtf8(d, ptr, val->size); - /* Fallthrough. 
*/ - case kUpb_DecodeOp_Bytes: - return _upb_Decoder_ReadString(d, ptr, val->size, mem); - case kUpb_DecodeOp_Scalar8Byte: - memcpy(mem, val, 8); - break; - case kUpb_DecodeOp_Enum: - case kUpb_DecodeOp_Scalar4Byte: - memcpy(mem, val, 4); - break; - case kUpb_DecodeOp_Scalar1Byte: - memcpy(mem, val, 1); - break; - default: - UPB_UNREACHABLE(); +const upb_OneofDef* upb_MessageDef_FindOneofByNameWithSize( + const upb_MessageDef* m, const char* name, size_t size) { + upb_value val; + + if (!upb_strtable_lookup2(&m->ntof, name, size, &val)) { + return NULL; } - return ptr; + return _upb_DefType_Unpack(val, UPB_DEFTYPE_ONEOF); } -UPB_NOINLINE -const char* _upb_Decoder_CheckRequired(upb_Decoder* d, const char* ptr, - const upb_Message* msg, - const upb_MiniTable* l) { - assert(l->required_count); - if (UPB_LIKELY((d->options & kUpb_DecodeOption_CheckRequired) == 0)) { - return ptr; - } - uint64_t msg_head; - memcpy(&msg_head, msg, 8); - msg_head = _upb_BigEndian_Swap64(msg_head); - if (upb_MiniTable_requiredmask(l) & ~msg_head) { - d->missing_required = true; - } - return ptr; +bool _upb_MessageDef_Insert(upb_MessageDef* m, const char* name, size_t len, + upb_value v, upb_Arena* a) { + return upb_strtable_insert(&m->ntof, name, len, v, a); } -UPB_FORCEINLINE -static bool _upb_Decoder_TryFastDispatch(upb_Decoder* d, const char** ptr, - upb_Message* msg, - const upb_MiniTable* layout) { -#if UPB_FASTTABLE - if (layout && layout->table_mask != (unsigned char)-1) { - uint16_t tag = _upb_FastDecoder_LoadTag(*ptr); - intptr_t table = decode_totable(layout); - *ptr = _upb_FastDecoder_TagDispatch(d, *ptr, msg, table, 0, tag); - return true; +bool upb_MessageDef_FindByNameWithSize(const upb_MessageDef* m, + const char* name, size_t len, + const upb_FieldDef** out_f, + const upb_OneofDef** out_o) { + upb_value val; + + if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) { + return false; } -#endif - return false; + + const upb_FieldDef* f = _upb_DefType_Unpack(val, 
UPB_DEFTYPE_FIELD); + const upb_OneofDef* o = _upb_DefType_Unpack(val, UPB_DEFTYPE_ONEOF); + if (out_f) *out_f = f; + if (out_o) *out_o = o; + return f || o; /* False if this was a JSON name. */ } -static const char* upb_Decoder_SkipField(upb_Decoder* d, const char* ptr, - uint32_t tag) { - int field_number = tag >> 3; - int wire_type = tag & 7; - switch (wire_type) { - case kUpb_WireType_Varint: { - uint64_t val; - return _upb_Decoder_DecodeVarint(d, ptr, &val); - } - case kUpb_WireType_64Bit: - return ptr + 8; - case kUpb_WireType_32Bit: - return ptr + 4; - case kUpb_WireType_Delimited: { - uint32_t size; - ptr = upb_Decoder_DecodeSize(d, ptr, &size); - return ptr + size; - } - case kUpb_WireType_StartGroup: - return _upb_Decoder_DecodeUnknownGroup(d, ptr, field_number); - default: - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); +const upb_FieldDef* upb_MessageDef_FindByJsonNameWithSize( + const upb_MessageDef* m, const char* name, size_t size) { + upb_value val; + const upb_FieldDef* f; + + if (!upb_strtable_lookup2(&m->ntof, name, size, &val)) { + return NULL; } + + f = _upb_DefType_Unpack(val, UPB_DEFTYPE_FIELD); + if (!f) f = _upb_DefType_Unpack(val, UPB_DEFTYPE_FIELD_JSONNAME); + + return f; } -enum { - kStartItemTag = ((kUpb_MsgSet_Item << 3) | kUpb_WireType_StartGroup), - kEndItemTag = ((kUpb_MsgSet_Item << 3) | kUpb_WireType_EndGroup), - kTypeIdTag = ((kUpb_MsgSet_TypeId << 3) | kUpb_WireType_Varint), - kMessageTag = ((kUpb_MsgSet_Message << 3) | kUpb_WireType_Delimited), -}; +int upb_MessageDef_ExtensionRangeCount(const upb_MessageDef* m) { + return m->ext_range_count; +} -static void upb_Decoder_AddKnownMessageSetItem( - upb_Decoder* d, upb_Message* msg, const upb_MiniTableExtension* item_mt, - const char* data, uint32_t size) { - upb_Message_Extension* ext = - _upb_Message_GetOrCreateExtension(msg, item_mt, &d->arena); - if (UPB_UNLIKELY(!ext)) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - } - upb_Message* submsg = 
_upb_Decoder_NewSubMessage( - d, &ext->ext->sub, &ext->ext->field, (upb_TaggedMessagePtr*)&ext->data); - upb_DecodeStatus status = upb_Decode(data, size, submsg, item_mt->sub.submsg, - d->extreg, d->options, &d->arena); - if (status != kUpb_DecodeStatus_Ok) _upb_Decoder_ErrorJmp(d, status); +int upb_MessageDef_ReservedRangeCount(const upb_MessageDef* m) { + return m->res_range_count; } -static void upb_Decoder_AddUnknownMessageSetItem(upb_Decoder* d, - upb_Message* msg, - uint32_t type_id, - const char* message_data, - uint32_t message_size) { - char buf[60]; - char* ptr = buf; - ptr = upb_Decoder_EncodeVarint32(kStartItemTag, ptr); - ptr = upb_Decoder_EncodeVarint32(kTypeIdTag, ptr); - ptr = upb_Decoder_EncodeVarint32(type_id, ptr); - ptr = upb_Decoder_EncodeVarint32(kMessageTag, ptr); - ptr = upb_Decoder_EncodeVarint32(message_size, ptr); - char* split = ptr; +int upb_MessageDef_ReservedNameCount(const upb_MessageDef* m) { + return m->res_name_count; +} - ptr = upb_Decoder_EncodeVarint32(kEndItemTag, ptr); - char* end = ptr; +int upb_MessageDef_FieldCount(const upb_MessageDef* m) { + return m->field_count; +} - if (!_upb_Message_AddUnknown(msg, buf, split - buf, &d->arena) || - !_upb_Message_AddUnknown(msg, message_data, message_size, &d->arena) || - !_upb_Message_AddUnknown(msg, split, end - split, &d->arena)) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - } +int upb_MessageDef_OneofCount(const upb_MessageDef* m) { + return m->oneof_count; } -static void upb_Decoder_AddMessageSetItem(upb_Decoder* d, upb_Message* msg, - const upb_MiniTable* t, - uint32_t type_id, const char* data, - uint32_t size) { - const upb_MiniTableExtension* item_mt = - upb_ExtensionRegistry_Lookup(d->extreg, t, type_id); - if (item_mt) { - upb_Decoder_AddKnownMessageSetItem(d, msg, item_mt, data, size); - } else { - upb_Decoder_AddUnknownMessageSetItem(d, msg, type_id, data, size); - } +int upb_MessageDef_RealOneofCount(const upb_MessageDef* m) { + return 
m->real_oneof_count; } -static const char* upb_Decoder_DecodeMessageSetItem( - upb_Decoder* d, const char* ptr, upb_Message* msg, - const upb_MiniTable* layout) { - uint32_t type_id = 0; - upb_StringView preserved = {NULL, 0}; - typedef enum { - kUpb_HaveId = 1 << 0, - kUpb_HavePayload = 1 << 1, - } StateMask; - StateMask state_mask = 0; - while (!_upb_Decoder_IsDone(d, &ptr)) { - uint32_t tag; - ptr = _upb_Decoder_DecodeTag(d, ptr, &tag); - switch (tag) { - case kEndItemTag: - return ptr; - case kTypeIdTag: { - uint64_t tmp; - ptr = _upb_Decoder_DecodeVarint(d, ptr, &tmp); - if (state_mask & kUpb_HaveId) break; // Ignore dup. - state_mask |= kUpb_HaveId; - type_id = tmp; - if (state_mask & kUpb_HavePayload) { - upb_Decoder_AddMessageSetItem(d, msg, layout, type_id, preserved.data, - preserved.size); - } - break; - } - case kMessageTag: { - uint32_t size; - ptr = upb_Decoder_DecodeSize(d, ptr, &size); - const char* data = ptr; - ptr += size; - if (state_mask & kUpb_HavePayload) break; // Ignore dup. - state_mask |= kUpb_HavePayload; - if (state_mask & kUpb_HaveId) { - upb_Decoder_AddMessageSetItem(d, msg, layout, type_id, data, size); - } else { - // Out of order, we must preserve the payload. - preserved.data = data; - preserved.size = size; - } - break; - } - default: - // We do not preserve unexpected fields inside a message set item. 
- ptr = upb_Decoder_SkipField(d, ptr, tag); - break; - } - } - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); +int upb_MessageDef_NestedMessageCount(const upb_MessageDef* m) { + return m->nested_msg_count; } -static const upb_MiniTableField* _upb_Decoder_FindField(upb_Decoder* d, - const upb_MiniTable* t, - uint32_t field_number, - int* last_field_index) { - static upb_MiniTableField none = { - 0, 0, 0, 0, kUpb_FakeFieldType_FieldNotFound, 0}; - if (t == NULL) return &none; +int upb_MessageDef_NestedEnumCount(const upb_MessageDef* m) { + return m->nested_enum_count; +} - size_t idx = ((size_t)field_number) - 1; // 0 wraps to SIZE_MAX - if (idx < t->dense_below) { - /* Fastest case: index into dense fields. */ - goto found; - } +int upb_MessageDef_NestedExtensionCount(const upb_MessageDef* m) { + return m->nested_ext_count; +} - if (t->dense_below < t->field_count) { - /* Linear search non-dense fields. Resume scanning from last_field_index - * since fields are usually in order. */ - size_t last = *last_field_index; - for (idx = last; idx < t->field_count; idx++) { - if (t->fields[idx].number == field_number) { - goto found; - } - } +const upb_MiniTable* upb_MessageDef_MiniTable(const upb_MessageDef* m) { + return m->layout; +} - for (idx = t->dense_below; idx < last; idx++) { - if (t->fields[idx].number == field_number) { - goto found; - } - } - } +const upb_ExtensionRange* upb_MessageDef_ExtensionRange(const upb_MessageDef* m, + int i) { + UPB_ASSERT(0 <= i && i < m->ext_range_count); + return _upb_ExtensionRange_At(m->ext_ranges, i); +} - if (d->extreg) { - switch (t->ext) { - case kUpb_ExtMode_Extendable: { - const upb_MiniTableExtension* ext = - upb_ExtensionRegistry_Lookup(d->extreg, t, field_number); - if (ext) return &ext->field; - break; - } - case kUpb_ExtMode_IsMessageSet: - if (field_number == kUpb_MsgSet_Item) { - static upb_MiniTableField item = { - 0, 0, 0, 0, kUpb_FakeFieldType_MessageSetItem, 0}; - return &item; - } - break; - } - } +const 
upb_MessageReservedRange* upb_MessageDef_ReservedRange( + const upb_MessageDef* m, int i) { + UPB_ASSERT(0 <= i && i < m->res_range_count); + return _upb_MessageReservedRange_At(m->res_ranges, i); +} - return &none; /* Unknown field. */ +upb_StringView upb_MessageDef_ReservedName(const upb_MessageDef* m, int i) { + UPB_ASSERT(0 <= i && i < m->res_name_count); + return m->res_names[i]; +} -found: - UPB_ASSERT(t->fields[idx].number == field_number); - *last_field_index = idx; - return &t->fields[idx]; +const upb_FieldDef* upb_MessageDef_Field(const upb_MessageDef* m, int i) { + UPB_ASSERT(0 <= i && i < m->field_count); + return _upb_FieldDef_At(m->fields, i); } -int _upb_Decoder_GetVarintOp(const upb_MiniTableField* field) { - static const int8_t kVarintOps[] = { - [kUpb_FakeFieldType_FieldNotFound] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Double] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Float] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Int64] = kUpb_DecodeOp_Scalar8Byte, - [kUpb_FieldType_UInt64] = kUpb_DecodeOp_Scalar8Byte, - [kUpb_FieldType_Int32] = kUpb_DecodeOp_Scalar4Byte, - [kUpb_FieldType_Fixed64] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Fixed32] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Bool] = kUpb_DecodeOp_Scalar1Byte, - [kUpb_FieldType_String] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Group] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Message] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Bytes] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_UInt32] = kUpb_DecodeOp_Scalar4Byte, - [kUpb_FieldType_Enum] = kUpb_DecodeOp_Enum, - [kUpb_FieldType_SFixed32] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_SFixed64] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_SInt32] = kUpb_DecodeOp_Scalar4Byte, - [kUpb_FieldType_SInt64] = kUpb_DecodeOp_Scalar8Byte, - [kUpb_FakeFieldType_MessageSetItem] = kUpb_DecodeOp_UnknownField, - }; +const upb_OneofDef* upb_MessageDef_Oneof(const upb_MessageDef* m, int i) { + UPB_ASSERT(0 <= i 
&& i < m->oneof_count); + return _upb_OneofDef_At(m->oneofs, i); +} - return kVarintOps[field->UPB_PRIVATE(descriptortype)]; +const upb_MessageDef* upb_MessageDef_NestedMessage(const upb_MessageDef* m, + int i) { + UPB_ASSERT(0 <= i && i < m->nested_msg_count); + return &m->nested_msgs[i]; } -UPB_FORCEINLINE -static void _upb_Decoder_CheckUnlinked(upb_Decoder* d, const upb_MiniTable* mt, - const upb_MiniTableField* field, - int* op) { - // If sub-message is not linked, treat as unknown. - if (field->mode & kUpb_LabelFlags_IsExtension) return; - const upb_MiniTableSub* sub = &mt->subs[field->UPB_PRIVATE(submsg_index)]; - if ((d->options & kUpb_DecodeOption_ExperimentalAllowUnlinked) || - sub->submsg != &_kUpb_MiniTable_Empty) { - return; - } -#ifndef NDEBUG - const upb_MiniTableField* oneof = upb_MiniTable_GetOneof(mt, field); - if (oneof) { - // All other members of the oneof must be message fields that are also - // unlinked. - do { - assert(upb_MiniTableField_CType(oneof) == kUpb_CType_Message); - const upb_MiniTableSub* oneof_sub = - &mt->subs[oneof->UPB_PRIVATE(submsg_index)]; - assert(!oneof_sub); - } while (upb_MiniTable_NextOneofField(mt, &oneof)); - } -#endif // NDEBUG - *op = kUpb_DecodeOp_UnknownField; +const upb_EnumDef* upb_MessageDef_NestedEnum(const upb_MessageDef* m, int i) { + UPB_ASSERT(0 <= i && i < m->nested_enum_count); + return _upb_EnumDef_At(m->nested_enums, i); } -int _upb_Decoder_GetDelimitedOp(upb_Decoder* d, const upb_MiniTable* mt, - const upb_MiniTableField* field) { - enum { kRepeatedBase = 19 }; +const upb_FieldDef* upb_MessageDef_NestedExtension(const upb_MessageDef* m, + int i) { + UPB_ASSERT(0 <= i && i < m->nested_ext_count); + return _upb_FieldDef_At(m->nested_exts, i); +} - static const int8_t kDelimitedOps[] = { - /* For non-repeated field type. */ - [kUpb_FakeFieldType_FieldNotFound] = - kUpb_DecodeOp_UnknownField, // Field not found. 
- [kUpb_FieldType_Double] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Float] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Int64] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_UInt64] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Int32] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Fixed64] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Fixed32] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Bool] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_String] = kUpb_DecodeOp_String, - [kUpb_FieldType_Group] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Message] = kUpb_DecodeOp_SubMessage, - [kUpb_FieldType_Bytes] = kUpb_DecodeOp_Bytes, - [kUpb_FieldType_UInt32] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Enum] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_SFixed32] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_SFixed64] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_SInt32] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_SInt64] = kUpb_DecodeOp_UnknownField, - [kUpb_FakeFieldType_MessageSetItem] = kUpb_DecodeOp_UnknownField, - // For repeated field type. 
*/ - [kRepeatedBase + kUpb_FieldType_Double] = OP_FIXPCK_LG2(3), - [kRepeatedBase + kUpb_FieldType_Float] = OP_FIXPCK_LG2(2), - [kRepeatedBase + kUpb_FieldType_Int64] = OP_VARPCK_LG2(3), - [kRepeatedBase + kUpb_FieldType_UInt64] = OP_VARPCK_LG2(3), - [kRepeatedBase + kUpb_FieldType_Int32] = OP_VARPCK_LG2(2), - [kRepeatedBase + kUpb_FieldType_Fixed64] = OP_FIXPCK_LG2(3), - [kRepeatedBase + kUpb_FieldType_Fixed32] = OP_FIXPCK_LG2(2), - [kRepeatedBase + kUpb_FieldType_Bool] = OP_VARPCK_LG2(0), - [kRepeatedBase + kUpb_FieldType_String] = kUpb_DecodeOp_String, - [kRepeatedBase + kUpb_FieldType_Group] = kUpb_DecodeOp_SubMessage, - [kRepeatedBase + kUpb_FieldType_Message] = kUpb_DecodeOp_SubMessage, - [kRepeatedBase + kUpb_FieldType_Bytes] = kUpb_DecodeOp_Bytes, - [kRepeatedBase + kUpb_FieldType_UInt32] = OP_VARPCK_LG2(2), - [kRepeatedBase + kUpb_FieldType_Enum] = kUpb_DecodeOp_PackedEnum, - [kRepeatedBase + kUpb_FieldType_SFixed32] = OP_FIXPCK_LG2(2), - [kRepeatedBase + kUpb_FieldType_SFixed64] = OP_FIXPCK_LG2(3), - [kRepeatedBase + kUpb_FieldType_SInt32] = OP_VARPCK_LG2(2), - [kRepeatedBase + kUpb_FieldType_SInt64] = OP_VARPCK_LG2(3), - // Omitting kUpb_FakeFieldType_MessageSetItem, because we never emit a - // repeated msgset type - }; +upb_WellKnown upb_MessageDef_WellKnownType(const upb_MessageDef* m) { + return m->well_known_type; +} - int ndx = field->UPB_PRIVATE(descriptortype); - if (upb_FieldMode_Get(field) == kUpb_FieldMode_Array) ndx += kRepeatedBase; - int op = kDelimitedOps[ndx]; +bool _upb_MessageDef_InMessageSet(const upb_MessageDef* m) { + return m->in_message_set; +} - if (op == kUpb_DecodeOp_SubMessage) { - _upb_Decoder_CheckUnlinked(d, mt, field, &op); - } +const upb_FieldDef* upb_MessageDef_FindFieldByName(const upb_MessageDef* m, + const char* name) { + return upb_MessageDef_FindFieldByNameWithSize(m, name, strlen(name)); +} - return op; +const upb_OneofDef* upb_MessageDef_FindOneofByName(const upb_MessageDef* m, + const char* name) { + return 
upb_MessageDef_FindOneofByNameWithSize(m, name, strlen(name)); } -UPB_FORCEINLINE -static const char* _upb_Decoder_DecodeWireValue(upb_Decoder* d, const char* ptr, - const upb_MiniTable* mt, - const upb_MiniTableField* field, - int wire_type, wireval* val, - int* op) { - static const unsigned kFixed32OkMask = (1 << kUpb_FieldType_Float) | - (1 << kUpb_FieldType_Fixed32) | - (1 << kUpb_FieldType_SFixed32); +bool upb_MessageDef_IsMapEntry(const upb_MessageDef* m) { + return UPB_DESC(MessageOptions_map_entry)(m->opts); +} - static const unsigned kFixed64OkMask = (1 << kUpb_FieldType_Double) | - (1 << kUpb_FieldType_Fixed64) | - (1 << kUpb_FieldType_SFixed64); +bool upb_MessageDef_IsMessageSet(const upb_MessageDef* m) { + return UPB_DESC(MessageOptions_message_set_wire_format)(m->opts); +} - switch (wire_type) { - case kUpb_WireType_Varint: - ptr = _upb_Decoder_DecodeVarint(d, ptr, &val->uint64_val); - *op = _upb_Decoder_GetVarintOp(field); - _upb_Decoder_Munge(field->UPB_PRIVATE(descriptortype), val); - return ptr; - case kUpb_WireType_32Bit: - *op = kUpb_DecodeOp_Scalar4Byte; - if (((1 << field->UPB_PRIVATE(descriptortype)) & kFixed32OkMask) == 0) { - *op = kUpb_DecodeOp_UnknownField; - } - return upb_WireReader_ReadFixed32(ptr, &val->uint32_val); - case kUpb_WireType_64Bit: - *op = kUpb_DecodeOp_Scalar8Byte; - if (((1 << field->UPB_PRIVATE(descriptortype)) & kFixed64OkMask) == 0) { - *op = kUpb_DecodeOp_UnknownField; - } - return upb_WireReader_ReadFixed64(ptr, &val->uint64_val); - case kUpb_WireType_Delimited: - ptr = upb_Decoder_DecodeSize(d, ptr, &val->size); - *op = _upb_Decoder_GetDelimitedOp(d, mt, field); - return ptr; - case kUpb_WireType_StartGroup: - val->uint32_val = field->number; - if (field->UPB_PRIVATE(descriptortype) == kUpb_FieldType_Group) { - *op = kUpb_DecodeOp_SubMessage; - _upb_Decoder_CheckUnlinked(d, mt, field, op); - } else if (field->UPB_PRIVATE(descriptortype) == - kUpb_FakeFieldType_MessageSetItem) { - *op = kUpb_DecodeOp_MessageSetItem; 
- } else { - *op = kUpb_DecodeOp_UnknownField; - } - return ptr; - default: - break; - } - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); +static upb_MiniTable* _upb_MessageDef_MakeMiniTable(upb_DefBuilder* ctx, + const upb_MessageDef* m) { + upb_StringView desc; + // Note: this will assign layout_index for fields, so upb_FieldDef_MiniTable() + // is safe to call only after this call. + bool ok = upb_MessageDef_MiniDescriptorEncode(m, ctx->tmp_arena, &desc); + if (!ok) _upb_DefBuilder_OomErr(ctx); + + void** scratch_data = _upb_DefPool_ScratchData(ctx->symtab); + size_t* scratch_size = _upb_DefPool_ScratchSize(ctx->symtab); + upb_MiniTable* ret = upb_MiniTable_BuildWithBuf( + desc.data, desc.size, ctx->platform, ctx->arena, scratch_data, + scratch_size, ctx->status); + if (!ret) _upb_DefBuilder_FailJmp(ctx); + + return ret; } -UPB_FORCEINLINE -static const char* _upb_Decoder_DecodeKnownField( - upb_Decoder* d, const char* ptr, upb_Message* msg, - const upb_MiniTable* layout, const upb_MiniTableField* field, int op, - wireval* val) { - const upb_MiniTableSub* subs = layout->subs; - uint8_t mode = field->mode; +void _upb_MessageDef_Resolve(upb_DefBuilder* ctx, upb_MessageDef* m) { + for (int i = 0; i < m->field_count; i++) { + upb_FieldDef* f = (upb_FieldDef*)upb_MessageDef_Field(m, i); + _upb_FieldDef_Resolve(ctx, m->full_name, f); + } - if (UPB_UNLIKELY(mode & kUpb_LabelFlags_IsExtension)) { - const upb_MiniTableExtension* ext_layout = - (const upb_MiniTableExtension*)field; - upb_Message_Extension* ext = - _upb_Message_GetOrCreateExtension(msg, ext_layout, &d->arena); - if (UPB_UNLIKELY(!ext)) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + m->in_message_set = false; + for (int i = 0; i < upb_MessageDef_NestedExtensionCount(m); i++) { + upb_FieldDef* ext = (upb_FieldDef*)upb_MessageDef_NestedExtension(m, i); + _upb_FieldDef_Resolve(ctx, m->full_name, ext); + if (upb_FieldDef_Type(ext) == kUpb_FieldType_Message && + upb_FieldDef_Label(ext) == 
kUpb_Label_Optional && + upb_FieldDef_MessageSubDef(ext) == m && + UPB_DESC(MessageOptions_message_set_wire_format)( + upb_MessageDef_Options(upb_FieldDef_ContainingType(ext)))) { + m->in_message_set = true; } - d->unknown_msg = msg; - msg = &ext->data; - subs = &ext->ext->sub; } - switch (mode & kUpb_FieldMode_Mask) { - case kUpb_FieldMode_Array: - return _upb_Decoder_DecodeToArray(d, ptr, msg, subs, field, val, op); - case kUpb_FieldMode_Map: - return _upb_Decoder_DecodeToMap(d, ptr, msg, subs, field, val); - case kUpb_FieldMode_Scalar: - return _upb_Decoder_DecodeToSubMessage(d, ptr, msg, subs, field, val, op); - default: - UPB_UNREACHABLE(); + for (int i = 0; i < upb_MessageDef_NestedMessageCount(m); i++) { + upb_MessageDef* n = (upb_MessageDef*)upb_MessageDef_NestedMessage(m, i); + _upb_MessageDef_Resolve(ctx, n); } } -static const char* _upb_Decoder_ReverseSkipVarint(const char* ptr, - uint32_t val) { - uint32_t seen = 0; - do { - ptr--; - seen <<= 7; - seen |= *ptr & 0x7f; - } while (seen != val); - return ptr; -} - -static const char* _upb_Decoder_DecodeUnknownField(upb_Decoder* d, - const char* ptr, - upb_Message* msg, - int field_number, - int wire_type, wireval val) { - if (field_number == 0) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); +void _upb_MessageDef_InsertField(upb_DefBuilder* ctx, upb_MessageDef* m, + const upb_FieldDef* f) { + const int32_t field_number = upb_FieldDef_Number(f); - // Since unknown fields are the uncommon case, we do a little extra work here - // to walk backwards through the buffer to find the field start. This frees - // up a register in the fast paths (when the field is known), which leads to - // significant speedups in benchmarks. 
- const char* start = ptr; + if (field_number <= 0 || field_number > kUpb_MaxFieldNumber) { + _upb_DefBuilder_Errf(ctx, "invalid field number (%u)", field_number); + } - if (wire_type == kUpb_WireType_Delimited) ptr += val.size; - if (msg) { - switch (wire_type) { - case kUpb_WireType_Varint: - case kUpb_WireType_Delimited: - start--; - while (start[-1] & 0x80) start--; - break; - case kUpb_WireType_32Bit: - start -= 4; - break; - case kUpb_WireType_64Bit: - start -= 8; - break; - default: - break; - } + const char* json_name = upb_FieldDef_JsonName(f); + const char* shortname = upb_FieldDef_Name(f); + const size_t shortnamelen = strlen(shortname); - assert(start == d->debug_valstart); - uint32_t tag = ((uint32_t)field_number << 3) | wire_type; - start = _upb_Decoder_ReverseSkipVarint(start, tag); - assert(start == d->debug_tagstart); + upb_value v = upb_value_constptr(f); - if (wire_type == kUpb_WireType_StartGroup) { - d->unknown = start; - d->unknown_msg = msg; - ptr = _upb_Decoder_DecodeUnknownGroup(d, ptr, field_number); - start = d->unknown; - d->unknown = NULL; - } - if (!_upb_Message_AddUnknown(msg, start, ptr - start, &d->arena)) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - } - } else if (wire_type == kUpb_WireType_StartGroup) { - ptr = _upb_Decoder_DecodeUnknownGroup(d, ptr, field_number); + upb_value existing_v; + if (upb_strtable_lookup(&m->ntof, shortname, &existing_v)) { + _upb_DefBuilder_Errf(ctx, "duplicate field name (%s)", shortname); } - return ptr; -} -UPB_NOINLINE -static const char* _upb_Decoder_DecodeMessage(upb_Decoder* d, const char* ptr, - upb_Message* msg, - const upb_MiniTable* layout) { - int last_field_index = 0; + const upb_value field_v = _upb_DefType_Pack(f, UPB_DEFTYPE_FIELD); + bool ok = + _upb_MessageDef_Insert(m, shortname, shortnamelen, field_v, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); -#if UPB_FASTTABLE - // The first time we want to skip fast dispatch, because we may have just been - // invoked 
by the fast parser to handle a case that it bailed on. - if (!_upb_Decoder_IsDone(d, &ptr)) goto nofast; -#endif + if (strcmp(shortname, json_name) != 0) { + if (upb_strtable_lookup(&m->ntof, json_name, &v)) { + _upb_DefBuilder_Errf(ctx, "duplicate json_name (%s)", json_name); + } - while (!_upb_Decoder_IsDone(d, &ptr)) { - uint32_t tag; - const upb_MiniTableField* field; - int field_number; - int wire_type; - wireval val; - int op; + const size_t json_size = strlen(json_name); + const upb_value json_v = _upb_DefType_Pack(f, UPB_DEFTYPE_FIELD_JSONNAME); + ok = _upb_MessageDef_Insert(m, json_name, json_size, json_v, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); + } - if (_upb_Decoder_TryFastDispatch(d, &ptr, msg, layout)) break; + if (upb_inttable_lookup(&m->itof, field_number, NULL)) { + _upb_DefBuilder_Errf(ctx, "duplicate field number (%u)", field_number); + } -#if UPB_FASTTABLE - nofast: -#endif + ok = upb_inttable_insert(&m->itof, field_number, v, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); +} -#ifndef NDEBUG - d->debug_tagstart = ptr; -#endif +void _upb_MessageDef_CreateMiniTable(upb_DefBuilder* ctx, upb_MessageDef* m) { + if (ctx->layout == NULL) { + m->layout = _upb_MessageDef_MakeMiniTable(ctx, m); + } else { + UPB_ASSERT(ctx->msg_count < ctx->layout->msg_count); + m->layout = ctx->layout->msgs[ctx->msg_count++]; + UPB_ASSERT(m->field_count == m->layout->field_count); - UPB_ASSERT(ptr < d->input.limit_ptr); - ptr = _upb_Decoder_DecodeTag(d, ptr, &tag); - field_number = tag >> 3; - wire_type = tag & 7; + // We don't need the result of this call, but it will assign layout_index + // for all the fields in O(n lg n) time. 
+ _upb_FieldDefs_Sorted(m->fields, m->field_count, ctx->tmp_arena); + } -#ifndef NDEBUG - d->debug_valstart = ptr; -#endif + for (int i = 0; i < m->nested_msg_count; i++) { + upb_MessageDef* nested = + (upb_MessageDef*)upb_MessageDef_NestedMessage(m, i); + _upb_MessageDef_CreateMiniTable(ctx, nested); + } +} - if (wire_type == kUpb_WireType_EndGroup) { - d->end_group = field_number; - return ptr; - } +void _upb_MessageDef_LinkMiniTable(upb_DefBuilder* ctx, + const upb_MessageDef* m) { + for (int i = 0; i < upb_MessageDef_NestedExtensionCount(m); i++) { + const upb_FieldDef* ext = upb_MessageDef_NestedExtension(m, i); + _upb_FieldDef_BuildMiniTableExtension(ctx, ext); + } - field = _upb_Decoder_FindField(d, layout, field_number, &last_field_index); - ptr = _upb_Decoder_DecodeWireValue(d, ptr, layout, field, wire_type, &val, - &op); + for (int i = 0; i < m->nested_msg_count; i++) { + _upb_MessageDef_LinkMiniTable(ctx, upb_MessageDef_NestedMessage(m, i)); + } - if (op >= 0) { - ptr = _upb_Decoder_DecodeKnownField(d, ptr, msg, layout, field, op, &val); - } else { - switch (op) { - case kUpb_DecodeOp_UnknownField: - ptr = _upb_Decoder_DecodeUnknownField(d, ptr, msg, field_number, - wire_type, val); - break; - case kUpb_DecodeOp_MessageSetItem: - ptr = upb_Decoder_DecodeMessageSetItem(d, ptr, msg, layout); - break; + if (ctx->layout) return; + + for (int i = 0; i < m->field_count; i++) { + const upb_FieldDef* f = upb_MessageDef_Field(m, i); + const upb_MessageDef* sub_m = upb_FieldDef_MessageSubDef(f); + const upb_EnumDef* sub_e = upb_FieldDef_EnumSubDef(f); + const int layout_index = _upb_FieldDef_LayoutIndex(f); + upb_MiniTable* mt = (upb_MiniTable*)upb_MessageDef_MiniTable(m); + + UPB_ASSERT(layout_index < m->field_count); + upb_MiniTableField* mt_f = + (upb_MiniTableField*)&m->layout->fields[layout_index]; + if (sub_m) { + if (!mt->subs) { + _upb_DefBuilder_Errf(ctx, "unexpected submsg for (%s)", m->full_name); + } + UPB_ASSERT(mt_f); + UPB_ASSERT(sub_m->layout); + 
if (UPB_UNLIKELY(!upb_MiniTable_SetSubMessage(mt, mt_f, sub_m->layout))) { + _upb_DefBuilder_Errf(ctx, "invalid submsg for (%s)", m->full_name); + } + } else if (_upb_FieldDef_IsClosedEnum(f)) { + const upb_MiniTableEnum* mt_e = _upb_EnumDef_MiniTable(sub_e); + if (UPB_UNLIKELY(!upb_MiniTable_SetSubEnum(mt, mt_f, mt_e))) { + _upb_DefBuilder_Errf(ctx, "invalid subenum for (%s)", m->full_name); } } } - return UPB_UNLIKELY(layout && layout->required_count) - ? _upb_Decoder_CheckRequired(d, ptr, msg, layout) - : ptr; -} - -const char* _upb_FastDecoder_DecodeGeneric(struct upb_Decoder* d, - const char* ptr, upb_Message* msg, - intptr_t table, uint64_t hasbits, - uint64_t data) { - (void)data; - *(uint32_t*)msg |= hasbits; - return _upb_Decoder_DecodeMessage(d, ptr, msg, decode_totablep(table)); +#ifndef NDEBUG + for (int i = 0; i < m->field_count; i++) { + const upb_FieldDef* f = upb_MessageDef_Field(m, i); + const int layout_index = _upb_FieldDef_LayoutIndex(f); + UPB_ASSERT(layout_index < m->layout->field_count); + const upb_MiniTableField* mt_f = &m->layout->fields[layout_index]; + UPB_ASSERT(upb_FieldDef_Type(f) == upb_MiniTableField_Type(mt_f)); + UPB_ASSERT(upb_FieldDef_CType(f) == upb_MiniTableField_CType(mt_f)); + UPB_ASSERT(upb_FieldDef_HasPresence(f) == + upb_MiniTableField_HasPresence(mt_f)); + } +#endif } -static upb_DecodeStatus _upb_Decoder_DecodeTop(struct upb_Decoder* d, - const char* buf, void* msg, - const upb_MiniTable* l) { - if (!_upb_Decoder_TryFastDispatch(d, &buf, msg, l)) { - _upb_Decoder_DecodeMessage(d, buf, msg, l); +static uint64_t _upb_MessageDef_Modifiers(const upb_MessageDef* m) { + uint64_t out = 0; + if (upb_FileDef_Syntax(m->file) == kUpb_Syntax_Proto3) { + out |= kUpb_MessageModifier_ValidateUtf8; + out |= kUpb_MessageModifier_DefaultIsPacked; } - if (d->end_group != DECODE_NOGROUP) return kUpb_DecodeStatus_Malformed; - if (d->missing_required) return kUpb_DecodeStatus_MissingRequired; - return kUpb_DecodeStatus_Ok; + if 
(m->ext_range_count) { + out |= kUpb_MessageModifier_IsExtendable; + } + return out; } -UPB_NOINLINE -const char* _upb_Decoder_IsDoneFallback(upb_EpsCopyInputStream* e, - const char* ptr, int overrun) { - return _upb_EpsCopyInputStream_IsDoneFallbackInline( - e, ptr, overrun, _upb_Decoder_BufferFlipCallback); -} +static bool _upb_MessageDef_EncodeMap(upb_DescState* s, const upb_MessageDef* m, + upb_Arena* a) { + if (m->field_count != 2) return false; -static upb_DecodeStatus upb_Decoder_Decode(upb_Decoder* const decoder, - const char* const buf, - void* const msg, - const upb_MiniTable* const l, - upb_Arena* const arena) { - if (UPB_SETJMP(decoder->err) == 0) { - decoder->status = _upb_Decoder_DecodeTop(decoder, buf, msg, l); - } else { - UPB_ASSERT(decoder->status != kUpb_DecodeStatus_Ok); - } + const upb_FieldDef* key_field = upb_MessageDef_Field(m, 0); + const upb_FieldDef* val_field = upb_MessageDef_Field(m, 1); + if (key_field == NULL || val_field == NULL) return false; - _upb_MemBlock* blocks = - upb_Atomic_Load(&decoder->arena.blocks, memory_order_relaxed); - arena->head = decoder->arena.head; - upb_Atomic_Store(&arena->blocks, blocks, memory_order_relaxed); - return decoder->status; + UPB_ASSERT(_upb_FieldDef_LayoutIndex(key_field) == 0); + UPB_ASSERT(_upb_FieldDef_LayoutIndex(val_field) == 1); + + s->ptr = upb_MtDataEncoder_EncodeMap( + &s->e, s->ptr, upb_FieldDef_Type(key_field), upb_FieldDef_Type(val_field), + _upb_FieldDef_Modifiers(key_field), _upb_FieldDef_Modifiers(val_field)); + return true; } -upb_DecodeStatus upb_Decode(const char* buf, size_t size, void* msg, - const upb_MiniTable* l, - const upb_ExtensionRegistry* extreg, int options, - upb_Arena* arena) { - upb_Decoder decoder; - unsigned depth = (unsigned)options >> 16; - - upb_EpsCopyInputStream_Init(&decoder.input, &buf, size, - options & kUpb_DecodeOption_AliasString); +static bool _upb_MessageDef_EncodeMessage(upb_DescState* s, + const upb_MessageDef* m, + upb_Arena* a) { + const 
upb_FieldDef** sorted = NULL; + if (!m->is_sorted) { + sorted = _upb_FieldDefs_Sorted(m->fields, m->field_count, a); + if (!sorted) return false; + } - decoder.extreg = extreg; - decoder.unknown = NULL; - decoder.depth = depth ? depth : kUpb_WireFormat_DefaultDepthLimit; - decoder.end_group = DECODE_NOGROUP; - decoder.options = (uint16_t)options; - decoder.missing_required = false; - decoder.status = kUpb_DecodeStatus_Ok; + s->ptr = upb_MtDataEncoder_StartMessage(&s->e, s->ptr, + _upb_MessageDef_Modifiers(m)); - // Violating the encapsulation of the arena for performance reasons. - // This is a temporary arena that we swap into and swap out of when we are - // done. The temporary arena only needs to be able to handle allocation, - // not fuse or free, so it does not need many of the members to be initialized - // (particularly parent_or_count). - _upb_MemBlock* blocks = upb_Atomic_Load(&arena->blocks, memory_order_relaxed); - decoder.arena.head = arena->head; - decoder.arena.block_alloc = arena->block_alloc; - upb_Atomic_Init(&decoder.arena.blocks, blocks); + for (int i = 0; i < m->field_count; i++) { + const upb_FieldDef* f = sorted ? sorted[i] : upb_MessageDef_Field(m, i); + const upb_FieldType type = upb_FieldDef_Type(f); + const int number = upb_FieldDef_Number(f); + const uint64_t modifiers = _upb_FieldDef_Modifiers(f); - return upb_Decoder_Decode(&decoder, buf, msg, l, arena); -} + if (!_upb_DescState_Grow(s, a)) return false; + s->ptr = upb_MtDataEncoder_PutField(&s->e, s->ptr, type, number, modifiers); + } -#undef OP_FIXPCK_LG2 -#undef OP_VARPCK_LG2 + for (int i = 0; i < m->real_oneof_count; i++) { + if (!_upb_DescState_Grow(s, a)) return false; + s->ptr = upb_MtDataEncoder_StartOneof(&s->e, s->ptr); -// Fast decoder: ~3x the speed of decode.c, but requires x86-64/ARM64. -// Also the table size grows by 2x. -// -// Could potentially be ported to other 64-bit archs that pass at least six -// arguments in registers and have 8 unused high bits in pointers. 
-// -// The overall design is to create specialized functions for every possible -// field type (eg. oneof boolean field with a 1 byte tag) and then dispatch -// to the specialized function as quickly as possible. + const upb_OneofDef* o = upb_MessageDef_Oneof(m, i); + const int field_count = upb_OneofDef_FieldCount(o); + for (int j = 0; j < field_count; j++) { + const int number = upb_FieldDef_Number(upb_OneofDef_Field(o, j)); + if (!_upb_DescState_Grow(s, a)) return false; + s->ptr = upb_MtDataEncoder_PutOneofField(&s->e, s->ptr, number); + } + } + return true; +} -// Must be last. +static bool _upb_MessageDef_EncodeMessageSet(upb_DescState* s, + const upb_MessageDef* m, + upb_Arena* a) { + s->ptr = upb_MtDataEncoder_EncodeMessageSet(&s->e, s->ptr); -#if UPB_FASTTABLE + return true; +} -// The standard set of arguments passed to each parsing function. -// Thanks to x86-64 calling conventions, these will stay in registers. -#define UPB_PARSE_PARAMS \ - upb_Decoder *d, const char *ptr, upb_Message *msg, intptr_t table, \ - uint64_t hasbits, uint64_t data +bool upb_MessageDef_MiniDescriptorEncode(const upb_MessageDef* m, upb_Arena* a, + upb_StringView* out) { + upb_DescState s; + _upb_DescState_Init(&s); -#define UPB_PARSE_ARGS d, ptr, msg, table, hasbits, data + if (!_upb_DescState_Grow(&s, a)) return false; -#define RETURN_GENERIC(m) \ - /* Uncomment either of these for debugging purposes. 
*/ \ - /* fprintf(stderr, m); */ \ - /*__builtin_trap(); */ \ - return _upb_FastDecoder_DecodeGeneric(d, ptr, msg, table, hasbits, 0); + if (upb_MessageDef_IsMapEntry(m)) { + if (!_upb_MessageDef_EncodeMap(&s, m, a)) return false; + } else if (UPB_DESC(MessageOptions_message_set_wire_format)(m->opts)) { + if (!_upb_MessageDef_EncodeMessageSet(&s, m, a)) return false; + } else { + if (!_upb_MessageDef_EncodeMessage(&s, m, a)) return false; + } -typedef enum { - CARD_s = 0, /* Singular (optional, non-repeated) */ - CARD_o = 1, /* Oneof */ - CARD_r = 2, /* Repeated */ - CARD_p = 3 /* Packed Repeated */ -} upb_card; + if (!_upb_DescState_Grow(&s, a)) return false; + *s.ptr = '\0'; -UPB_NOINLINE -static const char* fastdecode_isdonefallback(UPB_PARSE_PARAMS) { - int overrun = data; - ptr = _upb_EpsCopyInputStream_IsDoneFallbackInline( - &d->input, ptr, overrun, _upb_Decoder_BufferFlipCallback); - data = _upb_FastDecoder_LoadTag(ptr); - UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); + out->data = s.buf; + out->size = s.ptr - s.buf; + return true; } -UPB_FORCEINLINE -static const char* fastdecode_dispatch(UPB_PARSE_PARAMS) { - int overrun; - switch (upb_EpsCopyInputStream_IsDoneStatus(&d->input, ptr, &overrun)) { - case kUpb_IsDoneStatus_Done: - *(uint32_t*)msg |= hasbits; // Sync hasbits. - const upb_MiniTable* l = decode_totablep(table); - return UPB_UNLIKELY(l->required_count) - ? 
_upb_Decoder_CheckRequired(d, ptr, msg, l) - : ptr; - case kUpb_IsDoneStatus_NotDone: - break; - case kUpb_IsDoneStatus_NeedFallback: - data = overrun; - UPB_MUSTTAIL return fastdecode_isdonefallback(UPB_PARSE_ARGS); +static upb_StringView* _upb_ReservedNames_New(upb_DefBuilder* ctx, int n, + const upb_StringView* protos) { + upb_StringView* sv = _upb_DefBuilder_Alloc(ctx, sizeof(upb_StringView) * n); + for (int i = 0; i < n; i++) { + sv[i].data = + upb_strdup2(protos[i].data, protos[i].size, _upb_DefBuilder_Arena(ctx)); + sv[i].size = protos[i].size; } - - // Read two bytes of tag data (for a one-byte tag, the high byte is junk). - data = _upb_FastDecoder_LoadTag(ptr); - UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); + return sv; } -UPB_FORCEINLINE -static bool fastdecode_checktag(uint16_t data, int tagbytes) { - if (tagbytes == 1) { - return (data & 0xff) == 0; - } else { - return data == 0; - } -} +static void create_msgdef(upb_DefBuilder* ctx, const char* prefix, + const UPB_DESC(DescriptorProto) * msg_proto, + const upb_MessageDef* containing_type, + upb_MessageDef* m) { + const UPB_DESC(OneofDescriptorProto)* const* oneofs; + const UPB_DESC(FieldDescriptorProto)* const* fields; + const UPB_DESC(DescriptorProto_ExtensionRange)* const* ext_ranges; + const UPB_DESC(DescriptorProto_ReservedRange)* const* res_ranges; + const upb_StringView* res_names; + size_t n_oneof, n_field, n_enum, n_ext, n_msg; + size_t n_ext_range, n_res_range, n_res_name; + upb_StringView name; -UPB_FORCEINLINE -static const char* fastdecode_longsize(const char* ptr, int* size) { - int i; - UPB_ASSERT(*size & 0x80); - *size &= 0xff; - for (i = 0; i < 3; i++) { - ptr++; - size_t byte = (uint8_t)ptr[-1]; - *size += (byte - 1) << (7 + 7 * i); - if (UPB_LIKELY((byte & 0x80) == 0)) return ptr; - } - ptr++; - size_t byte = (uint8_t)ptr[-1]; - // len is limited by 2gb not 4gb, hence 8 and not 16 as normally expected - // for a 32 bit varint. 
- if (UPB_UNLIKELY(byte >= 8)) return NULL; - *size += (byte - 1) << 28; - return ptr; -} + // Must happen before _upb_DefBuilder_Add() + m->file = _upb_DefBuilder_File(ctx); -UPB_FORCEINLINE -static const char* fastdecode_delimited( - upb_Decoder* d, const char* ptr, - upb_EpsCopyInputStream_ParseDelimitedFunc* func, void* ctx) { - ptr++; + m->containing_type = containing_type; + m->is_sorted = true; - // Sign-extend so varint greater than one byte becomes negative, causing - // fast delimited parse to fail. - int len = (int8_t)ptr[-1]; + name = UPB_DESC(DescriptorProto_name)(msg_proto); - if (!upb_EpsCopyInputStream_TryParseDelimitedFast(&d->input, &ptr, len, func, - ctx)) { - // Slow case: Sub-message is >=128 bytes and/or exceeds the current buffer. - // If it exceeds the buffer limit, limit/limit_ptr will change during - // sub-message parsing, so we need to preserve delta, not limit. - if (UPB_UNLIKELY(len & 0x80)) { - // Size varint >1 byte (length >= 128). - ptr = fastdecode_longsize(ptr, &len); - if (!ptr) { - // Corrupt wire format: size exceeded INT_MAX. - return NULL; - } - } - if (!upb_EpsCopyInputStream_CheckSize(&d->input, ptr, len)) { - // Corrupt wire format: invalid limit. 
- return NULL; - } - int delta = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, len); - ptr = func(&d->input, ptr, ctx); - upb_EpsCopyInputStream_PopLimit(&d->input, ptr, delta); - } - return ptr; -} + m->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); + _upb_DefBuilder_Add(ctx, m->full_name, _upb_DefType_Pack(m, UPB_DEFTYPE_MSG)); -/* singular, oneof, repeated field handling ***********************************/ + oneofs = UPB_DESC(DescriptorProto_oneof_decl)(msg_proto, &n_oneof); + fields = UPB_DESC(DescriptorProto_field)(msg_proto, &n_field); + ext_ranges = + UPB_DESC(DescriptorProto_extension_range)(msg_proto, &n_ext_range); + res_ranges = + UPB_DESC(DescriptorProto_reserved_range)(msg_proto, &n_res_range); + res_names = UPB_DESC(DescriptorProto_reserved_name)(msg_proto, &n_res_name); -typedef struct { - upb_Array* arr; - void* end; -} fastdecode_arr; + bool ok = upb_inttable_init(&m->itof, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); -typedef enum { - FD_NEXT_ATLIMIT, - FD_NEXT_SAMEFIELD, - FD_NEXT_OTHERFIELD -} fastdecode_next; + ok = upb_strtable_init(&m->ntof, n_oneof + n_field, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); -typedef struct { - void* dst; - fastdecode_next next; - uint32_t tag; -} fastdecode_nextret; + UPB_DEF_SET_OPTIONS(m->opts, DescriptorProto, MessageOptions, msg_proto); -UPB_FORCEINLINE -static void* fastdecode_resizearr(upb_Decoder* d, void* dst, - fastdecode_arr* farr, int valbytes) { - if (UPB_UNLIKELY(dst == farr->end)) { - size_t old_size = farr->arr->capacity; - size_t old_bytes = old_size * valbytes; - size_t new_size = old_size * 2; - size_t new_bytes = new_size * valbytes; - char* old_ptr = _upb_array_ptr(farr->arr); - char* new_ptr = upb_Arena_Realloc(&d->arena, old_ptr, old_bytes, new_bytes); - uint8_t elem_size_lg2 = __builtin_ctz(valbytes); - farr->arr->capacity = new_size; - farr->arr->data = _upb_array_tagptr(new_ptr, elem_size_lg2); - dst = (void*)(new_ptr + (old_size * valbytes)); - 
farr->end = (void*)(new_ptr + (new_size * valbytes)); + m->oneof_count = n_oneof; + m->oneofs = _upb_OneofDefs_New(ctx, n_oneof, oneofs, m); + + m->field_count = n_field; + m->fields = + _upb_FieldDefs_New(ctx, n_field, fields, m->full_name, m, &m->is_sorted); + + // Message Sets may not contain fields. + if (UPB_UNLIKELY(UPB_DESC(MessageOptions_message_set_wire_format)(m->opts))) { + if (UPB_UNLIKELY(n_field > 0)) { + _upb_DefBuilder_Errf(ctx, "invalid message set (%s)", m->full_name); + } } - return dst; + + m->ext_range_count = n_ext_range; + m->ext_ranges = _upb_ExtensionRanges_New(ctx, n_ext_range, ext_ranges, m); + + m->res_range_count = n_res_range; + m->res_ranges = + _upb_MessageReservedRanges_New(ctx, n_res_range, res_ranges, m); + + m->res_name_count = n_res_name; + m->res_names = _upb_ReservedNames_New(ctx, n_res_name, res_names); + + const size_t synthetic_count = _upb_OneofDefs_Finalize(ctx, m); + m->real_oneof_count = m->oneof_count - synthetic_count; + + assign_msg_wellknowntype(m); + upb_inttable_compact(&m->itof, ctx->arena); + + const UPB_DESC(EnumDescriptorProto)* const* enums = + UPB_DESC(DescriptorProto_enum_type)(msg_proto, &n_enum); + m->nested_enum_count = n_enum; + m->nested_enums = _upb_EnumDefs_New(ctx, n_enum, enums, m); + + const UPB_DESC(FieldDescriptorProto)* const* exts = + UPB_DESC(DescriptorProto_extension)(msg_proto, &n_ext); + m->nested_ext_count = n_ext; + m->nested_exts = _upb_Extensions_New(ctx, n_ext, exts, m->full_name, m); + + const UPB_DESC(DescriptorProto)* const* msgs = + UPB_DESC(DescriptorProto_nested_type)(msg_proto, &n_msg); + m->nested_msg_count = n_msg; + m->nested_msgs = _upb_MessageDefs_New(ctx, n_msg, msgs, m); } -UPB_FORCEINLINE -static bool fastdecode_tagmatch(uint32_t tag, uint64_t data, int tagbytes) { - if (tagbytes == 1) { - return (uint8_t)tag == (uint8_t)data; - } else { - return (uint16_t)tag == (uint16_t)data; +// Allocate and initialize an array of |n| message defs. 
+upb_MessageDef* _upb_MessageDefs_New( + upb_DefBuilder* ctx, int n, const UPB_DESC(DescriptorProto) * const* protos, + const upb_MessageDef* containing_type) { + _upb_DefType_CheckPadding(sizeof(upb_MessageDef)); + + const char* name = containing_type ? containing_type->full_name + : _upb_FileDef_RawPackage(ctx->file); + + upb_MessageDef* m = _upb_DefBuilder_Alloc(ctx, sizeof(upb_MessageDef) * n); + for (int i = 0; i < n; i++) { + create_msgdef(ctx, name, protos[i], containing_type, &m[i]); } + return m; } -UPB_FORCEINLINE -static void fastdecode_commitarr(void* dst, fastdecode_arr* farr, - int valbytes) { - farr->arr->size = - (size_t)((char*)dst - (char*)_upb_array_ptr(farr->arr)) / valbytes; -} -UPB_FORCEINLINE -static fastdecode_nextret fastdecode_nextrepeated(upb_Decoder* d, void* dst, - const char** ptr, - fastdecode_arr* farr, - uint64_t data, int tagbytes, - int valbytes) { - fastdecode_nextret ret; - dst = (char*)dst + valbytes; +// Must be last. - if (UPB_LIKELY(!_upb_Decoder_IsDone(d, ptr))) { - ret.tag = _upb_FastDecoder_LoadTag(*ptr); - if (fastdecode_tagmatch(ret.tag, data, tagbytes)) { - ret.next = FD_NEXT_SAMEFIELD; - } else { - fastdecode_commitarr(dst, farr, valbytes); - ret.next = FD_NEXT_OTHERFIELD; - } - } else { - fastdecode_commitarr(dst, farr, valbytes); - ret.next = FD_NEXT_ATLIMIT; - } +struct upb_MessageReservedRange { + int32_t start; + int32_t end; +}; - ret.dst = dst; - return ret; +upb_MessageReservedRange* _upb_MessageReservedRange_At( + const upb_MessageReservedRange* r, int i) { + return (upb_MessageReservedRange*)&r[i]; } -UPB_FORCEINLINE -static void* fastdecode_fieldmem(upb_Message* msg, uint64_t data) { - size_t ofs = data >> 48; - return (char*)msg + ofs; +int32_t upb_MessageReservedRange_Start(const upb_MessageReservedRange* r) { + return r->start; +} +int32_t upb_MessageReservedRange_End(const upb_MessageReservedRange* r) { + return r->end; } -UPB_FORCEINLINE -static void* fastdecode_getfield(upb_Decoder* d, const char* 
ptr, - upb_Message* msg, uint64_t* data, - uint64_t* hasbits, fastdecode_arr* farr, - int valbytes, upb_card card) { - switch (card) { - case CARD_s: { - uint8_t hasbit_index = *data >> 24; - // Set hasbit and return pointer to scalar field. - *hasbits |= 1ull << hasbit_index; - return fastdecode_fieldmem(msg, *data); - } - case CARD_o: { - uint16_t case_ofs = *data >> 32; - uint32_t* oneof_case = UPB_PTR_AT(msg, case_ofs, uint32_t); - uint8_t field_number = *data >> 24; - *oneof_case = field_number; - return fastdecode_fieldmem(msg, *data); - } - case CARD_r: { - // Get pointer to upb_Array and allocate/expand if necessary. - uint8_t elem_size_lg2 = __builtin_ctz(valbytes); - upb_Array** arr_p = fastdecode_fieldmem(msg, *data); - char* begin; - *(uint32_t*)msg |= *hasbits; - *hasbits = 0; - if (UPB_LIKELY(!*arr_p)) { - farr->arr = _upb_Array_New(&d->arena, 8, elem_size_lg2); - *arr_p = farr->arr; - } else { - farr->arr = *arr_p; - } - begin = _upb_array_ptr(farr->arr); - farr->end = begin + (farr->arr->capacity * valbytes); - *data = _upb_FastDecoder_LoadTag(ptr); - return begin + (farr->arr->size * valbytes); +upb_MessageReservedRange* _upb_MessageReservedRanges_New( + upb_DefBuilder* ctx, int n, + const UPB_DESC(DescriptorProto_ReservedRange) * const* protos, + const upb_MessageDef* m) { + upb_MessageReservedRange* r = + _upb_DefBuilder_Alloc(ctx, sizeof(upb_MessageReservedRange) * n); + + for (int i = 0; i < n; i++) { + const int32_t start = + UPB_DESC(DescriptorProto_ReservedRange_start)(protos[i]); + const int32_t end = UPB_DESC(DescriptorProto_ReservedRange_end)(protos[i]); + const int32_t max = kUpb_MaxFieldNumber + 1; + + // A full validation would also check that each range is disjoint, and that + // none of the fields overlap with the extension ranges, but we are just + // sanity checking here. 
+ if (start < 1 || end <= start || end > max) { + _upb_DefBuilder_Errf(ctx, + "Reserved range (%d, %d) is invalid, message=%s\n", + (int)start, (int)end, upb_MessageDef_FullName(m)); } - default: - UPB_UNREACHABLE(); + + r[i].start = start; + r[i].end = end; } + + return r; } -UPB_FORCEINLINE -static bool fastdecode_flippacked(uint64_t* data, int tagbytes) { - *data ^= (0x2 ^ 0x0); // Patch data to match packed wiretype. - return fastdecode_checktag(*data, tagbytes); + + +// Must be last. + +struct upb_MethodDef { + const UPB_DESC(MethodOptions) * opts; + upb_ServiceDef* service; + const char* full_name; + const upb_MessageDef* input_type; + const upb_MessageDef* output_type; + int index; + bool client_streaming; + bool server_streaming; +}; + +upb_MethodDef* _upb_MethodDef_At(const upb_MethodDef* m, int i) { + return (upb_MethodDef*)&m[i]; } -#define FASTDECODE_CHECKPACKED(tagbytes, card, func) \ - if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ - if (card == CARD_r && fastdecode_flippacked(&data, tagbytes)) { \ - UPB_MUSTTAIL return func(UPB_PARSE_ARGS); \ - } \ - RETURN_GENERIC("packed check tag mismatch\n"); \ - } +const upb_ServiceDef* upb_MethodDef_Service(const upb_MethodDef* m) { + return m->service; +} -/* varint fields **************************************************************/ +const UPB_DESC(MethodOptions) * upb_MethodDef_Options(const upb_MethodDef* m) { + return m->opts; +} -UPB_FORCEINLINE -static uint64_t fastdecode_munge(uint64_t val, int valbytes, bool zigzag) { - if (valbytes == 1) { - return val != 0; - } else if (zigzag) { - if (valbytes == 4) { - uint32_t n = val; - return (n >> 1) ^ -(int32_t)(n & 1); - } else if (valbytes == 8) { - return (val >> 1) ^ -(int64_t)(val & 1); - } - UPB_UNREACHABLE(); - } - return val; +bool upb_MethodDef_HasOptions(const upb_MethodDef* m) { + return m->opts != (void*)kUpbDefOptDefault; } -UPB_FORCEINLINE -static const char* fastdecode_varint64(const char* ptr, uint64_t* val) { - ptr++; - *val = 
(uint8_t)ptr[-1]; - if (UPB_UNLIKELY(*val & 0x80)) { - int i; - for (i = 0; i < 8; i++) { - ptr++; - uint64_t byte = (uint8_t)ptr[-1]; - *val += (byte - 1) << (7 + 7 * i); - if (UPB_LIKELY((byte & 0x80) == 0)) goto done; - } - ptr++; - uint64_t byte = (uint8_t)ptr[-1]; - if (byte > 1) { - return NULL; - } - *val += (byte - 1) << 63; - } -done: - UPB_ASSUME(ptr != NULL); - return ptr; +const char* upb_MethodDef_FullName(const upb_MethodDef* m) { + return m->full_name; } -#define FASTDECODE_UNPACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, card, zigzag, packed) \ - uint64_t val; \ - void* dst; \ - fastdecode_arr farr; \ - \ - FASTDECODE_CHECKPACKED(tagbytes, card, packed); \ - \ - dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, \ - card); \ - if (card == CARD_r) { \ - if (UPB_UNLIKELY(!dst)) { \ - RETURN_GENERIC("need array resize\n"); \ - } \ - } \ - \ - again: \ - if (card == CARD_r) { \ - dst = fastdecode_resizearr(d, dst, &farr, valbytes); \ - } \ - \ - ptr += tagbytes; \ - ptr = fastdecode_varint64(ptr, &val); \ - if (ptr == NULL) _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ - val = fastdecode_munge(val, valbytes, zigzag); \ - memcpy(dst, &val, valbytes); \ - \ - if (card == CARD_r) { \ - fastdecode_nextret ret = fastdecode_nextrepeated( \ - d, dst, &ptr, &farr, data, tagbytes, valbytes); \ - switch (ret.next) { \ - case FD_NEXT_SAMEFIELD: \ - dst = ret.dst; \ - goto again; \ - case FD_NEXT_OTHERFIELD: \ - data = ret.tag; \ - UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ - case FD_NEXT_ATLIMIT: \ - return ptr; \ - } \ - } \ - \ - UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); +const char* upb_MethodDef_Name(const upb_MethodDef* m) { + return _upb_DefBuilder_FullToShort(m->full_name); +} -typedef struct { - uint8_t valbytes; - bool zigzag; - void* dst; - fastdecode_arr farr; -} fastdecode_varintdata; +int upb_MethodDef_Index(const upb_MethodDef* m) { return m->index; 
} -UPB_FORCEINLINE -static const char* fastdecode_topackedvarint(upb_EpsCopyInputStream* e, - const char* ptr, void* ctx) { - upb_Decoder* d = (upb_Decoder*)e; - fastdecode_varintdata* data = ctx; - void* dst = data->dst; - uint64_t val; +const upb_MessageDef* upb_MethodDef_InputType(const upb_MethodDef* m) { + return m->input_type; +} - while (!_upb_Decoder_IsDone(d, &ptr)) { - dst = fastdecode_resizearr(d, dst, &data->farr, data->valbytes); - ptr = fastdecode_varint64(ptr, &val); - if (ptr == NULL) return NULL; - val = fastdecode_munge(val, data->valbytes, data->zigzag); - memcpy(dst, &val, data->valbytes); - dst = (char*)dst + data->valbytes; - } +const upb_MessageDef* upb_MethodDef_OutputType(const upb_MethodDef* m) { + return m->output_type; +} - fastdecode_commitarr(dst, &data->farr, data->valbytes); - return ptr; +bool upb_MethodDef_ClientStreaming(const upb_MethodDef* m) { + return m->client_streaming; } -#define FASTDECODE_PACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, zigzag, unpacked) \ - fastdecode_varintdata ctx = {valbytes, zigzag}; \ - \ - FASTDECODE_CHECKPACKED(tagbytes, CARD_r, unpacked); \ - \ - ctx.dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &ctx.farr, \ - valbytes, CARD_r); \ - if (UPB_UNLIKELY(!ctx.dst)) { \ - RETURN_GENERIC("need array resize\n"); \ - } \ - \ - ptr += tagbytes; \ - ptr = fastdecode_delimited(d, ptr, &fastdecode_topackedvarint, &ctx); \ - \ - if (UPB_UNLIKELY(ptr == NULL)) { \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ - } \ - \ - UPB_MUSTTAIL return fastdecode_dispatch(d, ptr, msg, table, hasbits, 0); +bool upb_MethodDef_ServerStreaming(const upb_MethodDef* m) { + return m->server_streaming; +} -#define FASTDECODE_VARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, card, zigzag, unpacked, packed) \ - if (card == CARD_p) { \ - FASTDECODE_PACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, zigzag, unpacked); \ - } else { \ - 
FASTDECODE_UNPACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, card, zigzag, packed); \ - } +static void create_method(upb_DefBuilder* ctx, + const UPB_DESC(MethodDescriptorProto) * method_proto, + upb_ServiceDef* s, upb_MethodDef* m) { + upb_StringView name = UPB_DESC(MethodDescriptorProto_name)(method_proto); -#define z_ZZ true -#define b_ZZ false -#define v_ZZ false + m->service = s; + m->full_name = + _upb_DefBuilder_MakeFullName(ctx, upb_ServiceDef_FullName(s), name); + m->client_streaming = + UPB_DESC(MethodDescriptorProto_client_streaming)(method_proto); + m->server_streaming = + UPB_DESC(MethodDescriptorProto_server_streaming)(method_proto); + m->input_type = _upb_DefBuilder_Resolve( + ctx, m->full_name, m->full_name, + UPB_DESC(MethodDescriptorProto_input_type)(method_proto), + UPB_DEFTYPE_MSG); + m->output_type = _upb_DefBuilder_Resolve( + ctx, m->full_name, m->full_name, + UPB_DESC(MethodDescriptorProto_output_type)(method_proto), + UPB_DEFTYPE_MSG); -/* Generate all combinations: - * {s,o,r,p} x {b1,v4,z4,v8,z8} x {1bt,2bt} */ + UPB_DEF_SET_OPTIONS(m->opts, MethodDescriptorProto, MethodOptions, + method_proto); +} -#define F(card, type, valbytes, tagbytes) \ - UPB_NOINLINE \ - const char* upb_p##card##type##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ - FASTDECODE_VARINT(d, ptr, msg, table, hasbits, data, tagbytes, valbytes, \ - CARD_##card, type##_ZZ, \ - upb_pr##type##valbytes##_##tagbytes##bt, \ - upb_pp##type##valbytes##_##tagbytes##bt); \ +// Allocate and initialize an array of |n| method defs belonging to |s|. 
+upb_MethodDef* _upb_MethodDefs_New( + upb_DefBuilder* ctx, int n, + const UPB_DESC(MethodDescriptorProto) * const* protos, upb_ServiceDef* s) { + upb_MethodDef* m = _upb_DefBuilder_Alloc(ctx, sizeof(upb_MethodDef) * n); + for (int i = 0; i < n; i++) { + create_method(ctx, protos[i], s, &m[i]); + m[i].index = i; } + return m; +} -#define TYPES(card, tagbytes) \ - F(card, b, 1, tagbytes) \ - F(card, v, 4, tagbytes) \ - F(card, v, 8, tagbytes) \ - F(card, z, 4, tagbytes) \ - F(card, z, 8, tagbytes) -#define TAGBYTES(card) \ - TYPES(card, 1) \ - TYPES(card, 2) +#include +#include +#include -TAGBYTES(s) -TAGBYTES(o) -TAGBYTES(r) -TAGBYTES(p) -#undef z_ZZ -#undef b_ZZ -#undef v_ZZ -#undef o_ONEOF -#undef s_ONEOF -#undef r_ONEOF -#undef F -#undef TYPES -#undef TAGBYTES -#undef FASTDECODE_UNPACKEDVARINT -#undef FASTDECODE_PACKEDVARINT -#undef FASTDECODE_VARINT +// Must be last. -/* fixed fields ***************************************************************/ +struct upb_OneofDef { + const UPB_DESC(OneofOptions) * opts; + const upb_MessageDef* parent; + const char* full_name; + int field_count; + bool synthetic; + const upb_FieldDef** fields; + upb_strtable ntof; // lookup a field by name + upb_inttable itof; // lookup a field by number (index) +#if UINTPTR_MAX == 0xffffffff + uint32_t padding; // Increase size to a multiple of 8. 
+#endif +}; -#define FASTDECODE_UNPACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, card, packed) \ - void* dst; \ - fastdecode_arr farr; \ - \ - FASTDECODE_CHECKPACKED(tagbytes, card, packed) \ - \ - dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, \ - card); \ - if (card == CARD_r) { \ - if (UPB_UNLIKELY(!dst)) { \ - RETURN_GENERIC("couldn't allocate array in arena\n"); \ - } \ - } \ - \ - again: \ - if (card == CARD_r) { \ - dst = fastdecode_resizearr(d, dst, &farr, valbytes); \ - } \ - \ - ptr += tagbytes; \ - memcpy(dst, ptr, valbytes); \ - ptr += valbytes; \ - \ - if (card == CARD_r) { \ - fastdecode_nextret ret = fastdecode_nextrepeated( \ - d, dst, &ptr, &farr, data, tagbytes, valbytes); \ - switch (ret.next) { \ - case FD_NEXT_SAMEFIELD: \ - dst = ret.dst; \ - goto again; \ - case FD_NEXT_OTHERFIELD: \ - data = ret.tag; \ - UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ - case FD_NEXT_ATLIMIT: \ - return ptr; \ - } \ - } \ - \ - UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); +upb_OneofDef* _upb_OneofDef_At(const upb_OneofDef* o, int i) { + return (upb_OneofDef*)&o[i]; +} -#define FASTDECODE_PACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, unpacked) \ - FASTDECODE_CHECKPACKED(tagbytes, CARD_r, unpacked) \ - \ - ptr += tagbytes; \ - int size = (uint8_t)ptr[0]; \ - ptr++; \ - if (size & 0x80) { \ - ptr = fastdecode_longsize(ptr, &size); \ - } \ - \ - if (UPB_UNLIKELY(!upb_EpsCopyInputStream_CheckDataSizeAvailable( \ - &d->input, ptr, size) || \ - (size % valbytes) != 0)) { \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ - } \ - \ - upb_Array** arr_p = fastdecode_fieldmem(msg, data); \ - upb_Array* arr = *arr_p; \ - uint8_t elem_size_lg2 = __builtin_ctz(valbytes); \ - int elems = size / valbytes; \ - \ - if (UPB_LIKELY(!arr)) { \ - *arr_p = arr = _upb_Array_New(&d->arena, elems, elem_size_lg2); \ - if (!arr) { \ - _upb_FastDecoder_ErrorJmp(d, 
kUpb_DecodeStatus_Malformed); \ - } \ - } else { \ - _upb_Array_ResizeUninitialized(arr, elems, &d->arena); \ - } \ - \ - char* dst = _upb_array_ptr(arr); \ - memcpy(dst, ptr, size); \ - arr->size = elems; \ - \ - ptr += size; \ - UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); +const UPB_DESC(OneofOptions) * upb_OneofDef_Options(const upb_OneofDef* o) { + return o->opts; +} -#define FASTDECODE_FIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, card, unpacked, packed) \ - if (card == CARD_p) { \ - FASTDECODE_PACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, unpacked); \ - } else { \ - FASTDECODE_UNPACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, card, packed); \ - } +bool upb_OneofDef_HasOptions(const upb_OneofDef* o) { + return o->opts != (void*)kUpbDefOptDefault; +} -/* Generate all combinations: - * {s,o,r,p} x {f4,f8} x {1bt,2bt} */ +const char* upb_OneofDef_FullName(const upb_OneofDef* o) { + return o->full_name; +} -#define F(card, valbytes, tagbytes) \ - UPB_NOINLINE \ - const char* upb_p##card##f##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ - FASTDECODE_FIXED(d, ptr, msg, table, hasbits, data, tagbytes, valbytes, \ - CARD_##card, upb_ppf##valbytes##_##tagbytes##bt, \ - upb_prf##valbytes##_##tagbytes##bt); \ - } +const char* upb_OneofDef_Name(const upb_OneofDef* o) { + return _upb_DefBuilder_FullToShort(o->full_name); +} -#define TYPES(card, tagbytes) \ - F(card, 4, tagbytes) \ - F(card, 8, tagbytes) +const upb_MessageDef* upb_OneofDef_ContainingType(const upb_OneofDef* o) { + return o->parent; +} -#define TAGBYTES(card) \ - TYPES(card, 1) \ - TYPES(card, 2) +int upb_OneofDef_FieldCount(const upb_OneofDef* o) { return o->field_count; } -TAGBYTES(s) -TAGBYTES(o) -TAGBYTES(r) -TAGBYTES(p) +const upb_FieldDef* upb_OneofDef_Field(const upb_OneofDef* o, int i) { + UPB_ASSERT(i < o->field_count); + return o->fields[i]; +} -#undef F -#undef TYPES -#undef TAGBYTES -#undef FASTDECODE_UNPACKEDFIXED 
-#undef FASTDECODE_PACKEDFIXED +int upb_OneofDef_numfields(const upb_OneofDef* o) { return o->field_count; } -/* string fields **************************************************************/ +uint32_t upb_OneofDef_Index(const upb_OneofDef* o) { + // Compute index in our parent's array. + return o - upb_MessageDef_Oneof(o->parent, 0); +} -typedef const char* fastdecode_copystr_func(struct upb_Decoder* d, - const char* ptr, upb_Message* msg, - const upb_MiniTable* table, - uint64_t hasbits, - upb_StringView* dst); +bool upb_OneofDef_IsSynthetic(const upb_OneofDef* o) { return o->synthetic; } -UPB_NOINLINE -static const char* fastdecode_verifyutf8(upb_Decoder* d, const char* ptr, - upb_Message* msg, intptr_t table, - uint64_t hasbits, uint64_t data) { - upb_StringView* dst = (upb_StringView*)data; - if (!_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) { - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); +const upb_FieldDef* upb_OneofDef_LookupNameWithSize(const upb_OneofDef* o, + const char* name, + size_t size) { + upb_value val; + return upb_strtable_lookup2(&o->ntof, name, size, &val) + ? upb_value_getptr(val) + : NULL; +} + +const upb_FieldDef* upb_OneofDef_LookupName(const upb_OneofDef* o, + const char* name) { + return upb_OneofDef_LookupNameWithSize(o, name, strlen(name)); +} + +const upb_FieldDef* upb_OneofDef_LookupNumber(const upb_OneofDef* o, + uint32_t num) { + upb_value val; + return upb_inttable_lookup(&o->itof, num, &val) ? upb_value_getptr(val) + : NULL; +} + +void _upb_OneofDef_Insert(upb_DefBuilder* ctx, upb_OneofDef* o, + const upb_FieldDef* f, const char* name, + size_t size) { + o->field_count++; + if (_upb_FieldDef_IsProto3Optional(f)) o->synthetic = true; + + const int number = upb_FieldDef_Number(f); + const upb_value v = upb_value_constptr(f); + + // TODO(salo): This lookup is unfortunate because we also perform it when + // inserting into the message's table. 
Unfortunately that step occurs after + // this one and moving things around could be tricky so let's leave it for + // a future refactoring. + const bool number_exists = upb_inttable_lookup(&o->itof, number, NULL); + if (UPB_UNLIKELY(number_exists)) { + _upb_DefBuilder_Errf(ctx, "oneof fields have the same number (%d)", number); + } + + // TODO(salo): More redundant work happening here. + const bool name_exists = upb_strtable_lookup2(&o->ntof, name, size, NULL); + if (UPB_UNLIKELY(name_exists)) { + _upb_DefBuilder_Errf(ctx, "oneof fields have the same name (%.*s)", + (int)size, name); + } + + const bool ok = upb_inttable_insert(&o->itof, number, v, ctx->arena) && + upb_strtable_insert(&o->ntof, name, size, v, ctx->arena); + if (UPB_UNLIKELY(!ok)) { + _upb_DefBuilder_OomErr(ctx); } - UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); } -#define FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, validate_utf8) \ - int size = (uint8_t)ptr[0]; /* Could plumb through hasbits. */ \ - ptr++; \ - if (size & 0x80) { \ - ptr = fastdecode_longsize(ptr, &size); \ - } \ - \ - if (UPB_UNLIKELY(!upb_EpsCopyInputStream_CheckSize(&d->input, ptr, size))) { \ - dst->size = 0; \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ - } \ - \ - const char* s_ptr = ptr; \ - ptr = upb_EpsCopyInputStream_ReadString(&d->input, &s_ptr, size, &d->arena); \ - if (!ptr) _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); \ - dst->data = s_ptr; \ - dst->size = size; \ - \ - if (validate_utf8) { \ - data = (uint64_t)dst; \ - UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ - } else { \ - UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \ +// Returns the synthetic count. 
+size_t _upb_OneofDefs_Finalize(upb_DefBuilder* ctx, upb_MessageDef* m) { + int synthetic_count = 0; + + for (int i = 0; i < upb_MessageDef_OneofCount(m); i++) { + upb_OneofDef* o = (upb_OneofDef*)upb_MessageDef_Oneof(m, i); + + if (o->synthetic && o->field_count != 1) { + _upb_DefBuilder_Errf(ctx, + "Synthetic oneofs must have one field, not %d: %s", + o->field_count, upb_OneofDef_Name(o)); + } + + if (o->synthetic) { + synthetic_count++; + } else if (synthetic_count != 0) { + _upb_DefBuilder_Errf( + ctx, "Synthetic oneofs must be after all other oneofs: %s", + upb_OneofDef_Name(o)); + } + + o->fields = + _upb_DefBuilder_Alloc(ctx, sizeof(upb_FieldDef*) * o->field_count); + o->field_count = 0; } -UPB_NOINLINE -static const char* fastdecode_longstring_utf8(struct upb_Decoder* d, - const char* ptr, upb_Message* msg, - intptr_t table, uint64_t hasbits, - uint64_t data) { - upb_StringView* dst = (upb_StringView*)data; - FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, true); + for (int i = 0; i < upb_MessageDef_FieldCount(m); i++) { + const upb_FieldDef* f = upb_MessageDef_Field(m, i); + upb_OneofDef* o = (upb_OneofDef*)upb_FieldDef_ContainingOneof(f); + if (o) { + o->fields[o->field_count++] = f; + } + } + + return synthetic_count; } -UPB_NOINLINE -static const char* fastdecode_longstring_noutf8( - struct upb_Decoder* d, const char* ptr, upb_Message* msg, intptr_t table, - uint64_t hasbits, uint64_t data) { - upb_StringView* dst = (upb_StringView*)data; - FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, false); +static void create_oneofdef(upb_DefBuilder* ctx, upb_MessageDef* m, + const UPB_DESC(OneofDescriptorProto) * oneof_proto, + const upb_OneofDef* _o) { + upb_OneofDef* o = (upb_OneofDef*)_o; + upb_StringView name = UPB_DESC(OneofDescriptorProto_name)(oneof_proto); + + o->parent = m; + o->full_name = + _upb_DefBuilder_MakeFullName(ctx, upb_MessageDef_FullName(m), name); + o->field_count = 0; + o->synthetic = false; + + UPB_DEF_SET_OPTIONS(o->opts, 
OneofDescriptorProto, OneofOptions, oneof_proto); + + if (upb_MessageDef_FindByNameWithSize(m, name.data, name.size, NULL, NULL)) { + _upb_DefBuilder_Errf(ctx, "duplicate oneof name (%s)", o->full_name); + } + + upb_value v = _upb_DefType_Pack(o, UPB_DEFTYPE_ONEOF); + bool ok = _upb_MessageDef_Insert(m, name.data, name.size, v, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); + + ok = upb_inttable_init(&o->itof, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); + + ok = upb_strtable_init(&o->ntof, 4, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); } -UPB_FORCEINLINE -static void fastdecode_docopy(upb_Decoder* d, const char* ptr, uint32_t size, - int copy, char* data, upb_StringView* dst) { - d->arena.head.ptr += copy; - dst->data = data; - UPB_UNPOISON_MEMORY_REGION(data, copy); - memcpy(data, ptr, copy); - UPB_POISON_MEMORY_REGION(data + size, copy - size); +// Allocate and initialize an array of |n| oneof defs. +upb_OneofDef* _upb_OneofDefs_New( + upb_DefBuilder* ctx, int n, + const UPB_DESC(OneofDescriptorProto) * const* protos, upb_MessageDef* m) { + _upb_DefType_CheckPadding(sizeof(upb_OneofDef)); + + upb_OneofDef* o = _upb_DefBuilder_Alloc(ctx, sizeof(upb_OneofDef) * n); + for (int i = 0; i < n; i++) { + create_oneofdef(ctx, m, protos[i], &o[i]); + } + return o; } -#define FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes, \ - card, validate_utf8) \ - upb_StringView* dst; \ - fastdecode_arr farr; \ - int64_t size; \ - size_t arena_has; \ - size_t common_has; \ - char* buf; \ - \ - UPB_ASSERT(!upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, 0)); \ - UPB_ASSERT(fastdecode_checktag(data, tagbytes)); \ - \ - dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ - sizeof(upb_StringView), card); \ - \ - again: \ - if (card == CARD_r) { \ - dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_StringView)); \ - } \ - \ - size = (uint8_t)ptr[tagbytes]; \ - ptr += tagbytes + 1; \ - dst->size = size; \ - \ - buf = 
d->arena.head.ptr; \ - arena_has = _upb_ArenaHas(&d->arena); \ - common_has = UPB_MIN(arena_has, \ - upb_EpsCopyInputStream_BytesAvailable(&d->input, ptr)); \ - \ - if (UPB_LIKELY(size <= 15 - tagbytes)) { \ - if (arena_has < 16) goto longstr; \ - d->arena.head.ptr += 16; \ - memcpy(buf, ptr - tagbytes - 1, 16); \ - dst->data = buf + tagbytes + 1; \ - } else if (UPB_LIKELY(size <= 32)) { \ - if (UPB_UNLIKELY(common_has < 32)) goto longstr; \ - fastdecode_docopy(d, ptr, size, 32, buf, dst); \ - } else if (UPB_LIKELY(size <= 64)) { \ - if (UPB_UNLIKELY(common_has < 64)) goto longstr; \ - fastdecode_docopy(d, ptr, size, 64, buf, dst); \ - } else if (UPB_LIKELY(size < 128)) { \ - if (UPB_UNLIKELY(common_has < 128)) goto longstr; \ - fastdecode_docopy(d, ptr, size, 128, buf, dst); \ - } else { \ - goto longstr; \ - } \ - \ - ptr += size; \ - \ - if (card == CARD_r) { \ - if (validate_utf8 && \ - !_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) { \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); \ - } \ - fastdecode_nextret ret = fastdecode_nextrepeated( \ - d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_StringView)); \ - switch (ret.next) { \ - case FD_NEXT_SAMEFIELD: \ - dst = ret.dst; \ - goto again; \ - case FD_NEXT_OTHERFIELD: \ - data = ret.tag; \ - UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ - case FD_NEXT_ATLIMIT: \ - return ptr; \ - } \ - } \ - \ - if (card != CARD_r && validate_utf8) { \ - data = (uint64_t)dst; \ - UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ - } \ - \ - UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \ - \ - longstr: \ - if (card == CARD_r) { \ - fastdecode_commitarr(dst + 1, &farr, sizeof(upb_StringView)); \ - } \ - ptr--; \ - if (validate_utf8) { \ - UPB_MUSTTAIL return fastdecode_longstring_utf8(d, ptr, msg, table, \ - hasbits, (uint64_t)dst); \ - } else { \ - UPB_MUSTTAIL return fastdecode_longstring_noutf8(d, ptr, msg, table, \ - hasbits, (uint64_t)dst); \ - } -#define 
FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, card, \ - copyfunc, validate_utf8) \ - upb_StringView* dst; \ - fastdecode_arr farr; \ - int64_t size; \ - \ - if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ - RETURN_GENERIC("string field tag mismatch\n"); \ - } \ - \ - if (UPB_UNLIKELY( \ - !upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, 0))) { \ - UPB_MUSTTAIL return copyfunc(UPB_PARSE_ARGS); \ - } \ - \ - dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ - sizeof(upb_StringView), card); \ - \ - again: \ - if (card == CARD_r) { \ - dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_StringView)); \ - } \ - \ - size = (int8_t)ptr[tagbytes]; \ - ptr += tagbytes + 1; \ - \ - if (UPB_UNLIKELY( \ - !upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, size))) { \ - ptr--; \ - if (validate_utf8) { \ - return fastdecode_longstring_utf8(d, ptr, msg, table, hasbits, \ - (uint64_t)dst); \ - } else { \ - return fastdecode_longstring_noutf8(d, ptr, msg, table, hasbits, \ - (uint64_t)dst); \ - } \ - } \ - \ - dst->data = ptr; \ - dst->size = size; \ - ptr = upb_EpsCopyInputStream_ReadStringAliased(&d->input, &dst->data, \ - dst->size); \ - \ - if (card == CARD_r) { \ - if (validate_utf8 && \ - !_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) { \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); \ - } \ - fastdecode_nextret ret = fastdecode_nextrepeated( \ - d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_StringView)); \ - switch (ret.next) { \ - case FD_NEXT_SAMEFIELD: \ - dst = ret.dst; \ - goto again; \ - case FD_NEXT_OTHERFIELD: \ - data = ret.tag; \ - UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ - case FD_NEXT_ATLIMIT: \ - return ptr; \ - } \ - } \ - \ - if (card != CARD_r && validate_utf8) { \ - data = (uint64_t)dst; \ - UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ - } \ - \ - UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); -/* Generate all combinations: 
- * {p,c} x {s,o,r} x {s, b} x {1bt,2bt} */ +// Must be last. -#define s_VALIDATE true -#define b_VALIDATE false +struct upb_ServiceDef { + const UPB_DESC(ServiceOptions) * opts; + const upb_FileDef* file; + const char* full_name; + upb_MethodDef* methods; + int method_count; + int index; +}; -#define F(card, tagbytes, type) \ - UPB_NOINLINE \ - const char* upb_c##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ - FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes, \ - CARD_##card, type##_VALIDATE); \ - } \ - const char* upb_p##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ - FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, \ - CARD_##card, upb_c##card##type##_##tagbytes##bt, \ - type##_VALIDATE); \ - } +upb_ServiceDef* _upb_ServiceDef_At(const upb_ServiceDef* s, int index) { + return (upb_ServiceDef*)&s[index]; +} -#define UTF8(card, tagbytes) \ - F(card, tagbytes, s) \ - F(card, tagbytes, b) +const UPB_DESC(ServiceOptions) * + upb_ServiceDef_Options(const upb_ServiceDef* s) { + return s->opts; +} -#define TAGBYTES(card) \ - UTF8(card, 1) \ - UTF8(card, 2) +bool upb_ServiceDef_HasOptions(const upb_ServiceDef* s) { + return s->opts != (void*)kUpbDefOptDefault; +} -TAGBYTES(s) -TAGBYTES(o) -TAGBYTES(r) +const char* upb_ServiceDef_FullName(const upb_ServiceDef* s) { + return s->full_name; +} -#undef s_VALIDATE -#undef b_VALIDATE -#undef F -#undef TAGBYTES -#undef FASTDECODE_LONGSTRING -#undef FASTDECODE_COPYSTRING -#undef FASTDECODE_STRING +const char* upb_ServiceDef_Name(const upb_ServiceDef* s) { + return _upb_DefBuilder_FullToShort(s->full_name); +} -/* message fields *************************************************************/ +int upb_ServiceDef_Index(const upb_ServiceDef* s) { return s->index; } -UPB_INLINE -upb_Message* decode_newmsg_ceil(upb_Decoder* d, const upb_MiniTable* l, - int msg_ceil_bytes) { - size_t size = l->size + sizeof(upb_Message_Internal); - char* msg_data; - if (UPB_LIKELY(msg_ceil_bytes > 0 && - 
_upb_ArenaHas(&d->arena) >= msg_ceil_bytes)) { - UPB_ASSERT(size <= (size_t)msg_ceil_bytes); - msg_data = d->arena.head.ptr; - d->arena.head.ptr += size; - UPB_UNPOISON_MEMORY_REGION(msg_data, msg_ceil_bytes); - memset(msg_data, 0, msg_ceil_bytes); - UPB_POISON_MEMORY_REGION(msg_data + size, msg_ceil_bytes - size); - } else { - msg_data = (char*)upb_Arena_Malloc(&d->arena, size); - memset(msg_data, 0, size); - } - return msg_data + sizeof(upb_Message_Internal); +const upb_FileDef* upb_ServiceDef_File(const upb_ServiceDef* s) { + return s->file; } -typedef struct { - intptr_t table; - upb_Message* msg; -} fastdecode_submsgdata; +int upb_ServiceDef_MethodCount(const upb_ServiceDef* s) { + return s->method_count; +} -UPB_FORCEINLINE -static const char* fastdecode_tosubmsg(upb_EpsCopyInputStream* e, - const char* ptr, void* ctx) { - upb_Decoder* d = (upb_Decoder*)e; - fastdecode_submsgdata* submsg = ctx; - ptr = fastdecode_dispatch(d, ptr, submsg->msg, submsg->table, 0, 0); - UPB_ASSUME(ptr != NULL); - return ptr; +const upb_MethodDef* upb_ServiceDef_Method(const upb_ServiceDef* s, int i) { + return (i < 0 || i >= s->method_count) ? 
NULL + : _upb_MethodDef_At(s->methods, i); } -#define FASTDECODE_SUBMSG(d, ptr, msg, table, hasbits, data, tagbytes, \ - msg_ceil_bytes, card) \ - \ - if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ - RETURN_GENERIC("submessage field tag mismatch\n"); \ - } \ - \ - if (--d->depth == 0) { \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_MaxDepthExceeded); \ - } \ - \ - upb_Message** dst; \ - uint32_t submsg_idx = (data >> 16) & 0xff; \ - const upb_MiniTable* tablep = decode_totablep(table); \ - const upb_MiniTable* subtablep = tablep->subs[submsg_idx].submsg; \ - fastdecode_submsgdata submsg = {decode_totable(subtablep)}; \ - fastdecode_arr farr; \ - \ - if (subtablep->table_mask == (uint8_t)-1) { \ - RETURN_GENERIC("submessage doesn't have fast tables."); \ - } \ - \ - dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ - sizeof(upb_Message*), card); \ - \ - if (card == CARD_s) { \ - *(uint32_t*)msg |= hasbits; \ - hasbits = 0; \ - } \ - \ - again: \ - if (card == CARD_r) { \ - dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_Message*)); \ - } \ - \ - submsg.msg = *dst; \ - \ - if (card == CARD_r || UPB_LIKELY(!submsg.msg)) { \ - *dst = submsg.msg = decode_newmsg_ceil(d, subtablep, msg_ceil_bytes); \ - } \ - \ - ptr += tagbytes; \ - ptr = fastdecode_delimited(d, ptr, fastdecode_tosubmsg, &submsg); \ - \ - if (UPB_UNLIKELY(ptr == NULL || d->end_group != DECODE_NOGROUP)) { \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ - } \ - \ - if (card == CARD_r) { \ - fastdecode_nextret ret = fastdecode_nextrepeated( \ - d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_Message*)); \ - switch (ret.next) { \ - case FD_NEXT_SAMEFIELD: \ - dst = ret.dst; \ - goto again; \ - case FD_NEXT_OTHERFIELD: \ - d->depth++; \ - data = ret.tag; \ - UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ - case FD_NEXT_ATLIMIT: \ - d->depth++; \ - return ptr; \ - } \ - } \ - \ - d->depth++; \ - UPB_MUSTTAIL return 
fastdecode_dispatch(UPB_PARSE_ARGS); - -#define F(card, tagbytes, size_ceil, ceil_arg) \ - const char* upb_p##card##m_##tagbytes##bt_max##size_ceil##b( \ - UPB_PARSE_PARAMS) { \ - FASTDECODE_SUBMSG(d, ptr, msg, table, hasbits, data, tagbytes, ceil_arg, \ - CARD_##card); \ +const upb_MethodDef* upb_ServiceDef_FindMethodByName(const upb_ServiceDef* s, + const char* name) { + for (int i = 0; i < s->method_count; i++) { + const upb_MethodDef* m = _upb_MethodDef_At(s->methods, i); + if (strcmp(name, upb_MethodDef_Name(m)) == 0) { + return m; + } } + return NULL; +} -#define SIZES(card, tagbytes) \ - F(card, tagbytes, 64, 64) \ - F(card, tagbytes, 128, 128) \ - F(card, tagbytes, 192, 192) \ - F(card, tagbytes, 256, 256) \ - F(card, tagbytes, max, -1) +static void create_service(upb_DefBuilder* ctx, + const UPB_DESC(ServiceDescriptorProto) * svc_proto, + upb_ServiceDef* s) { + upb_StringView name; + size_t n; -#define TAGBYTES(card) \ - SIZES(card, 1) \ - SIZES(card, 2) + // Must happen before _upb_DefBuilder_Add() + s->file = _upb_DefBuilder_File(ctx); -TAGBYTES(s) -TAGBYTES(o) -TAGBYTES(r) + name = UPB_DESC(ServiceDescriptorProto_name)(svc_proto); + const char* package = _upb_FileDef_RawPackage(s->file); + s->full_name = _upb_DefBuilder_MakeFullName(ctx, package, name); + _upb_DefBuilder_Add(ctx, s->full_name, + _upb_DefType_Pack(s, UPB_DEFTYPE_SERVICE)); -#undef TAGBYTES -#undef SIZES -#undef F -#undef FASTDECODE_SUBMSG + const UPB_DESC(MethodDescriptorProto)* const* methods = + UPB_DESC(ServiceDescriptorProto_method)(svc_proto, &n); + s->method_count = n; + s->methods = _upb_MethodDefs_New(ctx, n, methods, s); -#endif /* UPB_FASTTABLE */ + UPB_DEF_SET_OPTIONS(s->opts, ServiceDescriptorProto, ServiceOptions, + svc_proto); +} -// We encode backwards, to avoid pre-computing lengths (one-pass encode). 
+upb_ServiceDef* _upb_ServiceDefs_New( + upb_DefBuilder* ctx, int n, + const UPB_DESC(ServiceDescriptorProto) * const* protos) { + _upb_DefType_CheckPadding(sizeof(upb_ServiceDef)); + + upb_ServiceDef* s = _upb_DefBuilder_Alloc(ctx, sizeof(upb_ServiceDef) * n); + for (int i = 0; i < n; i++) { + create_service(ctx, protos[i], &s[i]); + s[i].index = i; + } + return s; +} #include @@ -12036,2170 +11254,2962 @@ TAGBYTES(r) // Must be last. -#define UPB_PB_VARINT_MAX_LEN 10 - -UPB_NOINLINE -static size_t encode_varint64(uint64_t val, char* buf) { - size_t i = 0; - do { - uint8_t byte = val & 0x7fU; - val >>= 7; - if (val) byte |= 0x80U; - buf[i++] = byte; - } while (val); - return i; -} +// A few fake field types for our tables. +enum { + kUpb_FakeFieldType_FieldNotFound = 0, + kUpb_FakeFieldType_MessageSetItem = 19, +}; -static uint32_t encode_zz32(int32_t n) { - return ((uint32_t)n << 1) ^ (n >> 31); -} -static uint64_t encode_zz64(int64_t n) { - return ((uint64_t)n << 1) ^ (n >> 63); -} +// DecodeOp: an action to be performed for a wire-type/field-type combination. +enum { + // Special ops: we don't write data to regular fields for these. + kUpb_DecodeOp_UnknownField = -1, + kUpb_DecodeOp_MessageSetItem = -2, -typedef struct { - upb_EncodeStatus status; - jmp_buf err; - upb_Arena* arena; - char *buf, *ptr, *limit; - int options; - int depth; - _upb_mapsorter sorter; -} upb_encstate; + // Scalar-only ops. + kUpb_DecodeOp_Scalar1Byte = 0, + kUpb_DecodeOp_Scalar4Byte = 2, + kUpb_DecodeOp_Scalar8Byte = 3, + kUpb_DecodeOp_Enum = 1, -static size_t upb_roundup_pow2(size_t bytes) { - size_t ret = 128; - while (ret < bytes) { - ret *= 2; - } - return ret; -} + // Scalar/repeated ops. + kUpb_DecodeOp_String = 4, + kUpb_DecodeOp_Bytes = 5, + kUpb_DecodeOp_SubMessage = 6, -UPB_NORETURN static void encode_err(upb_encstate* e, upb_EncodeStatus s) { - UPB_ASSERT(s != kUpb_EncodeStatus_Ok); - e->status = s; - UPB_LONGJMP(e->err, 1); -} + // Repeated-only ops (also see macros below). 
+ kUpb_DecodeOp_PackedEnum = 13, +}; -UPB_NOINLINE -static void encode_growbuffer(upb_encstate* e, size_t bytes) { - size_t old_size = e->limit - e->buf; - size_t new_size = upb_roundup_pow2(bytes + (e->limit - e->ptr)); - char* new_buf = upb_Arena_Realloc(e->arena, e->buf, old_size, new_size); +// For packed fields it is helpful to be able to recover the lg2 of the data +// size from the op. +#define OP_FIXPCK_LG2(n) (n + 5) /* n in [2, 3] => op in [7, 8] */ +#define OP_VARPCK_LG2(n) (n + 9) /* n in [0, 2, 3] => op in [9, 11, 12] */ - if (!new_buf) encode_err(e, kUpb_EncodeStatus_OutOfMemory); +typedef union { + bool bool_val; + uint32_t uint32_val; + uint64_t uint64_val; + uint32_t size; +} wireval; - // We want previous data at the end, realloc() put it at the beginning. - // TODO(salo): This is somewhat inefficient since we are copying twice. - // Maybe create a realloc() that copies to the end of the new buffer? - if (old_size > 0) { - memmove(new_buf + new_size - old_size, e->buf, old_size); - } +static const char* _upb_Decoder_DecodeMessage(upb_Decoder* d, const char* ptr, + upb_Message* msg, + const upb_MiniTable* layout); - e->ptr = new_buf + new_size - (e->limit - e->ptr); - e->limit = new_buf + new_size; - e->buf = new_buf; +UPB_NORETURN static void* _upb_Decoder_ErrorJmp(upb_Decoder* d, + upb_DecodeStatus status) { + assert(status != kUpb_DecodeStatus_Ok); + d->status = status; + UPB_LONGJMP(d->err, 1); +} - e->ptr -= bytes; +const char* _upb_FastDecoder_ErrorJmp(upb_Decoder* d, int status) { + assert(status != kUpb_DecodeStatus_Ok); + d->status = status; + UPB_LONGJMP(d->err, 1); + return NULL; } -/* Call to ensure that at least "bytes" bytes are available for writing at - * e->ptr. Returns false if the bytes could not be allocated. 
*/ -UPB_FORCEINLINE -static void encode_reserve(upb_encstate* e, size_t bytes) { - if ((size_t)(e->ptr - e->buf) < bytes) { - encode_growbuffer(e, bytes); - return; +static void _upb_Decoder_VerifyUtf8(upb_Decoder* d, const char* buf, int len) { + if (!_upb_Decoder_VerifyUtf8Inline(buf, len)) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); } - - e->ptr -= bytes; } -/* Writes the given bytes to the buffer, handling reserve/advance. */ -static void encode_bytes(upb_encstate* e, const void* data, size_t len) { - if (len == 0) return; /* memcpy() with zero size is UB */ - encode_reserve(e, len); - memcpy(e->ptr, data, len); +static bool _upb_Decoder_Reserve(upb_Decoder* d, upb_Array* arr, size_t elem) { + bool need_realloc = arr->capacity - arr->size < elem; + if (need_realloc && !_upb_array_realloc(arr, arr->size + elem, &d->arena)) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + } + return need_realloc; } -static void encode_fixed64(upb_encstate* e, uint64_t val) { - val = _upb_BigEndian_Swap64(val); - encode_bytes(e, &val, sizeof(uint64_t)); -} - -static void encode_fixed32(upb_encstate* e, uint32_t val) { - val = _upb_BigEndian_Swap32(val); - encode_bytes(e, &val, sizeof(uint32_t)); -} +typedef struct { + const char* ptr; + uint64_t val; +} _upb_DecodeLongVarintReturn; UPB_NOINLINE -static void encode_longvarint(upb_encstate* e, uint64_t val) { - size_t len; - char* start; - - encode_reserve(e, UPB_PB_VARINT_MAX_LEN); - len = encode_varint64(val, e->ptr); - start = e->ptr + UPB_PB_VARINT_MAX_LEN - len; - memmove(start, e->ptr, len); - e->ptr = start; +static _upb_DecodeLongVarintReturn _upb_Decoder_DecodeLongVarint( + const char* ptr, uint64_t val) { + _upb_DecodeLongVarintReturn ret = {NULL, 0}; + uint64_t byte; + int i; + for (i = 1; i < 10; i++) { + byte = (uint8_t)ptr[i]; + val += (byte - 1) << (i * 7); + if (!(byte & 0x80)) { + ret.ptr = ptr + i + 1; + ret.val = val; + return ret; + } + } + return ret; } UPB_FORCEINLINE -static void 
encode_varint(upb_encstate* e, uint64_t val) { - if (val < 128 && e->ptr != e->buf) { - --e->ptr; - *e->ptr = val; +static const char* _upb_Decoder_DecodeVarint(upb_Decoder* d, const char* ptr, + uint64_t* val) { + uint64_t byte = (uint8_t)*ptr; + if (UPB_LIKELY((byte & 0x80) == 0)) { + *val = byte; + return ptr + 1; } else { - encode_longvarint(e, val); + _upb_DecodeLongVarintReturn res = _upb_Decoder_DecodeLongVarint(ptr, byte); + if (!res.ptr) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); + *val = res.val; + return res.ptr; } } -static void encode_double(upb_encstate* e, double d) { - uint64_t u64; - UPB_ASSERT(sizeof(double) == sizeof(uint64_t)); - memcpy(&u64, &d, sizeof(uint64_t)); - encode_fixed64(e, u64); -} - -static void encode_float(upb_encstate* e, float d) { - uint32_t u32; - UPB_ASSERT(sizeof(float) == sizeof(uint32_t)); - memcpy(&u32, &d, sizeof(uint32_t)); - encode_fixed32(e, u32); -} - -static void encode_tag(upb_encstate* e, uint32_t field_number, - uint8_t wire_type) { - encode_varint(e, (field_number << 3) | wire_type); -} - -static void encode_fixedarray(upb_encstate* e, const upb_Array* arr, - size_t elem_size, uint32_t tag) { - size_t bytes = arr->size * elem_size; - const char* data = _upb_array_constptr(arr); - const char* ptr = data + bytes - elem_size; - - if (tag || !_upb_IsLittleEndian()) { - while (true) { - if (elem_size == 4) { - uint32_t val; - memcpy(&val, ptr, sizeof(val)); - val = _upb_BigEndian_Swap32(val); - encode_bytes(e, &val, elem_size); - } else { - UPB_ASSERT(elem_size == 8); - uint64_t val; - memcpy(&val, ptr, sizeof(val)); - val = _upb_BigEndian_Swap64(val); - encode_bytes(e, &val, elem_size); - } - - if (tag) encode_varint(e, tag); - if (ptr == data) break; - ptr -= elem_size; - } +UPB_FORCEINLINE +static const char* _upb_Decoder_DecodeTag(upb_Decoder* d, const char* ptr, + uint32_t* val) { + uint64_t byte = (uint8_t)*ptr; + if (UPB_LIKELY((byte & 0x80) == 0)) { + *val = byte; + return ptr + 1; } else { - 
encode_bytes(e, data, bytes); + const char* start = ptr; + _upb_DecodeLongVarintReturn res = _upb_Decoder_DecodeLongVarint(ptr, byte); + if (!res.ptr || res.ptr - start > 5 || res.val > UINT32_MAX) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); + } + *val = res.val; + return res.ptr; } } -static void encode_message(upb_encstate* e, const upb_Message* msg, - const upb_MiniTable* m, size_t* size); - -static void encode_TaggedMessagePtr(upb_encstate* e, - upb_TaggedMessagePtr tagged, - const upb_MiniTable* m, size_t* size) { - if (upb_TaggedMessagePtr_IsEmpty(tagged)) { - m = &_kUpb_MiniTable_Empty; +UPB_FORCEINLINE +static const char* upb_Decoder_DecodeSize(upb_Decoder* d, const char* ptr, + uint32_t* size) { + uint64_t size64; + ptr = _upb_Decoder_DecodeVarint(d, ptr, &size64); + if (size64 >= INT32_MAX || + !upb_EpsCopyInputStream_CheckSize(&d->input, ptr, (int)size64)) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); } - encode_message(e, _upb_TaggedMessagePtr_GetMessage(tagged), m, size); + *size = size64; + return ptr; } -static void encode_scalar(upb_encstate* e, const void* _field_mem, - const upb_MiniTableSub* subs, - const upb_MiniTableField* f) { - const char* field_mem = _field_mem; - int wire_type; - -#define CASE(ctype, type, wtype, encodeval) \ - { \ - ctype val = *(ctype*)field_mem; \ - encode_##type(e, encodeval); \ - wire_type = wtype; \ - break; \ +static void _upb_Decoder_MungeInt32(wireval* val) { + if (!_upb_IsLittleEndian()) { + /* The next stage will memcpy(dst, &val, 4) */ + val->uint32_val = val->uint64_val; } +} - switch (f->UPB_PRIVATE(descriptortype)) { - case kUpb_FieldType_Double: - CASE(double, double, kUpb_WireType_64Bit, val); - case kUpb_FieldType_Float: - CASE(float, float, kUpb_WireType_32Bit, val); - case kUpb_FieldType_Int64: - case kUpb_FieldType_UInt64: - CASE(uint64_t, varint, kUpb_WireType_Varint, val); - case kUpb_FieldType_UInt32: - CASE(uint32_t, varint, kUpb_WireType_Varint, val); - case 
kUpb_FieldType_Int32: - case kUpb_FieldType_Enum: - CASE(int32_t, varint, kUpb_WireType_Varint, (int64_t)val); - case kUpb_FieldType_SFixed64: - case kUpb_FieldType_Fixed64: - CASE(uint64_t, fixed64, kUpb_WireType_64Bit, val); - case kUpb_FieldType_Fixed32: - case kUpb_FieldType_SFixed32: - CASE(uint32_t, fixed32, kUpb_WireType_32Bit, val); +static void _upb_Decoder_Munge(int type, wireval* val) { + switch (type) { case kUpb_FieldType_Bool: - CASE(bool, varint, kUpb_WireType_Varint, val); - case kUpb_FieldType_SInt32: - CASE(int32_t, varint, kUpb_WireType_Varint, encode_zz32(val)); - case kUpb_FieldType_SInt64: - CASE(int64_t, varint, kUpb_WireType_Varint, encode_zz64(val)); - case kUpb_FieldType_String: - case kUpb_FieldType_Bytes: { - upb_StringView view = *(upb_StringView*)field_mem; - encode_bytes(e, view.data, view.size); - encode_varint(e, view.size); - wire_type = kUpb_WireType_Delimited; + val->bool_val = val->uint64_val != 0; break; - } - case kUpb_FieldType_Group: { - size_t size; - upb_TaggedMessagePtr submsg = *(upb_TaggedMessagePtr*)field_mem; - const upb_MiniTable* subm = subs[f->UPB_PRIVATE(submsg_index)].submsg; - if (submsg == 0) { - return; - } - if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded); - encode_tag(e, f->number, kUpb_WireType_EndGroup); - encode_TaggedMessagePtr(e, submsg, subm, &size); - wire_type = kUpb_WireType_StartGroup; - e->depth++; + case kUpb_FieldType_SInt32: { + uint32_t n = val->uint64_val; + val->uint32_val = (n >> 1) ^ -(int32_t)(n & 1); break; } - case kUpb_FieldType_Message: { - size_t size; - upb_TaggedMessagePtr submsg = *(upb_TaggedMessagePtr*)field_mem; - const upb_MiniTable* subm = subs[f->UPB_PRIVATE(submsg_index)].submsg; - if (submsg == 0) { - return; - } - if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded); - encode_TaggedMessagePtr(e, submsg, subm, &size); - encode_varint(e, size); - wire_type = kUpb_WireType_Delimited; - e->depth++; + case kUpb_FieldType_SInt64: { + 
uint64_t n = val->uint64_val; + val->uint64_val = (n >> 1) ^ -(int64_t)(n & 1); break; } - default: - UPB_UNREACHABLE(); + case kUpb_FieldType_Int32: + case kUpb_FieldType_UInt32: + case kUpb_FieldType_Enum: + _upb_Decoder_MungeInt32(val); + break; } -#undef CASE - - encode_tag(e, f->number, wire_type); } -static void encode_array(upb_encstate* e, const upb_Message* msg, - const upb_MiniTableSub* subs, - const upb_MiniTableField* f) { - const upb_Array* arr = *UPB_PTR_AT(msg, f->offset, upb_Array*); - bool packed = f->mode & kUpb_LabelFlags_IsPacked; - size_t pre_len = e->limit - e->ptr; +static upb_Message* _upb_Decoder_NewSubMessage(upb_Decoder* d, + const upb_MiniTableSub* subs, + const upb_MiniTableField* field, + upb_TaggedMessagePtr* target) { + const upb_MiniTable* subl = subs[field->UPB_PRIVATE(submsg_index)].submsg; + UPB_ASSERT(subl); + upb_Message* msg = _upb_Message_New(subl, &d->arena); + if (!msg) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - if (arr == NULL || arr->size == 0) { - return; - } + // Extensions should not be unlinked. A message extension should not be + // registered until its sub-message type is available to be linked. + bool is_empty = subl == &_kUpb_MiniTable_Empty; + bool is_extension = field->mode & kUpb_LabelFlags_IsExtension; + UPB_ASSERT(!(is_empty && is_extension)); -#define VARINT_CASE(ctype, encode) \ - { \ - const ctype* start = _upb_array_constptr(arr); \ - const ctype* ptr = start + arr->size; \ - uint32_t tag = packed ? 0 : (f->number << 3) | kUpb_WireType_Varint; \ - do { \ - ptr--; \ - encode_varint(e, encode); \ - if (tag) encode_varint(e, tag); \ - } while (ptr != start); \ - } \ - break; + if (is_empty && !(d->options & kUpb_DecodeOption_ExperimentalAllowUnlinked)) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_UnlinkedSubMessage); + } -#define TAG(wire_type) (packed ? 
0 : (f->number << 3 | wire_type)) + upb_TaggedMessagePtr tagged = _upb_TaggedMessagePtr_Pack(msg, is_empty); + memcpy(target, &tagged, sizeof(tagged)); + return msg; +} - switch (f->UPB_PRIVATE(descriptortype)) { - case kUpb_FieldType_Double: - encode_fixedarray(e, arr, sizeof(double), TAG(kUpb_WireType_64Bit)); - break; - case kUpb_FieldType_Float: - encode_fixedarray(e, arr, sizeof(float), TAG(kUpb_WireType_32Bit)); - break; - case kUpb_FieldType_SFixed64: - case kUpb_FieldType_Fixed64: - encode_fixedarray(e, arr, sizeof(uint64_t), TAG(kUpb_WireType_64Bit)); - break; - case kUpb_FieldType_Fixed32: - case kUpb_FieldType_SFixed32: - encode_fixedarray(e, arr, sizeof(uint32_t), TAG(kUpb_WireType_32Bit)); - break; - case kUpb_FieldType_Int64: - case kUpb_FieldType_UInt64: - VARINT_CASE(uint64_t, *ptr); - case kUpb_FieldType_UInt32: - VARINT_CASE(uint32_t, *ptr); - case kUpb_FieldType_Int32: - case kUpb_FieldType_Enum: - VARINT_CASE(int32_t, (int64_t)*ptr); - case kUpb_FieldType_Bool: - VARINT_CASE(bool, *ptr); - case kUpb_FieldType_SInt32: - VARINT_CASE(int32_t, encode_zz32(*ptr)); - case kUpb_FieldType_SInt64: - VARINT_CASE(int64_t, encode_zz64(*ptr)); - case kUpb_FieldType_String: - case kUpb_FieldType_Bytes: { - const upb_StringView* start = _upb_array_constptr(arr); - const upb_StringView* ptr = start + arr->size; - do { - ptr--; - encode_bytes(e, ptr->data, ptr->size); - encode_varint(e, ptr->size); - encode_tag(e, f->number, kUpb_WireType_Delimited); - } while (ptr != start); - return; - } - case kUpb_FieldType_Group: { - const upb_TaggedMessagePtr* start = _upb_array_constptr(arr); - const upb_TaggedMessagePtr* ptr = start + arr->size; - const upb_MiniTable* subm = subs[f->UPB_PRIVATE(submsg_index)].submsg; - if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded); - do { - size_t size; - ptr--; - encode_tag(e, f->number, kUpb_WireType_EndGroup); - encode_TaggedMessagePtr(e, *ptr, subm, &size); - encode_tag(e, f->number, 
kUpb_WireType_StartGroup); - } while (ptr != start); - e->depth++; - return; - } - case kUpb_FieldType_Message: { - const upb_TaggedMessagePtr* start = _upb_array_constptr(arr); - const upb_TaggedMessagePtr* ptr = start + arr->size; - const upb_MiniTable* subm = subs[f->UPB_PRIVATE(submsg_index)].submsg; - if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded); - do { - size_t size; - ptr--; - encode_TaggedMessagePtr(e, *ptr, subm, &size); - encode_varint(e, size); - encode_tag(e, f->number, kUpb_WireType_Delimited); - } while (ptr != start); - e->depth++; - return; - } - } -#undef VARINT_CASE - - if (packed) { - encode_varint(e, e->limit - e->ptr - pre_len); - encode_tag(e, f->number, kUpb_WireType_Delimited); +static upb_Message* _upb_Decoder_ReuseSubMessage( + upb_Decoder* d, const upb_MiniTableSub* subs, + const upb_MiniTableField* field, upb_TaggedMessagePtr* target) { + upb_TaggedMessagePtr tagged = *target; + const upb_MiniTable* subl = subs[field->UPB_PRIVATE(submsg_index)].submsg; + UPB_ASSERT(subl); + if (!upb_TaggedMessagePtr_IsEmpty(tagged) || subl == &_kUpb_MiniTable_Empty) { + return _upb_TaggedMessagePtr_GetMessage(tagged); } -} -static void encode_mapentry(upb_encstate* e, uint32_t number, - const upb_MiniTable* layout, - const upb_MapEntry* ent) { - const upb_MiniTableField* key_field = &layout->fields[0]; - const upb_MiniTableField* val_field = &layout->fields[1]; - size_t pre_len = e->limit - e->ptr; + // We found an empty message from a previous parse that was performed before + // this field was linked. But it is linked now, so we want to allocate a new + // message of the correct type and promote data into it before continuing. 
+ upb_Message* existing = _upb_TaggedMessagePtr_GetEmptyMessage(tagged); + upb_Message* promoted = _upb_Decoder_NewSubMessage(d, subs, field, target); size_t size; - encode_scalar(e, &ent->data.v, layout->subs, val_field); - encode_scalar(e, &ent->data.k, layout->subs, key_field); - size = (e->limit - e->ptr) - pre_len; - encode_varint(e, size); - encode_tag(e, number, kUpb_WireType_Delimited); + const char* unknown = upb_Message_GetUnknown(existing, &size); + upb_DecodeStatus status = upb_Decode(unknown, size, promoted, subl, d->extreg, + d->options, &d->arena); + if (status != kUpb_DecodeStatus_Ok) _upb_Decoder_ErrorJmp(d, status); + return promoted; } -static void encode_map(upb_encstate* e, const upb_Message* msg, - const upb_MiniTableSub* subs, - const upb_MiniTableField* f) { - const upb_Map* map = *UPB_PTR_AT(msg, f->offset, const upb_Map*); - const upb_MiniTable* layout = subs[f->UPB_PRIVATE(submsg_index)].submsg; - UPB_ASSERT(layout->field_count == 2); - - if (map == NULL) return; - - if (e->options & kUpb_EncodeOption_Deterministic) { - _upb_sortedmap sorted; - _upb_mapsorter_pushmap(&e->sorter, - layout->fields[0].UPB_PRIVATE(descriptortype), map, - &sorted); - upb_MapEntry ent; - while (_upb_sortedmap_next(&e->sorter, map, &sorted, &ent)) { - encode_mapentry(e, f->number, layout, &ent); - } - _upb_mapsorter_popmap(&e->sorter, &sorted); - } else { - intptr_t iter = UPB_STRTABLE_BEGIN; - upb_StringView key; - upb_value val; - while (upb_strtable_next2(&map->table, &key, &val, &iter)) { - upb_MapEntry ent; - _upb_map_fromkey(key, &ent.data.k, map->key_size); - _upb_map_fromvalue(val, &ent.data.v, map->val_size); - encode_mapentry(e, f->number, layout, &ent); - } - } +static const char* _upb_Decoder_ReadString(upb_Decoder* d, const char* ptr, + int size, upb_StringView* str) { + const char* str_ptr = ptr; + ptr = upb_EpsCopyInputStream_ReadString(&d->input, &str_ptr, size, &d->arena); + if (!ptr) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + 
str->data = str_ptr; + str->size = size; + return ptr; } -static bool encode_shouldencode(upb_encstate* e, const upb_Message* msg, - const upb_MiniTableSub* subs, - const upb_MiniTableField* f) { - if (f->presence == 0) { - /* Proto3 presence or map/array. */ - const void* mem = UPB_PTR_AT(msg, f->offset, void); - switch (_upb_MiniTableField_GetRep(f)) { - case kUpb_FieldRep_1Byte: { - char ch; - memcpy(&ch, mem, 1); - return ch != 0; - } - case kUpb_FieldRep_4Byte: { - uint32_t u32; - memcpy(&u32, mem, 4); - return u32 != 0; - } - case kUpb_FieldRep_8Byte: { - uint64_t u64; - memcpy(&u64, mem, 8); - return u64 != 0; - } - case kUpb_FieldRep_StringView: { - const upb_StringView* str = (const upb_StringView*)mem; - return str->size != 0; - } - default: - UPB_UNREACHABLE(); - } - } else if (f->presence > 0) { - /* Proto2 presence: hasbit. */ - return _upb_hasbit_field(msg, f); - } else { - /* Field is in a oneof. */ - return _upb_getoneofcase_field(msg, f) == f->number; +UPB_FORCEINLINE +static const char* _upb_Decoder_RecurseSubMessage(upb_Decoder* d, + const char* ptr, + upb_Message* submsg, + const upb_MiniTable* subl, + uint32_t expected_end_group) { + if (--d->depth < 0) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_MaxDepthExceeded); } -} - -static void encode_field(upb_encstate* e, const upb_Message* msg, - const upb_MiniTableSub* subs, - const upb_MiniTableField* field) { - switch (upb_FieldMode_Get(field)) { - case kUpb_FieldMode_Array: - encode_array(e, msg, subs, field); - break; - case kUpb_FieldMode_Map: - encode_map(e, msg, subs, field); - break; - case kUpb_FieldMode_Scalar: - encode_scalar(e, UPB_PTR_AT(msg, field->offset, void), subs, field); - break; - default: - UPB_UNREACHABLE(); + ptr = _upb_Decoder_DecodeMessage(d, ptr, submsg, subl); + d->depth++; + if (d->end_group != expected_end_group) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); } + return ptr; } -static void encode_msgset_item(upb_encstate* e, - const upb_Message_Extension* 
ext) { - size_t size; - encode_tag(e, kUpb_MsgSet_Item, kUpb_WireType_EndGroup); - encode_message(e, ext->data.ptr, ext->ext->sub.submsg, &size); - encode_varint(e, size); - encode_tag(e, kUpb_MsgSet_Message, kUpb_WireType_Delimited); - encode_varint(e, ext->ext->field.number); - encode_tag(e, kUpb_MsgSet_TypeId, kUpb_WireType_Varint); - encode_tag(e, kUpb_MsgSet_Item, kUpb_WireType_StartGroup); +UPB_FORCEINLINE +static const char* _upb_Decoder_DecodeSubMessage( + upb_Decoder* d, const char* ptr, upb_Message* submsg, + const upb_MiniTableSub* subs, const upb_MiniTableField* field, int size) { + int saved_delta = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, size); + const upb_MiniTable* subl = subs[field->UPB_PRIVATE(submsg_index)].submsg; + UPB_ASSERT(subl); + ptr = _upb_Decoder_RecurseSubMessage(d, ptr, submsg, subl, DECODE_NOGROUP); + upb_EpsCopyInputStream_PopLimit(&d->input, ptr, saved_delta); + return ptr; } -static void encode_ext(upb_encstate* e, const upb_Message_Extension* ext, - bool is_message_set) { - if (UPB_UNLIKELY(is_message_set)) { - encode_msgset_item(e, ext); - } else { - encode_field(e, &ext->data, &ext->ext->sub, &ext->ext->field); +UPB_FORCEINLINE +static const char* _upb_Decoder_DecodeGroup(upb_Decoder* d, const char* ptr, + upb_Message* submsg, + const upb_MiniTable* subl, + uint32_t number) { + if (_upb_Decoder_IsDone(d, &ptr)) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); } + ptr = _upb_Decoder_RecurseSubMessage(d, ptr, submsg, subl, number); + d->end_group = DECODE_NOGROUP; + return ptr; } -static void encode_message(upb_encstate* e, const upb_Message* msg, - const upb_MiniTable* m, size_t* size) { - size_t pre_len = e->limit - e->ptr; +UPB_FORCEINLINE +static const char* _upb_Decoder_DecodeUnknownGroup(upb_Decoder* d, + const char* ptr, + uint32_t number) { + return _upb_Decoder_DecodeGroup(d, ptr, NULL, NULL, number); +} - if ((e->options & kUpb_EncodeOption_CheckRequired) && m->required_count) { - uint64_t msg_head; - 
memcpy(&msg_head, msg, 8); - msg_head = _upb_BigEndian_Swap64(msg_head); - if (upb_MiniTable_requiredmask(m) & ~msg_head) { - encode_err(e, kUpb_EncodeStatus_MissingRequired); - } - } - - if ((e->options & kUpb_EncodeOption_SkipUnknown) == 0) { - size_t unknown_size; - const char* unknown = upb_Message_GetUnknown(msg, &unknown_size); +UPB_FORCEINLINE +static const char* _upb_Decoder_DecodeKnownGroup( + upb_Decoder* d, const char* ptr, upb_Message* submsg, + const upb_MiniTableSub* subs, const upb_MiniTableField* field) { + const upb_MiniTable* subl = subs[field->UPB_PRIVATE(submsg_index)].submsg; + UPB_ASSERT(subl); + return _upb_Decoder_DecodeGroup(d, ptr, submsg, subl, field->number); +} - if (unknown) { - encode_bytes(e, unknown, unknown_size); - } - } +static char* upb_Decoder_EncodeVarint32(uint32_t val, char* ptr) { + do { + uint8_t byte = val & 0x7fU; + val >>= 7; + if (val) byte |= 0x80U; + *(ptr++) = byte; + } while (val); + return ptr; +} - if (m->ext != kUpb_ExtMode_NonExtendable) { - /* Encode all extensions together. Unlike C++, we do not attempt to keep - * these in field number order relative to normal fields or even to each - * other. 
*/ - size_t ext_count; - const upb_Message_Extension* ext = _upb_Message_Getexts(msg, &ext_count); - if (ext_count) { - if (e->options & kUpb_EncodeOption_Deterministic) { - _upb_sortedmap sorted; - _upb_mapsorter_pushexts(&e->sorter, ext, ext_count, &sorted); - while (_upb_sortedmap_nextext(&e->sorter, &sorted, &ext)) { - encode_ext(e, ext, m->ext == kUpb_ExtMode_IsMessageSet); - } - _upb_mapsorter_popmap(&e->sorter, &sorted); - } else { - const upb_Message_Extension* end = ext + ext_count; - for (; ext != end; ext++) { - encode_ext(e, ext, m->ext == kUpb_ExtMode_IsMessageSet); - } - } - } - } +static void _upb_Decoder_AddUnknownVarints(upb_Decoder* d, upb_Message* msg, + uint32_t val1, uint32_t val2) { + char buf[20]; + char* end = buf; + end = upb_Decoder_EncodeVarint32(val1, end); + end = upb_Decoder_EncodeVarint32(val2, end); - if (m->field_count) { - const upb_MiniTableField* f = &m->fields[m->field_count]; - const upb_MiniTableField* first = &m->fields[0]; - while (f != first) { - f--; - if (encode_shouldencode(e, msg, m->subs, f)) { - encode_field(e, msg, m->subs, f); - } - } + if (!_upb_Message_AddUnknown(msg, buf, end - buf, &d->arena)) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); } - - *size = (e->limit - e->ptr) - pre_len; } -static upb_EncodeStatus upb_Encoder_Encode(upb_encstate* const encoder, - const void* const msg, - const upb_MiniTable* const l, - char** const buf, - size_t* const size) { - // Unfortunately we must continue to perform hackery here because there are - // code paths which blindly copy the returned pointer without bothering to - // check for errors until much later (b/235839510). So we still set *buf to - // NULL on error and we still set it to non-NULL on a successful empty result. 
- if (UPB_SETJMP(encoder->err) == 0) { - encode_message(encoder, msg, l, size); - *size = encoder->limit - encoder->ptr; - if (*size == 0) { - static char ch; - *buf = &ch; - } else { - UPB_ASSERT(encoder->ptr); - *buf = encoder->ptr; - } - } else { - UPB_ASSERT(encoder->status != kUpb_EncodeStatus_Ok); - *buf = NULL; - *size = 0; - } +UPB_NOINLINE +static bool _upb_Decoder_CheckEnumSlow(upb_Decoder* d, const char* ptr, + upb_Message* msg, + const upb_MiniTableEnum* e, + const upb_MiniTableField* field, + uint32_t v) { + if (_upb_MiniTable_CheckEnumValueSlow(e, v)) return true; - _upb_mapsorter_destroy(&encoder->sorter); - return encoder->status; + // Unrecognized enum goes into unknown fields. + // For packed fields the tag could be arbitrarily far in the past, so we + // just re-encode the tag and value here. + uint32_t tag = ((uint32_t)field->number << 3) | kUpb_WireType_Varint; + upb_Message* unknown_msg = + field->mode & kUpb_LabelFlags_IsExtension ? d->unknown_msg : msg; + _upb_Decoder_AddUnknownVarints(d, unknown_msg, tag, v); + return false; } -upb_EncodeStatus upb_Encode(const void* msg, const upb_MiniTable* l, - int options, upb_Arena* arena, char** buf, - size_t* size) { - upb_encstate e; - unsigned depth = (unsigned)options >> 16; - - e.status = kUpb_EncodeStatus_Ok; - e.arena = arena; - e.buf = NULL; - e.limit = NULL; - e.ptr = NULL; - e.depth = depth ? 
depth : kUpb_WireFormat_DefaultDepthLimit; - e.options = options; - _upb_mapsorter_init(&e.sorter); +UPB_FORCEINLINE +static bool _upb_Decoder_CheckEnum(upb_Decoder* d, const char* ptr, + upb_Message* msg, const upb_MiniTableEnum* e, + const upb_MiniTableField* field, + wireval* val) { + uint32_t v = val->uint32_val; - return upb_Encoder_Encode(&e, msg, l, buf, size); + _kUpb_FastEnumCheck_Status status = _upb_MiniTable_CheckEnumValueFast(e, v); + if (UPB_LIKELY(status == _kUpb_FastEnumCheck_ValueIsInEnum)) return true; + return _upb_Decoder_CheckEnumSlow(d, ptr, msg, e, field, v); } +UPB_NOINLINE +static const char* _upb_Decoder_DecodeEnumArray(upb_Decoder* d, const char* ptr, + upb_Message* msg, + upb_Array* arr, + const upb_MiniTableSub* subs, + const upb_MiniTableField* field, + wireval* val) { + const upb_MiniTableEnum* e = subs[field->UPB_PRIVATE(submsg_index)].subenum; + if (!_upb_Decoder_CheckEnum(d, ptr, msg, e, field, val)) return ptr; + void* mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->size * 4, void); + arr->size++; + memcpy(mem, val, 4); + return ptr; +} +UPB_FORCEINLINE +static const char* _upb_Decoder_DecodeFixedPacked( + upb_Decoder* d, const char* ptr, upb_Array* arr, wireval* val, + const upb_MiniTableField* field, int lg2) { + int mask = (1 << lg2) - 1; + size_t count = val->size >> lg2; + if ((val->size & mask) != 0) { + // Length isn't a round multiple of elem size. + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); + } + _upb_Decoder_Reserve(d, arr, count); + void* mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << lg2, void); + arr->size += count; + // Note: if/when the decoder supports multi-buffer input, we will need to + // handle buffer seams here. 
+ if (_upb_IsLittleEndian()) { + ptr = upb_EpsCopyInputStream_Copy(&d->input, ptr, mem, val->size); + } else { + int delta = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, val->size); + char* dst = mem; + while (!_upb_Decoder_IsDone(d, &ptr)) { + if (lg2 == 2) { + ptr = upb_WireReader_ReadFixed32(ptr, dst); + dst += 4; + } else { + UPB_ASSERT(lg2 == 3); + ptr = upb_WireReader_ReadFixed64(ptr, dst); + dst += 8; + } + } + upb_EpsCopyInputStream_PopLimit(&d->input, ptr, delta); + } -// Must be last. + return ptr; +} -UPB_NOINLINE _upb_WireReader_ReadLongVarintRet -_upb_WireReader_ReadLongVarint(const char* ptr, uint64_t val) { - _upb_WireReader_ReadLongVarintRet ret = {NULL, 0}; - uint64_t byte; - int i; - for (i = 1; i < 10; i++) { - byte = (uint8_t)ptr[i]; - val += (byte - 1) << (i * 7); - if (!(byte & 0x80)) { - ret.ptr = ptr + i + 1; - ret.val = val; - return ret; +UPB_FORCEINLINE +static const char* _upb_Decoder_DecodeVarintPacked( + upb_Decoder* d, const char* ptr, upb_Array* arr, wireval* val, + const upb_MiniTableField* field, int lg2) { + int scale = 1 << lg2; + int saved_limit = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, val->size); + char* out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << lg2, void); + while (!_upb_Decoder_IsDone(d, &ptr)) { + wireval elem; + ptr = _upb_Decoder_DecodeVarint(d, ptr, &elem.uint64_val); + _upb_Decoder_Munge(field->UPB_PRIVATE(descriptortype), &elem); + if (_upb_Decoder_Reserve(d, arr, 1)) { + out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << lg2, void); } + arr->size++; + memcpy(out, &elem, scale); + out += scale; } - return ret; + upb_EpsCopyInputStream_PopLimit(&d->input, ptr, saved_limit); + return ptr; } -const char* _upb_WireReader_SkipGroup(const char* ptr, uint32_t tag, - int depth_limit, - upb_EpsCopyInputStream* stream) { - if (--depth_limit == 0) return NULL; - uint32_t end_group_tag = (tag & ~7ULL) | kUpb_WireType_EndGroup; - while (!upb_EpsCopyInputStream_IsDone(stream, &ptr)) { - uint32_t tag; - ptr = 
upb_WireReader_ReadTag(ptr, &tag); - if (!ptr) return NULL; - if (tag == end_group_tag) return ptr; - ptr = _upb_WireReader_SkipValue(ptr, tag, depth_limit, stream); - if (!ptr) return NULL; +UPB_NOINLINE +static const char* _upb_Decoder_DecodeEnumPacked( + upb_Decoder* d, const char* ptr, upb_Message* msg, upb_Array* arr, + const upb_MiniTableSub* subs, const upb_MiniTableField* field, + wireval* val) { + const upb_MiniTableEnum* e = subs[field->UPB_PRIVATE(submsg_index)].subenum; + int saved_limit = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, val->size); + char* out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size * 4, void); + while (!_upb_Decoder_IsDone(d, &ptr)) { + wireval elem; + ptr = _upb_Decoder_DecodeVarint(d, ptr, &elem.uint64_val); + _upb_Decoder_MungeInt32(&elem); + if (!_upb_Decoder_CheckEnum(d, ptr, msg, e, field, &elem)) { + continue; + } + if (_upb_Decoder_Reserve(d, arr, 1)) { + out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size * 4, void); + } + arr->size++; + memcpy(out, &elem, 4); + out += 4; } + upb_EpsCopyInputStream_PopLimit(&d->input, ptr, saved_limit); return ptr; } +upb_Array* _upb_Decoder_CreateArray(upb_Decoder* d, + const upb_MiniTableField* field) { + /* Maps descriptor type -> elem_size_lg2. */ + static const uint8_t kElemSizeLg2[] = { + [0] = -1, // invalid descriptor type + [kUpb_FieldType_Double] = 3, + [kUpb_FieldType_Float] = 2, + [kUpb_FieldType_Int64] = 3, + [kUpb_FieldType_UInt64] = 3, + [kUpb_FieldType_Int32] = 2, + [kUpb_FieldType_Fixed64] = 3, + [kUpb_FieldType_Fixed32] = 2, + [kUpb_FieldType_Bool] = 0, + [kUpb_FieldType_String] = UPB_SIZE(3, 4), + [kUpb_FieldType_Group] = UPB_SIZE(2, 3), + [kUpb_FieldType_Message] = UPB_SIZE(2, 3), + [kUpb_FieldType_Bytes] = UPB_SIZE(3, 4), + [kUpb_FieldType_UInt32] = 2, + [kUpb_FieldType_Enum] = 2, + [kUpb_FieldType_SFixed32] = 2, + [kUpb_FieldType_SFixed64] = 3, + [kUpb_FieldType_SInt32] = 2, + [kUpb_FieldType_SInt64] = 3, + }; - -// Must be last. 
- -typedef struct { - uint64_t present_values_mask; - uint32_t last_written_value; -} upb_MtDataEncoderInternal_EnumState; - -typedef struct { - uint64_t msg_modifiers; - uint32_t last_field_num; - enum { - kUpb_OneofState_NotStarted, - kUpb_OneofState_StartedOneof, - kUpb_OneofState_EmittedOneofField, - } oneof_state; -} upb_MtDataEncoderInternal_MsgState; - -typedef struct { - char* buf_start; // Only for checking kUpb_MtDataEncoder_MinSize. - union { - upb_MtDataEncoderInternal_EnumState enum_state; - upb_MtDataEncoderInternal_MsgState msg_state; - } state; -} upb_MtDataEncoderInternal; - -static upb_MtDataEncoderInternal* upb_MtDataEncoder_GetInternal( - upb_MtDataEncoder* e, char* buf_start) { - UPB_ASSERT(sizeof(upb_MtDataEncoderInternal) <= sizeof(e->internal)); - upb_MtDataEncoderInternal* ret = (upb_MtDataEncoderInternal*)e->internal; - ret->buf_start = buf_start; + size_t lg2 = kElemSizeLg2[field->UPB_PRIVATE(descriptortype)]; + upb_Array* ret = _upb_Array_New(&d->arena, 4, lg2); + if (!ret) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); return ret; } -static char* upb_MtDataEncoder_PutRaw(upb_MtDataEncoder* e, char* ptr, - char ch) { - upb_MtDataEncoderInternal* in = (upb_MtDataEncoderInternal*)e->internal; - UPB_ASSERT(ptr - in->buf_start < kUpb_MtDataEncoder_MinSize); - if (ptr == e->end) return NULL; - *ptr++ = ch; - return ptr; -} - -static char* upb_MtDataEncoder_Put(upb_MtDataEncoder* e, char* ptr, char ch) { - return upb_MtDataEncoder_PutRaw(e, ptr, _upb_ToBase92(ch)); -} +static const char* _upb_Decoder_DecodeToArray(upb_Decoder* d, const char* ptr, + upb_Message* msg, + const upb_MiniTableSub* subs, + const upb_MiniTableField* field, + wireval* val, int op) { + upb_Array** arrp = UPB_PTR_AT(msg, field->offset, void); + upb_Array* arr = *arrp; + void* mem; -static char* upb_MtDataEncoder_PutBase92Varint(upb_MtDataEncoder* e, char* ptr, - uint32_t val, int min, int max) { - int shift = upb_Log2Ceiling(_upb_FromBase92(max) - 
_upb_FromBase92(min) + 1); - UPB_ASSERT(shift <= 6); - uint32_t mask = (1 << shift) - 1; - do { - uint32_t bits = val & mask; - ptr = upb_MtDataEncoder_Put(e, ptr, bits + _upb_FromBase92(min)); - if (!ptr) return NULL; - val >>= shift; - } while (val); - return ptr; -} + if (arr) { + _upb_Decoder_Reserve(d, arr, 1); + } else { + arr = _upb_Decoder_CreateArray(d, field); + *arrp = arr; + } -char* upb_MtDataEncoder_PutModifier(upb_MtDataEncoder* e, char* ptr, - uint64_t mod) { - if (mod) { - ptr = upb_MtDataEncoder_PutBase92Varint(e, ptr, mod, - kUpb_EncodedValue_MinModifier, - kUpb_EncodedValue_MaxModifier); + switch (op) { + case kUpb_DecodeOp_Scalar1Byte: + case kUpb_DecodeOp_Scalar4Byte: + case kUpb_DecodeOp_Scalar8Byte: + /* Append scalar value. */ + mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << op, void); + arr->size++; + memcpy(mem, val, 1 << op); + return ptr; + case kUpb_DecodeOp_String: + _upb_Decoder_VerifyUtf8(d, ptr, val->size); + /* Fallthrough. */ + case kUpb_DecodeOp_Bytes: { + /* Append bytes. */ + upb_StringView* str = (upb_StringView*)_upb_array_ptr(arr) + arr->size; + arr->size++; + return _upb_Decoder_ReadString(d, ptr, val->size, str); + } + case kUpb_DecodeOp_SubMessage: { + /* Append submessage / group. 
*/ + upb_TaggedMessagePtr* target = UPB_PTR_AT( + _upb_array_ptr(arr), arr->size * sizeof(void*), upb_TaggedMessagePtr); + upb_Message* submsg = _upb_Decoder_NewSubMessage(d, subs, field, target); + arr->size++; + if (UPB_UNLIKELY(field->UPB_PRIVATE(descriptortype) == + kUpb_FieldType_Group)) { + return _upb_Decoder_DecodeKnownGroup(d, ptr, submsg, subs, field); + } else { + return _upb_Decoder_DecodeSubMessage(d, ptr, submsg, subs, field, + val->size); + } + } + case OP_FIXPCK_LG2(2): + case OP_FIXPCK_LG2(3): + return _upb_Decoder_DecodeFixedPacked(d, ptr, arr, val, field, + op - OP_FIXPCK_LG2(0)); + case OP_VARPCK_LG2(0): + case OP_VARPCK_LG2(2): + case OP_VARPCK_LG2(3): + return _upb_Decoder_DecodeVarintPacked(d, ptr, arr, val, field, + op - OP_VARPCK_LG2(0)); + case kUpb_DecodeOp_Enum: + return _upb_Decoder_DecodeEnumArray(d, ptr, msg, arr, subs, field, val); + case kUpb_DecodeOp_PackedEnum: + return _upb_Decoder_DecodeEnumPacked(d, ptr, msg, arr, subs, field, val); + default: + UPB_UNREACHABLE(); } - return ptr; } -char* upb_MtDataEncoder_EncodeExtension(upb_MtDataEncoder* e, char* ptr, - upb_FieldType type, uint32_t field_num, - uint64_t field_mod) { - upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); - in->state.msg_state.msg_modifiers = 0; - in->state.msg_state.last_field_num = 0; - in->state.msg_state.oneof_state = kUpb_OneofState_NotStarted; - - ptr = upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_ExtensionV1); - if (!ptr) return NULL; +upb_Map* _upb_Decoder_CreateMap(upb_Decoder* d, const upb_MiniTable* entry) { + /* Maps descriptor type -> upb map size. 
*/ + static const uint8_t kSizeInMap[] = { + [0] = -1, // invalid descriptor type */ + [kUpb_FieldType_Double] = 8, + [kUpb_FieldType_Float] = 4, + [kUpb_FieldType_Int64] = 8, + [kUpb_FieldType_UInt64] = 8, + [kUpb_FieldType_Int32] = 4, + [kUpb_FieldType_Fixed64] = 8, + [kUpb_FieldType_Fixed32] = 4, + [kUpb_FieldType_Bool] = 1, + [kUpb_FieldType_String] = UPB_MAPTYPE_STRING, + [kUpb_FieldType_Group] = sizeof(void*), + [kUpb_FieldType_Message] = sizeof(void*), + [kUpb_FieldType_Bytes] = UPB_MAPTYPE_STRING, + [kUpb_FieldType_UInt32] = 4, + [kUpb_FieldType_Enum] = 4, + [kUpb_FieldType_SFixed32] = 4, + [kUpb_FieldType_SFixed64] = 8, + [kUpb_FieldType_SInt32] = 4, + [kUpb_FieldType_SInt64] = 8, + }; - return upb_MtDataEncoder_PutField(e, ptr, type, field_num, field_mod); + const upb_MiniTableField* key_field = &entry->fields[0]; + const upb_MiniTableField* val_field = &entry->fields[1]; + char key_size = kSizeInMap[key_field->UPB_PRIVATE(descriptortype)]; + char val_size = kSizeInMap[val_field->UPB_PRIVATE(descriptortype)]; + UPB_ASSERT(key_field->offset == offsetof(upb_MapEntryData, k)); + UPB_ASSERT(val_field->offset == offsetof(upb_MapEntryData, v)); + upb_Map* ret = _upb_Map_New(&d->arena, key_size, val_size); + if (!ret) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + return ret; } -char* upb_MtDataEncoder_EncodeMap(upb_MtDataEncoder* e, char* ptr, - upb_FieldType key_type, - upb_FieldType value_type, uint64_t key_mod, - uint64_t value_mod) { - upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); - in->state.msg_state.msg_modifiers = 0; - in->state.msg_state.last_field_num = 0; - in->state.msg_state.oneof_state = kUpb_OneofState_NotStarted; - - ptr = upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_MapV1); - if (!ptr) return NULL; - - ptr = upb_MtDataEncoder_PutField(e, ptr, key_type, 1, key_mod); - if (!ptr) return NULL; - - return upb_MtDataEncoder_PutField(e, ptr, value_type, 2, value_mod); -} +static const char* 
_upb_Decoder_DecodeToMap(upb_Decoder* d, const char* ptr, + upb_Message* msg, + const upb_MiniTableSub* subs, + const upb_MiniTableField* field, + wireval* val) { + upb_Map** map_p = UPB_PTR_AT(msg, field->offset, upb_Map*); + upb_Map* map = *map_p; + upb_MapEntry ent; + UPB_ASSERT(upb_MiniTableField_Type(field) == kUpb_FieldType_Message); + const upb_MiniTable* entry = subs[field->UPB_PRIVATE(submsg_index)].submsg; -char* upb_MtDataEncoder_EncodeMessageSet(upb_MtDataEncoder* e, char* ptr) { - (void)upb_MtDataEncoder_GetInternal(e, ptr); - return upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_MessageSetV1); -} + UPB_ASSERT(entry); + UPB_ASSERT(entry->field_count == 2); + UPB_ASSERT(!upb_IsRepeatedOrMap(&entry->fields[0])); + UPB_ASSERT(!upb_IsRepeatedOrMap(&entry->fields[1])); -char* upb_MtDataEncoder_StartMessage(upb_MtDataEncoder* e, char* ptr, - uint64_t msg_mod) { - upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); - in->state.msg_state.msg_modifiers = msg_mod; - in->state.msg_state.last_field_num = 0; - in->state.msg_state.oneof_state = kUpb_OneofState_NotStarted; + if (!map) { + map = _upb_Decoder_CreateMap(d, entry); + *map_p = map; + } - ptr = upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_MessageV1); - if (!ptr) return NULL; + // Parse map entry. + memset(&ent, 0, sizeof(ent)); - return upb_MtDataEncoder_PutModifier(e, ptr, msg_mod); -} + if (entry->fields[1].UPB_PRIVATE(descriptortype) == kUpb_FieldType_Message || + entry->fields[1].UPB_PRIVATE(descriptortype) == kUpb_FieldType_Group) { + // Create proactively to handle the case where it doesn't appear. 
+ upb_TaggedMessagePtr msg; + _upb_Decoder_NewSubMessage(d, entry->subs, &entry->fields[1], &msg); + ent.data.v.val = upb_value_uintptr(msg); + } -static char* _upb_MtDataEncoder_MaybePutFieldSkip(upb_MtDataEncoder* e, - char* ptr, - uint32_t field_num) { - upb_MtDataEncoderInternal* in = (upb_MtDataEncoderInternal*)e->internal; - if (field_num <= in->state.msg_state.last_field_num) return NULL; - if (in->state.msg_state.last_field_num + 1 != field_num) { - // Put skip. - UPB_ASSERT(field_num > in->state.msg_state.last_field_num); - uint32_t skip = field_num - in->state.msg_state.last_field_num; - ptr = upb_MtDataEncoder_PutBase92Varint( - e, ptr, skip, kUpb_EncodedValue_MinSkip, kUpb_EncodedValue_MaxSkip); - if (!ptr) return NULL; + ptr = + _upb_Decoder_DecodeSubMessage(d, ptr, &ent.data, subs, field, val->size); + // check if ent had any unknown fields + size_t size; + upb_Message_GetUnknown(&ent.data, &size); + if (size != 0) { + char* buf; + size_t size; + uint32_t tag = ((uint32_t)field->number << 3) | kUpb_WireType_Delimited; + upb_EncodeStatus status = + upb_Encode(&ent.data, entry, 0, &d->arena, &buf, &size); + if (status != kUpb_EncodeStatus_Ok) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + } + _upb_Decoder_AddUnknownVarints(d, msg, tag, size); + if (!_upb_Message_AddUnknown(msg, buf, size, &d->arena)) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + } + } else { + if (_upb_Map_Insert(map, &ent.data.k, map->key_size, &ent.data.v, + map->val_size, + &d->arena) == kUpb_MapInsertStatus_OutOfMemory) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + } } - in->state.msg_state.last_field_num = field_num; return ptr; } -static char* _upb_MtDataEncoder_PutFieldType(upb_MtDataEncoder* e, char* ptr, - upb_FieldType type, - uint64_t field_mod) { - static const char kUpb_TypeToEncoded[] = { - [kUpb_FieldType_Double] = kUpb_EncodedType_Double, - [kUpb_FieldType_Float] = kUpb_EncodedType_Float, - [kUpb_FieldType_Int64] = 
kUpb_EncodedType_Int64, - [kUpb_FieldType_UInt64] = kUpb_EncodedType_UInt64, - [kUpb_FieldType_Int32] = kUpb_EncodedType_Int32, - [kUpb_FieldType_Fixed64] = kUpb_EncodedType_Fixed64, - [kUpb_FieldType_Fixed32] = kUpb_EncodedType_Fixed32, - [kUpb_FieldType_Bool] = kUpb_EncodedType_Bool, - [kUpb_FieldType_String] = kUpb_EncodedType_String, - [kUpb_FieldType_Group] = kUpb_EncodedType_Group, - [kUpb_FieldType_Message] = kUpb_EncodedType_Message, - [kUpb_FieldType_Bytes] = kUpb_EncodedType_Bytes, - [kUpb_FieldType_UInt32] = kUpb_EncodedType_UInt32, - [kUpb_FieldType_Enum] = kUpb_EncodedType_OpenEnum, - [kUpb_FieldType_SFixed32] = kUpb_EncodedType_SFixed32, - [kUpb_FieldType_SFixed64] = kUpb_EncodedType_SFixed64, - [kUpb_FieldType_SInt32] = kUpb_EncodedType_SInt32, - [kUpb_FieldType_SInt64] = kUpb_EncodedType_SInt64, - }; +static const char* _upb_Decoder_DecodeToSubMessage( + upb_Decoder* d, const char* ptr, upb_Message* msg, + const upb_MiniTableSub* subs, const upb_MiniTableField* field, wireval* val, + int op) { + void* mem = UPB_PTR_AT(msg, field->offset, void); + int type = field->UPB_PRIVATE(descriptortype); - int encoded_type = kUpb_TypeToEncoded[type]; + if (UPB_UNLIKELY(op == kUpb_DecodeOp_Enum) && + !_upb_Decoder_CheckEnum(d, ptr, msg, + subs[field->UPB_PRIVATE(submsg_index)].subenum, + field, val)) { + return ptr; + } - if (field_mod & kUpb_FieldModifier_IsClosedEnum) { - UPB_ASSERT(type == kUpb_FieldType_Enum); - encoded_type = kUpb_EncodedType_ClosedEnum; + /* Set presence if necessary. */ + if (field->presence > 0) { + _upb_sethas_field(msg, field); + } else if (field->presence < 0) { + /* Oneof case */ + uint32_t* oneof_case = _upb_oneofcase_field(msg, field); + if (op == kUpb_DecodeOp_SubMessage && *oneof_case != field->number) { + memset(mem, 0, sizeof(void*)); + } + *oneof_case = field->number; } - if (field_mod & kUpb_FieldModifier_IsRepeated) { - // Repeated fields shift the type number up (unlike other modifiers which - // are bit flags). 
- encoded_type += kUpb_EncodedType_RepeatedBase; + /* Store into message. */ + switch (op) { + case kUpb_DecodeOp_SubMessage: { + upb_TaggedMessagePtr* submsgp = mem; + upb_Message* submsg; + if (*submsgp) { + submsg = _upb_Decoder_ReuseSubMessage(d, subs, field, submsgp); + } else { + submsg = _upb_Decoder_NewSubMessage(d, subs, field, submsgp); + } + if (UPB_UNLIKELY(type == kUpb_FieldType_Group)) { + ptr = _upb_Decoder_DecodeKnownGroup(d, ptr, submsg, subs, field); + } else { + ptr = _upb_Decoder_DecodeSubMessage(d, ptr, submsg, subs, field, + val->size); + } + break; + } + case kUpb_DecodeOp_String: + _upb_Decoder_VerifyUtf8(d, ptr, val->size); + /* Fallthrough. */ + case kUpb_DecodeOp_Bytes: + return _upb_Decoder_ReadString(d, ptr, val->size, mem); + case kUpb_DecodeOp_Scalar8Byte: + memcpy(mem, val, 8); + break; + case kUpb_DecodeOp_Enum: + case kUpb_DecodeOp_Scalar4Byte: + memcpy(mem, val, 4); + break; + case kUpb_DecodeOp_Scalar1Byte: + memcpy(mem, val, 1); + break; + default: + UPB_UNREACHABLE(); } - return upb_MtDataEncoder_Put(e, ptr, encoded_type); + return ptr; } -static char* _upb_MtDataEncoder_MaybePutModifiers(upb_MtDataEncoder* e, - char* ptr, upb_FieldType type, - uint64_t field_mod) { - upb_MtDataEncoderInternal* in = (upb_MtDataEncoderInternal*)e->internal; - uint32_t encoded_modifiers = 0; - if ((field_mod & kUpb_FieldModifier_IsRepeated) && - upb_FieldType_IsPackable(type)) { - bool field_is_packed = field_mod & kUpb_FieldModifier_IsPacked; - bool default_is_packed = in->state.msg_state.msg_modifiers & - kUpb_MessageModifier_DefaultIsPacked; - if (field_is_packed != default_is_packed) { - encoded_modifiers |= kUpb_EncodedFieldModifier_FlipPacked; - } +UPB_NOINLINE +const char* _upb_Decoder_CheckRequired(upb_Decoder* d, const char* ptr, + const upb_Message* msg, + const upb_MiniTable* l) { + assert(l->required_count); + if (UPB_LIKELY((d->options & kUpb_DecodeOption_CheckRequired) == 0)) { + return ptr; } - - if (field_mod & 
kUpb_FieldModifier_IsProto3Singular) { - encoded_modifiers |= kUpb_EncodedFieldModifier_IsProto3Singular; + uint64_t msg_head; + memcpy(&msg_head, msg, 8); + msg_head = _upb_BigEndian_Swap64(msg_head); + if (upb_MiniTable_requiredmask(l) & ~msg_head) { + d->missing_required = true; } + return ptr; +} - if (field_mod & kUpb_FieldModifier_IsRequired) { - encoded_modifiers |= kUpb_EncodedFieldModifier_IsRequired; +UPB_FORCEINLINE +static bool _upb_Decoder_TryFastDispatch(upb_Decoder* d, const char** ptr, + upb_Message* msg, + const upb_MiniTable* layout) { +#if UPB_FASTTABLE + if (layout && layout->table_mask != (unsigned char)-1) { + uint16_t tag = _upb_FastDecoder_LoadTag(*ptr); + intptr_t table = decode_totable(layout); + *ptr = _upb_FastDecoder_TagDispatch(d, *ptr, msg, table, 0, tag); + return true; } +#endif + return false; +} - return upb_MtDataEncoder_PutModifier(e, ptr, encoded_modifiers); +static const char* upb_Decoder_SkipField(upb_Decoder* d, const char* ptr, + uint32_t tag) { + int field_number = tag >> 3; + int wire_type = tag & 7; + switch (wire_type) { + case kUpb_WireType_Varint: { + uint64_t val; + return _upb_Decoder_DecodeVarint(d, ptr, &val); + } + case kUpb_WireType_64Bit: + return ptr + 8; + case kUpb_WireType_32Bit: + return ptr + 4; + case kUpb_WireType_Delimited: { + uint32_t size; + ptr = upb_Decoder_DecodeSize(d, ptr, &size); + return ptr + size; + } + case kUpb_WireType_StartGroup: + return _upb_Decoder_DecodeUnknownGroup(d, ptr, field_number); + default: + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); + } } -char* upb_MtDataEncoder_PutField(upb_MtDataEncoder* e, char* ptr, - upb_FieldType type, uint32_t field_num, - uint64_t field_mod) { - upb_MtDataEncoder_GetInternal(e, ptr); +enum { + kStartItemTag = ((kUpb_MsgSet_Item << 3) | kUpb_WireType_StartGroup), + kEndItemTag = ((kUpb_MsgSet_Item << 3) | kUpb_WireType_EndGroup), + kTypeIdTag = ((kUpb_MsgSet_TypeId << 3) | kUpb_WireType_Varint), + kMessageTag = ((kUpb_MsgSet_Message << 
3) | kUpb_WireType_Delimited), +}; - ptr = _upb_MtDataEncoder_MaybePutFieldSkip(e, ptr, field_num); - if (!ptr) return NULL; +static void upb_Decoder_AddKnownMessageSetItem( + upb_Decoder* d, upb_Message* msg, const upb_MiniTableExtension* item_mt, + const char* data, uint32_t size) { + upb_Message_Extension* ext = + _upb_Message_GetOrCreateExtension(msg, item_mt, &d->arena); + if (UPB_UNLIKELY(!ext)) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + } + upb_Message* submsg = _upb_Decoder_NewSubMessage( + d, &ext->ext->sub, &ext->ext->field, (upb_TaggedMessagePtr*)&ext->data); + upb_DecodeStatus status = upb_Decode(data, size, submsg, item_mt->sub.submsg, + d->extreg, d->options, &d->arena); + if (status != kUpb_DecodeStatus_Ok) _upb_Decoder_ErrorJmp(d, status); +} - ptr = _upb_MtDataEncoder_PutFieldType(e, ptr, type, field_mod); - if (!ptr) return NULL; +static void upb_Decoder_AddUnknownMessageSetItem(upb_Decoder* d, + upb_Message* msg, + uint32_t type_id, + const char* message_data, + uint32_t message_size) { + char buf[60]; + char* ptr = buf; + ptr = upb_Decoder_EncodeVarint32(kStartItemTag, ptr); + ptr = upb_Decoder_EncodeVarint32(kTypeIdTag, ptr); + ptr = upb_Decoder_EncodeVarint32(type_id, ptr); + ptr = upb_Decoder_EncodeVarint32(kMessageTag, ptr); + ptr = upb_Decoder_EncodeVarint32(message_size, ptr); + char* split = ptr; - return _upb_MtDataEncoder_MaybePutModifiers(e, ptr, type, field_mod); + ptr = upb_Decoder_EncodeVarint32(kEndItemTag, ptr); + char* end = ptr; + + if (!_upb_Message_AddUnknown(msg, buf, split - buf, &d->arena) || + !_upb_Message_AddUnknown(msg, message_data, message_size, &d->arena) || + !_upb_Message_AddUnknown(msg, split, end - split, &d->arena)) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + } } -char* upb_MtDataEncoder_StartOneof(upb_MtDataEncoder* e, char* ptr) { - upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); - if (in->state.msg_state.oneof_state == kUpb_OneofState_NotStarted) { 
- ptr = upb_MtDataEncoder_Put(e, ptr, _upb_FromBase92(kUpb_EncodedValue_End)); +static void upb_Decoder_AddMessageSetItem(upb_Decoder* d, upb_Message* msg, + const upb_MiniTable* t, + uint32_t type_id, const char* data, + uint32_t size) { + const upb_MiniTableExtension* item_mt = + upb_ExtensionRegistry_Lookup(d->extreg, t, type_id); + if (item_mt) { + upb_Decoder_AddKnownMessageSetItem(d, msg, item_mt, data, size); } else { - ptr = upb_MtDataEncoder_Put( - e, ptr, _upb_FromBase92(kUpb_EncodedValue_OneofSeparator)); + upb_Decoder_AddUnknownMessageSetItem(d, msg, type_id, data, size); } - in->state.msg_state.oneof_state = kUpb_OneofState_StartedOneof; - return ptr; } -char* upb_MtDataEncoder_PutOneofField(upb_MtDataEncoder* e, char* ptr, - uint32_t field_num) { - upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); - if (in->state.msg_state.oneof_state == kUpb_OneofState_EmittedOneofField) { - ptr = upb_MtDataEncoder_Put( - e, ptr, _upb_FromBase92(kUpb_EncodedValue_FieldSeparator)); - if (!ptr) return NULL; +static const char* upb_Decoder_DecodeMessageSetItem( + upb_Decoder* d, const char* ptr, upb_Message* msg, + const upb_MiniTable* layout) { + uint32_t type_id = 0; + upb_StringView preserved = {NULL, 0}; + typedef enum { + kUpb_HaveId = 1 << 0, + kUpb_HavePayload = 1 << 1, + } StateMask; + StateMask state_mask = 0; + while (!_upb_Decoder_IsDone(d, &ptr)) { + uint32_t tag; + ptr = _upb_Decoder_DecodeTag(d, ptr, &tag); + switch (tag) { + case kEndItemTag: + return ptr; + case kTypeIdTag: { + uint64_t tmp; + ptr = _upb_Decoder_DecodeVarint(d, ptr, &tmp); + if (state_mask & kUpb_HaveId) break; // Ignore dup. 
+ state_mask |= kUpb_HaveId; + type_id = tmp; + if (state_mask & kUpb_HavePayload) { + upb_Decoder_AddMessageSetItem(d, msg, layout, type_id, preserved.data, + preserved.size); + } + break; + } + case kMessageTag: { + uint32_t size; + ptr = upb_Decoder_DecodeSize(d, ptr, &size); + const char* data = ptr; + ptr += size; + if (state_mask & kUpb_HavePayload) break; // Ignore dup. + state_mask |= kUpb_HavePayload; + if (state_mask & kUpb_HaveId) { + upb_Decoder_AddMessageSetItem(d, msg, layout, type_id, data, size); + } else { + // Out of order, we must preserve the payload. + preserved.data = data; + preserved.size = size; + } + break; + } + default: + // We do not preserve unexpected fields inside a message set item. + ptr = upb_Decoder_SkipField(d, ptr, tag); + break; + } } - ptr = upb_MtDataEncoder_PutBase92Varint(e, ptr, field_num, _upb_ToBase92(0), - _upb_ToBase92(63)); - in->state.msg_state.oneof_state = kUpb_OneofState_EmittedOneofField; - return ptr; + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); } -char* upb_MtDataEncoder_StartEnum(upb_MtDataEncoder* e, char* ptr) { - upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); - in->state.enum_state.present_values_mask = 0; - in->state.enum_state.last_written_value = 0; +static const upb_MiniTableField* _upb_Decoder_FindField(upb_Decoder* d, + const upb_MiniTable* t, + uint32_t field_number, + int* last_field_index) { + static upb_MiniTableField none = { + 0, 0, 0, 0, kUpb_FakeFieldType_FieldNotFound, 0}; + if (t == NULL) return &none; - return upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_EnumV1); -} + size_t idx = ((size_t)field_number) - 1; // 0 wraps to SIZE_MAX + if (idx < t->dense_below) { + /* Fastest case: index into dense fields. 
*/ + goto found; + } -static char* upb_MtDataEncoder_FlushDenseEnumMask(upb_MtDataEncoder* e, - char* ptr) { - upb_MtDataEncoderInternal* in = (upb_MtDataEncoderInternal*)e->internal; - ptr = upb_MtDataEncoder_Put(e, ptr, in->state.enum_state.present_values_mask); - in->state.enum_state.present_values_mask = 0; - in->state.enum_state.last_written_value += 5; - return ptr; -} + if (t->dense_below < t->field_count) { + /* Linear search non-dense fields. Resume scanning from last_field_index + * since fields are usually in order. */ + size_t last = *last_field_index; + for (idx = last; idx < t->field_count; idx++) { + if (t->fields[idx].number == field_number) { + goto found; + } + } -char* upb_MtDataEncoder_PutEnumValue(upb_MtDataEncoder* e, char* ptr, - uint32_t val) { - // TODO(b/229641772): optimize this encoding. - upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); - UPB_ASSERT(val >= in->state.enum_state.last_written_value); - uint32_t delta = val - in->state.enum_state.last_written_value; - if (delta >= 5 && in->state.enum_state.present_values_mask) { - ptr = upb_MtDataEncoder_FlushDenseEnumMask(e, ptr); - if (!ptr) { - return NULL; + for (idx = t->dense_below; idx < last; idx++) { + if (t->fields[idx].number == field_number) { + goto found; + } } - delta -= 5; } - if (delta >= 5) { - ptr = upb_MtDataEncoder_PutBase92Varint( - e, ptr, delta, kUpb_EncodedValue_MinSkip, kUpb_EncodedValue_MaxSkip); - in->state.enum_state.last_written_value += delta; - delta = 0; + if (d->extreg) { + switch (t->ext) { + case kUpb_ExtMode_Extendable: { + const upb_MiniTableExtension* ext = + upb_ExtensionRegistry_Lookup(d->extreg, t, field_number); + if (ext) return &ext->field; + break; + } + case kUpb_ExtMode_IsMessageSet: + if (field_number == kUpb_MsgSet_Item) { + static upb_MiniTableField item = { + 0, 0, 0, 0, kUpb_FakeFieldType_MessageSetItem, 0}; + return &item; + } + break; + } } - UPB_ASSERT((in->state.enum_state.present_values_mask >> delta) == 0); - 
in->state.enum_state.present_values_mask |= 1ULL << delta; - return ptr; -} + return &none; /* Unknown field. */ -char* upb_MtDataEncoder_EndEnum(upb_MtDataEncoder* e, char* ptr) { - upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); - if (!in->state.enum_state.present_values_mask) return ptr; - return upb_MtDataEncoder_FlushDenseEnumMask(e, ptr); +found: + UPB_ASSERT(t->fields[idx].number == field_number); + *last_field_index = idx; + return &t->fields[idx]; } +int _upb_Decoder_GetVarintOp(const upb_MiniTableField* field) { + static const int8_t kVarintOps[] = { + [kUpb_FakeFieldType_FieldNotFound] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Double] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Float] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Int64] = kUpb_DecodeOp_Scalar8Byte, + [kUpb_FieldType_UInt64] = kUpb_DecodeOp_Scalar8Byte, + [kUpb_FieldType_Int32] = kUpb_DecodeOp_Scalar4Byte, + [kUpb_FieldType_Fixed64] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Fixed32] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Bool] = kUpb_DecodeOp_Scalar1Byte, + [kUpb_FieldType_String] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Group] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Message] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Bytes] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_UInt32] = kUpb_DecodeOp_Scalar4Byte, + [kUpb_FieldType_Enum] = kUpb_DecodeOp_Enum, + [kUpb_FieldType_SFixed32] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_SFixed64] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_SInt32] = kUpb_DecodeOp_Scalar4Byte, + [kUpb_FieldType_SInt64] = kUpb_DecodeOp_Scalar8Byte, + [kUpb_FakeFieldType_MessageSetItem] = kUpb_DecodeOp_UnknownField, + }; -const char _kUpb_ToBase92[] = { - ' ', '!', '#', '$', '%', '&', '(', ')', '*', '+', ',', '-', '.', '/', - '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', - '>', '?', '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', - 'L', 'M', 'N', 'O', 'P', 
'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', - 'Z', '[', ']', '^', '_', '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', - 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', - 'w', 'x', 'y', 'z', '{', '|', '}', '~', -}; - -const int8_t _kUpb_FromBase92[] = { - 0, 1, -1, 2, 3, 4, 5, -1, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, - 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, - 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, - 55, 56, 57, -1, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, - 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, -}; - - - -// Must be last. - -typedef struct { - upb_MdDecoder base; - upb_Arena* arena; - upb_MiniTableEnum* enum_table; - uint32_t enum_value_count; - uint32_t enum_data_count; - uint32_t enum_data_capacity; -} upb_MdEnumDecoder; - -static size_t upb_MiniTableEnum_Size(size_t count) { - return sizeof(upb_MiniTableEnum) + count * sizeof(uint32_t); + return kVarintOps[field->UPB_PRIVATE(descriptortype)]; } -static upb_MiniTableEnum* _upb_MiniTable_AddEnumDataMember(upb_MdEnumDecoder* d, - uint32_t val) { - if (d->enum_data_count == d->enum_data_capacity) { - size_t old_sz = upb_MiniTableEnum_Size(d->enum_data_capacity); - d->enum_data_capacity = UPB_MAX(2, d->enum_data_capacity * 2); - size_t new_sz = upb_MiniTableEnum_Size(d->enum_data_capacity); - d->enum_table = upb_Arena_Realloc(d->arena, d->enum_table, old_sz, new_sz); - upb_MdDecoder_CheckOutOfMemory(&d->base, d->enum_table); +UPB_FORCEINLINE +static void _upb_Decoder_CheckUnlinked(upb_Decoder* d, const upb_MiniTable* mt, + const upb_MiniTableField* field, + int* op) { + // If sub-message is not linked, treat as unknown. 
+ if (field->mode & kUpb_LabelFlags_IsExtension) return; + const upb_MiniTableSub* sub = &mt->subs[field->UPB_PRIVATE(submsg_index)]; + if ((d->options & kUpb_DecodeOption_ExperimentalAllowUnlinked) || + sub->submsg != &_kUpb_MiniTable_Empty) { + return; } - d->enum_table->data[d->enum_data_count++] = val; - return d->enum_table; -} - -static void upb_MiniTableEnum_BuildValue(upb_MdEnumDecoder* d, uint32_t val) { - upb_MiniTableEnum* table = d->enum_table; - d->enum_value_count++; - if (table->value_count || (val > 512 && d->enum_value_count < val / 32)) { - if (table->value_count == 0) { - assert(d->enum_data_count == table->mask_limit / 32); - } - table = _upb_MiniTable_AddEnumDataMember(d, val); - table->value_count++; - } else { - uint32_t new_mask_limit = ((val / 32) + 1) * 32; - while (table->mask_limit < new_mask_limit) { - table = _upb_MiniTable_AddEnumDataMember(d, 0); - table->mask_limit += 32; - } - table->data[val / 32] |= 1ULL << (val % 32); +#ifndef NDEBUG + const upb_MiniTableField* oneof = upb_MiniTable_GetOneof(mt, field); + if (oneof) { + // All other members of the oneof must be message fields that are also + // unlinked. + do { + assert(upb_MiniTableField_CType(oneof) == kUpb_CType_Message); + const upb_MiniTableSub* oneof_sub = + &mt->subs[oneof->UPB_PRIVATE(submsg_index)]; + assert(!oneof_sub); + } while (upb_MiniTable_NextOneofField(mt, &oneof)); } +#endif // NDEBUG + *op = kUpb_DecodeOp_UnknownField; } -static upb_MiniTableEnum* upb_MtDecoder_DoBuildMiniTableEnum( - upb_MdEnumDecoder* d, const char* data, size_t len) { - // If the string is non-empty then it must begin with a version tag. - if (len) { - if (*data != kUpb_EncodedVersion_EnumV1) { - upb_MdDecoder_ErrorJmp(&d->base, "Invalid enum version: %c", *data); - } - data++; - len--; - } - - upb_MdDecoder_CheckOutOfMemory(&d->base, d->enum_table); - - // Guarantee at least 64 bits of mask without checking mask size. 
- d->enum_table->mask_limit = 64; - d->enum_table = _upb_MiniTable_AddEnumDataMember(d, 0); - d->enum_table = _upb_MiniTable_AddEnumDataMember(d, 0); +int _upb_Decoder_GetDelimitedOp(upb_Decoder* d, const upb_MiniTable* mt, + const upb_MiniTableField* field) { + enum { kRepeatedBase = 19 }; - d->enum_table->value_count = 0; + static const int8_t kDelimitedOps[] = { + /* For non-repeated field type. */ + [kUpb_FakeFieldType_FieldNotFound] = + kUpb_DecodeOp_UnknownField, // Field not found. + [kUpb_FieldType_Double] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Float] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Int64] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_UInt64] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Int32] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Fixed64] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Fixed32] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Bool] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_String] = kUpb_DecodeOp_String, + [kUpb_FieldType_Group] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Message] = kUpb_DecodeOp_SubMessage, + [kUpb_FieldType_Bytes] = kUpb_DecodeOp_Bytes, + [kUpb_FieldType_UInt32] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Enum] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_SFixed32] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_SFixed64] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_SInt32] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_SInt64] = kUpb_DecodeOp_UnknownField, + [kUpb_FakeFieldType_MessageSetItem] = kUpb_DecodeOp_UnknownField, + // For repeated field type. 
*/ + [kRepeatedBase + kUpb_FieldType_Double] = OP_FIXPCK_LG2(3), + [kRepeatedBase + kUpb_FieldType_Float] = OP_FIXPCK_LG2(2), + [kRepeatedBase + kUpb_FieldType_Int64] = OP_VARPCK_LG2(3), + [kRepeatedBase + kUpb_FieldType_UInt64] = OP_VARPCK_LG2(3), + [kRepeatedBase + kUpb_FieldType_Int32] = OP_VARPCK_LG2(2), + [kRepeatedBase + kUpb_FieldType_Fixed64] = OP_FIXPCK_LG2(3), + [kRepeatedBase + kUpb_FieldType_Fixed32] = OP_FIXPCK_LG2(2), + [kRepeatedBase + kUpb_FieldType_Bool] = OP_VARPCK_LG2(0), + [kRepeatedBase + kUpb_FieldType_String] = kUpb_DecodeOp_String, + [kRepeatedBase + kUpb_FieldType_Group] = kUpb_DecodeOp_SubMessage, + [kRepeatedBase + kUpb_FieldType_Message] = kUpb_DecodeOp_SubMessage, + [kRepeatedBase + kUpb_FieldType_Bytes] = kUpb_DecodeOp_Bytes, + [kRepeatedBase + kUpb_FieldType_UInt32] = OP_VARPCK_LG2(2), + [kRepeatedBase + kUpb_FieldType_Enum] = kUpb_DecodeOp_PackedEnum, + [kRepeatedBase + kUpb_FieldType_SFixed32] = OP_FIXPCK_LG2(2), + [kRepeatedBase + kUpb_FieldType_SFixed64] = OP_FIXPCK_LG2(3), + [kRepeatedBase + kUpb_FieldType_SInt32] = OP_VARPCK_LG2(2), + [kRepeatedBase + kUpb_FieldType_SInt64] = OP_VARPCK_LG2(3), + // Omitting kUpb_FakeFieldType_MessageSetItem, because we never emit a + // repeated msgset type + }; - const char* ptr = data; - uint32_t base = 0; + int ndx = field->UPB_PRIVATE(descriptortype); + if (upb_FieldMode_Get(field) == kUpb_FieldMode_Array) ndx += kRepeatedBase; + int op = kDelimitedOps[ndx]; - while (ptr < d->base.end) { - char ch = *ptr++; - if (ch <= kUpb_EncodedValue_MaxEnumMask) { - uint32_t mask = _upb_FromBase92(ch); - for (int i = 0; i < 5; i++, base++, mask >>= 1) { - if (mask & 1) upb_MiniTableEnum_BuildValue(d, base); - } - } else if (kUpb_EncodedValue_MinSkip <= ch && - ch <= kUpb_EncodedValue_MaxSkip) { - uint32_t skip; - ptr = upb_MdDecoder_DecodeBase92Varint(&d->base, ptr, ch, - kUpb_EncodedValue_MinSkip, - kUpb_EncodedValue_MaxSkip, &skip); - base += skip; - } else { - upb_MdDecoder_ErrorJmp(&d->base, 
"Unexpected character: %c", ch); - } + if (op == kUpb_DecodeOp_SubMessage) { + _upb_Decoder_CheckUnlinked(d, mt, field, &op); } - return d->enum_table; + return op; } -static upb_MiniTableEnum* upb_MtDecoder_BuildMiniTableEnum( - upb_MdEnumDecoder* const decoder, const char* const data, size_t const len) { - if (UPB_SETJMP(decoder->base.err) != 0) return NULL; - return upb_MtDecoder_DoBuildMiniTableEnum(decoder, data, len); -} +UPB_FORCEINLINE +static const char* _upb_Decoder_DecodeWireValue(upb_Decoder* d, const char* ptr, + const upb_MiniTable* mt, + const upb_MiniTableField* field, + int wire_type, wireval* val, + int* op) { + static const unsigned kFixed32OkMask = (1 << kUpb_FieldType_Float) | + (1 << kUpb_FieldType_Fixed32) | + (1 << kUpb_FieldType_SFixed32); -upb_MiniTableEnum* upb_MiniDescriptor_BuildEnum(const char* data, size_t len, - upb_Arena* arena, - upb_Status* status) { - upb_MdEnumDecoder decoder = { - .base = - { - .end = UPB_PTRADD(data, len), - .status = status, - }, - .arena = arena, - .enum_table = upb_Arena_Malloc(arena, upb_MiniTableEnum_Size(2)), - .enum_value_count = 0, - .enum_data_count = 0, - .enum_data_capacity = 1, - }; + static const unsigned kFixed64OkMask = (1 << kUpb_FieldType_Double) | + (1 << kUpb_FieldType_Fixed64) | + (1 << kUpb_FieldType_SFixed64); - return upb_MtDecoder_BuildMiniTableEnum(&decoder, data, len); + switch (wire_type) { + case kUpb_WireType_Varint: + ptr = _upb_Decoder_DecodeVarint(d, ptr, &val->uint64_val); + *op = _upb_Decoder_GetVarintOp(field); + _upb_Decoder_Munge(field->UPB_PRIVATE(descriptortype), val); + return ptr; + case kUpb_WireType_32Bit: + *op = kUpb_DecodeOp_Scalar4Byte; + if (((1 << field->UPB_PRIVATE(descriptortype)) & kFixed32OkMask) == 0) { + *op = kUpb_DecodeOp_UnknownField; + } + return upb_WireReader_ReadFixed32(ptr, &val->uint32_val); + case kUpb_WireType_64Bit: + *op = kUpb_DecodeOp_Scalar8Byte; + if (((1 << field->UPB_PRIVATE(descriptortype)) & kFixed64OkMask) == 0) { + *op = 
kUpb_DecodeOp_UnknownField; + } + return upb_WireReader_ReadFixed64(ptr, &val->uint64_val); + case kUpb_WireType_Delimited: + ptr = upb_Decoder_DecodeSize(d, ptr, &val->size); + *op = _upb_Decoder_GetDelimitedOp(d, mt, field); + return ptr; + case kUpb_WireType_StartGroup: + val->uint32_val = field->number; + if (field->UPB_PRIVATE(descriptortype) == kUpb_FieldType_Group) { + *op = kUpb_DecodeOp_SubMessage; + _upb_Decoder_CheckUnlinked(d, mt, field, op); + } else if (field->UPB_PRIVATE(descriptortype) == + kUpb_FakeFieldType_MessageSetItem) { + *op = kUpb_DecodeOp_MessageSetItem; + } else { + *op = kUpb_DecodeOp_UnknownField; + } + return ptr; + default: + break; + } + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); } +UPB_FORCEINLINE +static const char* _upb_Decoder_DecodeKnownField( + upb_Decoder* d, const char* ptr, upb_Message* msg, + const upb_MiniTable* layout, const upb_MiniTableField* field, int op, + wireval* val) { + const upb_MiniTableSub* subs = layout->subs; + uint8_t mode = field->mode; -#include -#include - + if (UPB_UNLIKELY(mode & kUpb_LabelFlags_IsExtension)) { + const upb_MiniTableExtension* ext_layout = + (const upb_MiniTableExtension*)field; + upb_Message_Extension* ext = + _upb_Message_GetOrCreateExtension(msg, ext_layout, &d->arena); + if (UPB_UNLIKELY(!ext)) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + } + d->unknown_msg = msg; + msg = &ext->data; + subs = &ext->ext->sub; + } -// Must be last. + switch (mode & kUpb_FieldMode_Mask) { + case kUpb_FieldMode_Array: + return _upb_Decoder_DecodeToArray(d, ptr, msg, subs, field, val, op); + case kUpb_FieldMode_Map: + return _upb_Decoder_DecodeToMap(d, ptr, msg, subs, field, val); + case kUpb_FieldMode_Scalar: + return _upb_Decoder_DecodeToSubMessage(d, ptr, msg, subs, field, val, op); + default: + UPB_UNREACHABLE(); + } +} -// Note: we sort by this number when calculating layout order. -typedef enum { - kUpb_LayoutItemType_OneofCase, // Oneof case. 
- kUpb_LayoutItemType_OneofField, // Oneof field data. - kUpb_LayoutItemType_Field, // Non-oneof field data. +static const char* _upb_Decoder_ReverseSkipVarint(const char* ptr, + uint32_t val) { + uint32_t seen = 0; + do { + ptr--; + seen <<= 7; + seen |= *ptr & 0x7f; + } while (seen != val); + return ptr; +} - kUpb_LayoutItemType_Max = kUpb_LayoutItemType_Field, -} upb_LayoutItemType; +static const char* _upb_Decoder_DecodeUnknownField(upb_Decoder* d, + const char* ptr, + upb_Message* msg, + int field_number, + int wire_type, wireval val) { + if (field_number == 0) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); -#define kUpb_LayoutItem_IndexSentinel ((uint16_t)-1) + // Since unknown fields are the uncommon case, we do a little extra work here + // to walk backwards through the buffer to find the field start. This frees + // up a register in the fast paths (when the field is known), which leads to + // significant speedups in benchmarks. + const char* start = ptr; -typedef struct { - // Index of the corresponding field. When this is a oneof field, the field's - // offset will be the index of the next field in a linked list. 
- uint16_t field_index; - uint16_t offset; - upb_FieldRep rep; - upb_LayoutItemType type; -} upb_LayoutItem; + if (wire_type == kUpb_WireType_Delimited) ptr += val.size; + if (msg) { + switch (wire_type) { + case kUpb_WireType_Varint: + case kUpb_WireType_Delimited: + start--; + while (start[-1] & 0x80) start--; + break; + case kUpb_WireType_32Bit: + start -= 4; + break; + case kUpb_WireType_64Bit: + start -= 8; + break; + default: + break; + } -typedef struct { - upb_LayoutItem* data; - size_t size; - size_t capacity; -} upb_LayoutItemVector; + assert(start == d->debug_valstart); + uint32_t tag = ((uint32_t)field_number << 3) | wire_type; + start = _upb_Decoder_ReverseSkipVarint(start, tag); + assert(start == d->debug_tagstart); -typedef struct { - upb_MdDecoder base; - upb_MiniTable* table; - upb_MiniTableField* fields; - upb_MiniTablePlatform platform; - upb_LayoutItemVector vec; - upb_Arena* arena; -} upb_MtDecoder; + if (wire_type == kUpb_WireType_StartGroup) { + d->unknown = start; + d->unknown_msg = msg; + ptr = _upb_Decoder_DecodeUnknownGroup(d, ptr, field_number); + start = d->unknown; + d->unknown = NULL; + } + if (!_upb_Message_AddUnknown(msg, start, ptr - start, &d->arena)) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + } + } else if (wire_type == kUpb_WireType_StartGroup) { + ptr = _upb_Decoder_DecodeUnknownGroup(d, ptr, field_number); + } + return ptr; +} -// In each field's offset, we temporarily store a presence classifier: -enum PresenceClass { - kNoPresence = 0, - kHasbitPresence = 1, - kRequiredPresence = 2, - kOneofBase = 3, - // Negative values refer to a specific oneof with that number. Positive - // values >= kOneofBase indicate that this field is in a oneof, and specify - // the next field in this oneof's linked list. 
-}; +UPB_NOINLINE +static const char* _upb_Decoder_DecodeMessage(upb_Decoder* d, const char* ptr, + upb_Message* msg, + const upb_MiniTable* layout) { + int last_field_index = 0; -static bool upb_MtDecoder_FieldIsPackable(upb_MiniTableField* field) { - return (field->mode & kUpb_FieldMode_Array) && - upb_FieldType_IsPackable(field->UPB_PRIVATE(descriptortype)); -} +#if UPB_FASTTABLE + // The first time we want to skip fast dispatch, because we may have just been + // invoked by the fast parser to handle a case that it bailed on. + if (!_upb_Decoder_IsDone(d, &ptr)) goto nofast; +#endif -typedef struct { - uint16_t submsg_count; - uint16_t subenum_count; -} upb_SubCounts; + while (!_upb_Decoder_IsDone(d, &ptr)) { + uint32_t tag; + const upb_MiniTableField* field; + int field_number; + int wire_type; + wireval val; + int op; -static void upb_MiniTable_SetTypeAndSub(upb_MiniTableField* field, - upb_FieldType type, - upb_SubCounts* sub_counts, - uint64_t msg_modifiers, - bool is_proto3_enum) { - if (is_proto3_enum) { - UPB_ASSERT(type == kUpb_FieldType_Enum); - type = kUpb_FieldType_Int32; - field->mode |= kUpb_LabelFlags_IsAlternate; - } else if (type == kUpb_FieldType_String && - !(msg_modifiers & kUpb_MessageModifier_ValidateUtf8)) { - type = kUpb_FieldType_Bytes; - field->mode |= kUpb_LabelFlags_IsAlternate; - } + if (_upb_Decoder_TryFastDispatch(d, &ptr, msg, layout)) break; - field->UPB_PRIVATE(descriptortype) = type; +#if UPB_FASTTABLE + nofast: +#endif - if (upb_MtDecoder_FieldIsPackable(field) && - (msg_modifiers & kUpb_MessageModifier_DefaultIsPacked)) { - field->mode |= kUpb_LabelFlags_IsPacked; - } +#ifndef NDEBUG + d->debug_tagstart = ptr; +#endif - if (type == kUpb_FieldType_Message || type == kUpb_FieldType_Group) { - field->UPB_PRIVATE(submsg_index) = sub_counts->submsg_count++; - } else if (type == kUpb_FieldType_Enum) { - // We will need to update this later once we know the total number of - // submsg fields. 
- field->UPB_PRIVATE(submsg_index) = sub_counts->subenum_count++; - } else { - field->UPB_PRIVATE(submsg_index) = kUpb_NoSub; - } -} + UPB_ASSERT(ptr < d->input.limit_ptr); + ptr = _upb_Decoder_DecodeTag(d, ptr, &tag); + field_number = tag >> 3; + wire_type = tag & 7; -static const char kUpb_EncodedToType[] = { - [kUpb_EncodedType_Double] = kUpb_FieldType_Double, - [kUpb_EncodedType_Float] = kUpb_FieldType_Float, - [kUpb_EncodedType_Int64] = kUpb_FieldType_Int64, - [kUpb_EncodedType_UInt64] = kUpb_FieldType_UInt64, - [kUpb_EncodedType_Int32] = kUpb_FieldType_Int32, - [kUpb_EncodedType_Fixed64] = kUpb_FieldType_Fixed64, - [kUpb_EncodedType_Fixed32] = kUpb_FieldType_Fixed32, - [kUpb_EncodedType_Bool] = kUpb_FieldType_Bool, - [kUpb_EncodedType_String] = kUpb_FieldType_String, - [kUpb_EncodedType_Group] = kUpb_FieldType_Group, - [kUpb_EncodedType_Message] = kUpb_FieldType_Message, - [kUpb_EncodedType_Bytes] = kUpb_FieldType_Bytes, - [kUpb_EncodedType_UInt32] = kUpb_FieldType_UInt32, - [kUpb_EncodedType_OpenEnum] = kUpb_FieldType_Enum, - [kUpb_EncodedType_SFixed32] = kUpb_FieldType_SFixed32, - [kUpb_EncodedType_SFixed64] = kUpb_FieldType_SFixed64, - [kUpb_EncodedType_SInt32] = kUpb_FieldType_SInt32, - [kUpb_EncodedType_SInt64] = kUpb_FieldType_SInt64, - [kUpb_EncodedType_ClosedEnum] = kUpb_FieldType_Enum, -}; +#ifndef NDEBUG + d->debug_valstart = ptr; +#endif -static void upb_MiniTable_SetField(upb_MtDecoder* d, uint8_t ch, - upb_MiniTableField* field, - uint64_t msg_modifiers, - upb_SubCounts* sub_counts) { - static const char kUpb_EncodedToFieldRep[] = { - [kUpb_EncodedType_Double] = kUpb_FieldRep_8Byte, - [kUpb_EncodedType_Float] = kUpb_FieldRep_4Byte, - [kUpb_EncodedType_Int64] = kUpb_FieldRep_8Byte, - [kUpb_EncodedType_UInt64] = kUpb_FieldRep_8Byte, - [kUpb_EncodedType_Int32] = kUpb_FieldRep_4Byte, - [kUpb_EncodedType_Fixed64] = kUpb_FieldRep_8Byte, - [kUpb_EncodedType_Fixed32] = kUpb_FieldRep_4Byte, - [kUpb_EncodedType_Bool] = kUpb_FieldRep_1Byte, - 
[kUpb_EncodedType_String] = kUpb_FieldRep_StringView, - [kUpb_EncodedType_Bytes] = kUpb_FieldRep_StringView, - [kUpb_EncodedType_UInt32] = kUpb_FieldRep_4Byte, - [kUpb_EncodedType_OpenEnum] = kUpb_FieldRep_4Byte, - [kUpb_EncodedType_SFixed32] = kUpb_FieldRep_4Byte, - [kUpb_EncodedType_SFixed64] = kUpb_FieldRep_8Byte, - [kUpb_EncodedType_SInt32] = kUpb_FieldRep_4Byte, - [kUpb_EncodedType_SInt64] = kUpb_FieldRep_8Byte, - [kUpb_EncodedType_ClosedEnum] = kUpb_FieldRep_4Byte, - }; + if (wire_type == kUpb_WireType_EndGroup) { + d->end_group = field_number; + return ptr; + } - char pointer_rep = d->platform == kUpb_MiniTablePlatform_32Bit - ? kUpb_FieldRep_4Byte - : kUpb_FieldRep_8Byte; + field = _upb_Decoder_FindField(d, layout, field_number, &last_field_index); + ptr = _upb_Decoder_DecodeWireValue(d, ptr, layout, field, wire_type, &val, + &op); - int8_t type = _upb_FromBase92(ch); - if (ch >= _upb_ToBase92(kUpb_EncodedType_RepeatedBase)) { - type -= kUpb_EncodedType_RepeatedBase; - field->mode = kUpb_FieldMode_Array; - field->mode |= pointer_rep << kUpb_FieldRep_Shift; - field->offset = kNoPresence; - } else { - field->mode = kUpb_FieldMode_Scalar; - field->offset = kHasbitPresence; - if (type == kUpb_EncodedType_Group || type == kUpb_EncodedType_Message) { - field->mode |= pointer_rep << kUpb_FieldRep_Shift; - } else if ((unsigned long)type >= sizeof(kUpb_EncodedToFieldRep)) { - upb_MdDecoder_ErrorJmp(&d->base, "Invalid field type: %d", (int)type); + if (op >= 0) { + ptr = _upb_Decoder_DecodeKnownField(d, ptr, msg, layout, field, op, &val); } else { - field->mode |= kUpb_EncodedToFieldRep[type] << kUpb_FieldRep_Shift; + switch (op) { + case kUpb_DecodeOp_UnknownField: + ptr = _upb_Decoder_DecodeUnknownField(d, ptr, msg, field_number, + wire_type, val); + break; + case kUpb_DecodeOp_MessageSetItem: + ptr = upb_Decoder_DecodeMessageSetItem(d, ptr, msg, layout); + break; + } } } - if ((unsigned long)type >= sizeof(kUpb_EncodedToType)) { - upb_MdDecoder_ErrorJmp(&d->base, 
"Invalid field type: %d", (int)type); - } - upb_MiniTable_SetTypeAndSub(field, kUpb_EncodedToType[type], sub_counts, - msg_modifiers, type == kUpb_EncodedType_OpenEnum); -} -static void upb_MtDecoder_ModifyField(upb_MtDecoder* d, - uint32_t message_modifiers, - uint32_t field_modifiers, - upb_MiniTableField* field) { - if (field_modifiers & kUpb_EncodedFieldModifier_FlipPacked) { - if (!upb_MtDecoder_FieldIsPackable(field)) { - upb_MdDecoder_ErrorJmp(&d->base, - "Cannot flip packed on unpackable field %" PRIu32, - field->number); - } - field->mode ^= kUpb_LabelFlags_IsPacked; - } + return UPB_UNLIKELY(layout && layout->required_count) + ? _upb_Decoder_CheckRequired(d, ptr, msg, layout) + : ptr; +} - bool singular = field_modifiers & kUpb_EncodedFieldModifier_IsProto3Singular; - bool required = field_modifiers & kUpb_EncodedFieldModifier_IsRequired; +const char* _upb_FastDecoder_DecodeGeneric(struct upb_Decoder* d, + const char* ptr, upb_Message* msg, + intptr_t table, uint64_t hasbits, + uint64_t data) { + (void)data; + *(uint32_t*)msg |= hasbits; + return _upb_Decoder_DecodeMessage(d, ptr, msg, decode_totablep(table)); +} - // Validate. 
- if ((singular || required) && field->offset != kHasbitPresence) { - upb_MdDecoder_ErrorJmp(&d->base, - "Invalid modifier(s) for repeated field %" PRIu32, - field->number); - } - if (singular && required) { - upb_MdDecoder_ErrorJmp( - &d->base, "Field %" PRIu32 " cannot be both singular and required", - field->number); +static upb_DecodeStatus _upb_Decoder_DecodeTop(struct upb_Decoder* d, + const char* buf, void* msg, + const upb_MiniTable* l) { + if (!_upb_Decoder_TryFastDispatch(d, &buf, msg, l)) { + _upb_Decoder_DecodeMessage(d, buf, msg, l); } + if (d->end_group != DECODE_NOGROUP) return kUpb_DecodeStatus_Malformed; + if (d->missing_required) return kUpb_DecodeStatus_MissingRequired; + return kUpb_DecodeStatus_Ok; +} - if (singular) field->offset = kNoPresence; - if (required) { - field->offset = kRequiredPresence; - } +UPB_NOINLINE +const char* _upb_Decoder_IsDoneFallback(upb_EpsCopyInputStream* e, + const char* ptr, int overrun) { + return _upb_EpsCopyInputStream_IsDoneFallbackInline( + e, ptr, overrun, _upb_Decoder_BufferFlipCallback); } -static void upb_MtDecoder_PushItem(upb_MtDecoder* d, upb_LayoutItem item) { - if (d->vec.size == d->vec.capacity) { - size_t new_cap = UPB_MAX(8, d->vec.size * 2); - d->vec.data = realloc(d->vec.data, new_cap * sizeof(*d->vec.data)); - upb_MdDecoder_CheckOutOfMemory(&d->base, d->vec.data); - d->vec.capacity = new_cap; +static upb_DecodeStatus upb_Decoder_Decode(upb_Decoder* const decoder, + const char* const buf, + void* const msg, + const upb_MiniTable* const l, + upb_Arena* const arena) { + if (UPB_SETJMP(decoder->err) == 0) { + decoder->status = _upb_Decoder_DecodeTop(decoder, buf, msg, l); + } else { + UPB_ASSERT(decoder->status != kUpb_DecodeStatus_Ok); } - d->vec.data[d->vec.size++] = item; -} -static void upb_MtDecoder_PushOneof(upb_MtDecoder* d, upb_LayoutItem item) { - if (item.field_index == kUpb_LayoutItem_IndexSentinel) { - upb_MdDecoder_ErrorJmp(&d->base, "Empty oneof"); - } - item.field_index -= kOneofBase; + 
_upb_MemBlock* blocks = + upb_Atomic_Load(&decoder->arena.blocks, memory_order_relaxed); + arena->head = decoder->arena.head; + upb_Atomic_Store(&arena->blocks, blocks, memory_order_relaxed); + return decoder->status; +} - // Push oneof data. - item.type = kUpb_LayoutItemType_OneofField; - upb_MtDecoder_PushItem(d, item); +upb_DecodeStatus upb_Decode(const char* buf, size_t size, void* msg, + const upb_MiniTable* l, + const upb_ExtensionRegistry* extreg, int options, + upb_Arena* arena) { + upb_Decoder decoder; + unsigned depth = (unsigned)options >> 16; - // Push oneof case. - item.rep = kUpb_FieldRep_4Byte; // Field Number. - item.type = kUpb_LayoutItemType_OneofCase; - upb_MtDecoder_PushItem(d, item); -} + upb_EpsCopyInputStream_Init(&decoder.input, &buf, size, + options & kUpb_DecodeOption_AliasString); -size_t upb_MtDecoder_SizeOfRep(upb_FieldRep rep, - upb_MiniTablePlatform platform) { - static const uint8_t kRepToSize32[] = { - [kUpb_FieldRep_1Byte] = 1, - [kUpb_FieldRep_4Byte] = 4, - [kUpb_FieldRep_StringView] = 8, - [kUpb_FieldRep_8Byte] = 8, - }; - static const uint8_t kRepToSize64[] = { - [kUpb_FieldRep_1Byte] = 1, - [kUpb_FieldRep_4Byte] = 4, - [kUpb_FieldRep_StringView] = 16, - [kUpb_FieldRep_8Byte] = 8, - }; - UPB_ASSERT(sizeof(upb_StringView) == - UPB_SIZE(kRepToSize32, kRepToSize64)[kUpb_FieldRep_StringView]); - return platform == kUpb_MiniTablePlatform_32Bit ? kRepToSize32[rep] - : kRepToSize64[rep]; -} + decoder.extreg = extreg; + decoder.unknown = NULL; + decoder.depth = depth ? 
depth : kUpb_WireFormat_DefaultDepthLimit; + decoder.end_group = DECODE_NOGROUP; + decoder.options = (uint16_t)options; + decoder.missing_required = false; + decoder.status = kUpb_DecodeStatus_Ok; -size_t upb_MtDecoder_AlignOfRep(upb_FieldRep rep, - upb_MiniTablePlatform platform) { - static const uint8_t kRepToAlign32[] = { - [kUpb_FieldRep_1Byte] = 1, - [kUpb_FieldRep_4Byte] = 4, - [kUpb_FieldRep_StringView] = 4, - [kUpb_FieldRep_8Byte] = 8, - }; - static const uint8_t kRepToAlign64[] = { - [kUpb_FieldRep_1Byte] = 1, - [kUpb_FieldRep_4Byte] = 4, - [kUpb_FieldRep_StringView] = 8, - [kUpb_FieldRep_8Byte] = 8, - }; - UPB_ASSERT(UPB_ALIGN_OF(upb_StringView) == - UPB_SIZE(kRepToAlign32, kRepToAlign64)[kUpb_FieldRep_StringView]); - return platform == kUpb_MiniTablePlatform_32Bit ? kRepToAlign32[rep] - : kRepToAlign64[rep]; + // Violating the encapsulation of the arena for performance reasons. + // This is a temporary arena that we swap into and swap out of when we are + // done. The temporary arena only needs to be able to handle allocation, + // not fuse or free, so it does not need many of the members to be initialized + // (particularly parent_or_count). 
+ _upb_MemBlock* blocks = upb_Atomic_Load(&arena->blocks, memory_order_relaxed); + decoder.arena.head = arena->head; + decoder.arena.block_alloc = arena->block_alloc; + upb_Atomic_Init(&decoder.arena.blocks, blocks); + + return upb_Decoder_Decode(&decoder, buf, msg, l, arena); } -static const char* upb_MtDecoder_DecodeOneofField(upb_MtDecoder* d, - const char* ptr, - char first_ch, - upb_LayoutItem* item) { - uint32_t field_num; - ptr = upb_MdDecoder_DecodeBase92Varint( - &d->base, ptr, first_ch, kUpb_EncodedValue_MinOneofField, - kUpb_EncodedValue_MaxOneofField, &field_num); - upb_MiniTableField* f = - (void*)upb_MiniTable_FindFieldByNumber(d->table, field_num); +#undef OP_FIXPCK_LG2 +#undef OP_VARPCK_LG2 - if (!f) { - upb_MdDecoder_ErrorJmp(&d->base, - "Couldn't add field number %" PRIu32 - " to oneof, no such field number.", - field_num); +// Fast decoder: ~3x the speed of decode.c, but requires x86-64/ARM64. +// Also the table size grows by 2x. +// +// Could potentially be ported to other 64-bit archs that pass at least six +// arguments in registers and have 8 unused high bits in pointers. +// +// The overall design is to create specialized functions for every possible +// field type (eg. oneof boolean field with a 1 byte tag) and then dispatch +// to the specialized function as quickly as possible. + + + +// Must be last. + +#if UPB_FASTTABLE + +// The standard set of arguments passed to each parsing function. +// Thanks to x86-64 calling conventions, these will stay in registers. +#define UPB_PARSE_PARAMS \ + upb_Decoder *d, const char *ptr, upb_Message *msg, intptr_t table, \ + uint64_t hasbits, uint64_t data + +#define UPB_PARSE_ARGS d, ptr, msg, table, hasbits, data + +#define RETURN_GENERIC(m) \ + /* Uncomment either of these for debugging purposes. 
*/ \ + /* fprintf(stderr, m); */ \ + /*__builtin_trap(); */ \ + return _upb_FastDecoder_DecodeGeneric(d, ptr, msg, table, hasbits, 0); + +typedef enum { + CARD_s = 0, /* Singular (optional, non-repeated) */ + CARD_o = 1, /* Oneof */ + CARD_r = 2, /* Repeated */ + CARD_p = 3 /* Packed Repeated */ +} upb_card; + +UPB_NOINLINE +static const char* fastdecode_isdonefallback(UPB_PARSE_PARAMS) { + int overrun = data; + ptr = _upb_EpsCopyInputStream_IsDoneFallbackInline( + &d->input, ptr, overrun, _upb_Decoder_BufferFlipCallback); + data = _upb_FastDecoder_LoadTag(ptr); + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); +} + +UPB_FORCEINLINE +static const char* fastdecode_dispatch(UPB_PARSE_PARAMS) { + int overrun; + switch (upb_EpsCopyInputStream_IsDoneStatus(&d->input, ptr, &overrun)) { + case kUpb_IsDoneStatus_Done: + *(uint32_t*)msg |= hasbits; // Sync hasbits. + const upb_MiniTable* l = decode_totablep(table); + return UPB_UNLIKELY(l->required_count) + ? _upb_Decoder_CheckRequired(d, ptr, msg, l) + : ptr; + case kUpb_IsDoneStatus_NotDone: + break; + case kUpb_IsDoneStatus_NeedFallback: + data = overrun; + UPB_MUSTTAIL return fastdecode_isdonefallback(UPB_PARSE_ARGS); } - if (f->offset != kHasbitPresence) { - upb_MdDecoder_ErrorJmp( - &d->base, - "Cannot add repeated, required, or singular field %" PRIu32 - " to oneof.", - field_num); + + // Read two bytes of tag data (for a one-byte tag, the high byte is junk). + data = _upb_FastDecoder_LoadTag(ptr); + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); +} + +UPB_FORCEINLINE +static bool fastdecode_checktag(uint16_t data, int tagbytes) { + if (tagbytes == 1) { + return (data & 0xff) == 0; + } else { + return data == 0; } +} - // Oneof storage must be large enough to accommodate the largest member. 
- int rep = f->mode >> kUpb_FieldRep_Shift; - if (upb_MtDecoder_SizeOfRep(rep, d->platform) > - upb_MtDecoder_SizeOfRep(item->rep, d->platform)) { - item->rep = rep; +UPB_FORCEINLINE +static const char* fastdecode_longsize(const char* ptr, int* size) { + int i; + UPB_ASSERT(*size & 0x80); + *size &= 0xff; + for (i = 0; i < 3; i++) { + ptr++; + size_t byte = (uint8_t)ptr[-1]; + *size += (byte - 1) << (7 + 7 * i); + if (UPB_LIKELY((byte & 0x80) == 0)) return ptr; } - // Prepend this field to the linked list. - f->offset = item->field_index; - item->field_index = (f - d->fields) + kOneofBase; + ptr++; + size_t byte = (uint8_t)ptr[-1]; + // len is limited by 2gb not 4gb, hence 8 and not 16 as normally expected + // for a 32 bit varint. + if (UPB_UNLIKELY(byte >= 8)) return NULL; + *size += (byte - 1) << 28; return ptr; } -static const char* upb_MtDecoder_DecodeOneofs(upb_MtDecoder* d, - const char* ptr) { - upb_LayoutItem item = {.rep = 0, - .field_index = kUpb_LayoutItem_IndexSentinel}; - while (ptr < d->base.end) { - char ch = *ptr++; - if (ch == kUpb_EncodedValue_FieldSeparator) { - // Field separator, no action needed. - } else if (ch == kUpb_EncodedValue_OneofSeparator) { - // End of oneof. - upb_MtDecoder_PushOneof(d, item); - item.field_index = kUpb_LayoutItem_IndexSentinel; // Move to next oneof. - } else { - ptr = upb_MtDecoder_DecodeOneofField(d, ptr, ch, &item); +UPB_FORCEINLINE +static const char* fastdecode_delimited( + upb_Decoder* d, const char* ptr, + upb_EpsCopyInputStream_ParseDelimitedFunc* func, void* ctx) { + ptr++; + + // Sign-extend so varint greater than one byte becomes negative, causing + // fast delimited parse to fail. + int len = (int8_t)ptr[-1]; + + if (!upb_EpsCopyInputStream_TryParseDelimitedFast(&d->input, &ptr, len, func, + ctx)) { + // Slow case: Sub-message is >=128 bytes and/or exceeds the current buffer. 
+ // If it exceeds the buffer limit, limit/limit_ptr will change during + // sub-message parsing, so we need to preserve delta, not limit. + if (UPB_UNLIKELY(len & 0x80)) { + // Size varint >1 byte (length >= 128). + ptr = fastdecode_longsize(ptr, &len); + if (!ptr) { + // Corrupt wire format: size exceeded INT_MAX. + return NULL; + } + } + if (!upb_EpsCopyInputStream_CheckSize(&d->input, ptr, len)) { + // Corrupt wire format: invalid limit. + return NULL; } + int delta = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, len); + ptr = func(&d->input, ptr, ctx); + upb_EpsCopyInputStream_PopLimit(&d->input, ptr, delta); } - - // Push final oneof. - upb_MtDecoder_PushOneof(d, item); return ptr; } -static const char* upb_MtDecoder_ParseModifier(upb_MtDecoder* d, - const char* ptr, char first_ch, - upb_MiniTableField* last_field, - uint64_t* msg_modifiers) { - uint32_t mod; - ptr = upb_MdDecoder_DecodeBase92Varint(&d->base, ptr, first_ch, - kUpb_EncodedValue_MinModifier, - kUpb_EncodedValue_MaxModifier, &mod); - if (last_field) { - upb_MtDecoder_ModifyField(d, *msg_modifiers, mod, last_field); +/* singular, oneof, repeated field handling ***********************************/ + +typedef struct { + upb_Array* arr; + void* end; +} fastdecode_arr; + +typedef enum { + FD_NEXT_ATLIMIT, + FD_NEXT_SAMEFIELD, + FD_NEXT_OTHERFIELD +} fastdecode_next; + +typedef struct { + void* dst; + fastdecode_next next; + uint32_t tag; +} fastdecode_nextret; + +UPB_FORCEINLINE +static void* fastdecode_resizearr(upb_Decoder* d, void* dst, + fastdecode_arr* farr, int valbytes) { + if (UPB_UNLIKELY(dst == farr->end)) { + size_t old_size = farr->arr->capacity; + size_t old_bytes = old_size * valbytes; + size_t new_size = old_size * 2; + size_t new_bytes = new_size * valbytes; + char* old_ptr = _upb_array_ptr(farr->arr); + char* new_ptr = upb_Arena_Realloc(&d->arena, old_ptr, old_bytes, new_bytes); + uint8_t elem_size_lg2 = __builtin_ctz(valbytes); + farr->arr->capacity = new_size; + farr->arr->data 
= _upb_array_tagptr(new_ptr, elem_size_lg2); + dst = (void*)(new_ptr + (old_size * valbytes)); + farr->end = (void*)(new_ptr + (new_size * valbytes)); + } + return dst; +} + +UPB_FORCEINLINE +static bool fastdecode_tagmatch(uint32_t tag, uint64_t data, int tagbytes) { + if (tagbytes == 1) { + return (uint8_t)tag == (uint8_t)data; + } else { + return (uint16_t)tag == (uint16_t)data; + } +} + +UPB_FORCEINLINE +static void fastdecode_commitarr(void* dst, fastdecode_arr* farr, + int valbytes) { + farr->arr->size = + (size_t)((char*)dst - (char*)_upb_array_ptr(farr->arr)) / valbytes; +} + +UPB_FORCEINLINE +static fastdecode_nextret fastdecode_nextrepeated(upb_Decoder* d, void* dst, + const char** ptr, + fastdecode_arr* farr, + uint64_t data, int tagbytes, + int valbytes) { + fastdecode_nextret ret; + dst = (char*)dst + valbytes; + + if (UPB_LIKELY(!_upb_Decoder_IsDone(d, ptr))) { + ret.tag = _upb_FastDecoder_LoadTag(*ptr); + if (fastdecode_tagmatch(ret.tag, data, tagbytes)) { + ret.next = FD_NEXT_SAMEFIELD; + } else { + fastdecode_commitarr(dst, farr, valbytes); + ret.next = FD_NEXT_OTHERFIELD; + } + } else { + fastdecode_commitarr(dst, farr, valbytes); + ret.next = FD_NEXT_ATLIMIT; + } + + ret.dst = dst; + return ret; +} + +UPB_FORCEINLINE +static void* fastdecode_fieldmem(upb_Message* msg, uint64_t data) { + size_t ofs = data >> 48; + return (char*)msg + ofs; +} + +UPB_FORCEINLINE +static void* fastdecode_getfield(upb_Decoder* d, const char* ptr, + upb_Message* msg, uint64_t* data, + uint64_t* hasbits, fastdecode_arr* farr, + int valbytes, upb_card card) { + switch (card) { + case CARD_s: { + uint8_t hasbit_index = *data >> 24; + // Set hasbit and return pointer to scalar field. 
+ *hasbits |= 1ull << hasbit_index; + return fastdecode_fieldmem(msg, *data); + } + case CARD_o: { + uint16_t case_ofs = *data >> 32; + uint32_t* oneof_case = UPB_PTR_AT(msg, case_ofs, uint32_t); + uint8_t field_number = *data >> 24; + *oneof_case = field_number; + return fastdecode_fieldmem(msg, *data); + } + case CARD_r: { + // Get pointer to upb_Array and allocate/expand if necessary. + uint8_t elem_size_lg2 = __builtin_ctz(valbytes); + upb_Array** arr_p = fastdecode_fieldmem(msg, *data); + char* begin; + *(uint32_t*)msg |= *hasbits; + *hasbits = 0; + if (UPB_LIKELY(!*arr_p)) { + farr->arr = _upb_Array_New(&d->arena, 8, elem_size_lg2); + *arr_p = farr->arr; + } else { + farr->arr = *arr_p; + } + begin = _upb_array_ptr(farr->arr); + farr->end = begin + (farr->arr->capacity * valbytes); + *data = _upb_FastDecoder_LoadTag(ptr); + return begin + (farr->arr->size * valbytes); + } + default: + UPB_UNREACHABLE(); + } +} + +UPB_FORCEINLINE +static bool fastdecode_flippacked(uint64_t* data, int tagbytes) { + *data ^= (0x2 ^ 0x0); // Patch data to match packed wiretype. 
+ return fastdecode_checktag(*data, tagbytes); +} + +#define FASTDECODE_CHECKPACKED(tagbytes, card, func) \ + if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ + if (card == CARD_r && fastdecode_flippacked(&data, tagbytes)) { \ + UPB_MUSTTAIL return func(UPB_PARSE_ARGS); \ + } \ + RETURN_GENERIC("packed check tag mismatch\n"); \ + } + +/* varint fields **************************************************************/ + +UPB_FORCEINLINE +static uint64_t fastdecode_munge(uint64_t val, int valbytes, bool zigzag) { + if (valbytes == 1) { + return val != 0; + } else if (zigzag) { + if (valbytes == 4) { + uint32_t n = val; + return (n >> 1) ^ -(int32_t)(n & 1); + } else if (valbytes == 8) { + return (val >> 1) ^ -(int64_t)(val & 1); + } + UPB_UNREACHABLE(); + } + return val; +} + +UPB_FORCEINLINE +static const char* fastdecode_varint64(const char* ptr, uint64_t* val) { + ptr++; + *val = (uint8_t)ptr[-1]; + if (UPB_UNLIKELY(*val & 0x80)) { + int i; + for (i = 0; i < 8; i++) { + ptr++; + uint64_t byte = (uint8_t)ptr[-1]; + *val += (byte - 1) << (7 + 7 * i); + if (UPB_LIKELY((byte & 0x80) == 0)) goto done; + } + ptr++; + uint64_t byte = (uint8_t)ptr[-1]; + if (byte > 1) { + return NULL; + } + *val += (byte - 1) << 63; + } +done: + UPB_ASSUME(ptr != NULL); + return ptr; +} + +#define FASTDECODE_UNPACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, zigzag, packed) \ + uint64_t val; \ + void* dst; \ + fastdecode_arr farr; \ + \ + FASTDECODE_CHECKPACKED(tagbytes, card, packed); \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, \ + card); \ + if (card == CARD_r) { \ + if (UPB_UNLIKELY(!dst)) { \ + RETURN_GENERIC("need array resize\n"); \ + } \ + } \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, valbytes); \ + } \ + \ + ptr += tagbytes; \ + ptr = fastdecode_varint64(ptr, &val); \ + if (ptr == NULL) _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ + val = 
fastdecode_munge(val, valbytes, zigzag); \ + memcpy(dst, &val, valbytes); \ + \ + if (card == CARD_r) { \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, valbytes); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + data = ret.tag; \ + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + return ptr; \ + } \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); + +typedef struct { + uint8_t valbytes; + bool zigzag; + void* dst; + fastdecode_arr farr; +} fastdecode_varintdata; + +UPB_FORCEINLINE +static const char* fastdecode_topackedvarint(upb_EpsCopyInputStream* e, + const char* ptr, void* ctx) { + upb_Decoder* d = (upb_Decoder*)e; + fastdecode_varintdata* data = ctx; + void* dst = data->dst; + uint64_t val; + + while (!_upb_Decoder_IsDone(d, &ptr)) { + dst = fastdecode_resizearr(d, dst, &data->farr, data->valbytes); + ptr = fastdecode_varint64(ptr, &val); + if (ptr == NULL) return NULL; + val = fastdecode_munge(val, data->valbytes, data->zigzag); + memcpy(dst, &val, data->valbytes); + dst = (char*)dst + data->valbytes; + } + + fastdecode_commitarr(dst, &data->farr, data->valbytes); + return ptr; +} + +#define FASTDECODE_PACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, zigzag, unpacked) \ + fastdecode_varintdata ctx = {valbytes, zigzag}; \ + \ + FASTDECODE_CHECKPACKED(tagbytes, CARD_r, unpacked); \ + \ + ctx.dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &ctx.farr, \ + valbytes, CARD_r); \ + if (UPB_UNLIKELY(!ctx.dst)) { \ + RETURN_GENERIC("need array resize\n"); \ + } \ + \ + ptr += tagbytes; \ + ptr = fastdecode_delimited(d, ptr, &fastdecode_topackedvarint, &ctx); \ + \ + if (UPB_UNLIKELY(ptr == NULL)) { \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(d, ptr, msg, table, hasbits, 0); + +#define 
FASTDECODE_VARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, zigzag, unpacked, packed) \ + if (card == CARD_p) { \ + FASTDECODE_PACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, zigzag, unpacked); \ + } else { \ + FASTDECODE_UNPACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, zigzag, packed); \ + } + +#define z_ZZ true +#define b_ZZ false +#define v_ZZ false + +/* Generate all combinations: + * {s,o,r,p} x {b1,v4,z4,v8,z8} x {1bt,2bt} */ + +#define F(card, type, valbytes, tagbytes) \ + UPB_NOINLINE \ + const char* upb_p##card##type##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ + FASTDECODE_VARINT(d, ptr, msg, table, hasbits, data, tagbytes, valbytes, \ + CARD_##card, type##_ZZ, \ + upb_pr##type##valbytes##_##tagbytes##bt, \ + upb_pp##type##valbytes##_##tagbytes##bt); \ + } + +#define TYPES(card, tagbytes) \ + F(card, b, 1, tagbytes) \ + F(card, v, 4, tagbytes) \ + F(card, v, 8, tagbytes) \ + F(card, z, 4, tagbytes) \ + F(card, z, 8, tagbytes) + +#define TAGBYTES(card) \ + TYPES(card, 1) \ + TYPES(card, 2) + +TAGBYTES(s) +TAGBYTES(o) +TAGBYTES(r) +TAGBYTES(p) + +#undef z_ZZ +#undef b_ZZ +#undef v_ZZ +#undef o_ONEOF +#undef s_ONEOF +#undef r_ONEOF +#undef F +#undef TYPES +#undef TAGBYTES +#undef FASTDECODE_UNPACKEDVARINT +#undef FASTDECODE_PACKEDVARINT +#undef FASTDECODE_VARINT + +/* fixed fields ***************************************************************/ + +#define FASTDECODE_UNPACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, packed) \ + void* dst; \ + fastdecode_arr farr; \ + \ + FASTDECODE_CHECKPACKED(tagbytes, card, packed) \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, \ + card); \ + if (card == CARD_r) { \ + if (UPB_UNLIKELY(!dst)) { \ + RETURN_GENERIC("couldn't allocate array in arena\n"); \ + } \ + } \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, valbytes); \ + } \ + \ + ptr += 
tagbytes; \ + memcpy(dst, ptr, valbytes); \ + ptr += valbytes; \ + \ + if (card == CARD_r) { \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, valbytes); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + data = ret.tag; \ + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + return ptr; \ + } \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); + +#define FASTDECODE_PACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, unpacked) \ + FASTDECODE_CHECKPACKED(tagbytes, CARD_r, unpacked) \ + \ + ptr += tagbytes; \ + int size = (uint8_t)ptr[0]; \ + ptr++; \ + if (size & 0x80) { \ + ptr = fastdecode_longsize(ptr, &size); \ + } \ + \ + if (UPB_UNLIKELY(!upb_EpsCopyInputStream_CheckDataSizeAvailable( \ + &d->input, ptr, size) || \ + (size % valbytes) != 0)) { \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ + } \ + \ + upb_Array** arr_p = fastdecode_fieldmem(msg, data); \ + upb_Array* arr = *arr_p; \ + uint8_t elem_size_lg2 = __builtin_ctz(valbytes); \ + int elems = size / valbytes; \ + \ + if (UPB_LIKELY(!arr)) { \ + *arr_p = arr = _upb_Array_New(&d->arena, elems, elem_size_lg2); \ + if (!arr) { \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ + } \ + } else { \ + _upb_Array_ResizeUninitialized(arr, elems, &d->arena); \ + } \ + \ + char* dst = _upb_array_ptr(arr); \ + memcpy(dst, ptr, size); \ + arr->size = elems; \ + \ + ptr += size; \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); + +#define FASTDECODE_FIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, unpacked, packed) \ + if (card == CARD_p) { \ + FASTDECODE_PACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, unpacked); \ + } else { \ + FASTDECODE_UNPACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, packed); \ + } + +/* 
Generate all combinations: + * {s,o,r,p} x {f4,f8} x {1bt,2bt} */ + +#define F(card, valbytes, tagbytes) \ + UPB_NOINLINE \ + const char* upb_p##card##f##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ + FASTDECODE_FIXED(d, ptr, msg, table, hasbits, data, tagbytes, valbytes, \ + CARD_##card, upb_ppf##valbytes##_##tagbytes##bt, \ + upb_prf##valbytes##_##tagbytes##bt); \ + } + +#define TYPES(card, tagbytes) \ + F(card, 4, tagbytes) \ + F(card, 8, tagbytes) + +#define TAGBYTES(card) \ + TYPES(card, 1) \ + TYPES(card, 2) + +TAGBYTES(s) +TAGBYTES(o) +TAGBYTES(r) +TAGBYTES(p) + +#undef F +#undef TYPES +#undef TAGBYTES +#undef FASTDECODE_UNPACKEDFIXED +#undef FASTDECODE_PACKEDFIXED + +/* string fields **************************************************************/ + +typedef const char* fastdecode_copystr_func(struct upb_Decoder* d, + const char* ptr, upb_Message* msg, + const upb_MiniTable* table, + uint64_t hasbits, + upb_StringView* dst); + +UPB_NOINLINE +static const char* fastdecode_verifyutf8(upb_Decoder* d, const char* ptr, + upb_Message* msg, intptr_t table, + uint64_t hasbits, uint64_t data) { + upb_StringView* dst = (upb_StringView*)data; + if (!_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) { + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); + } + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); +} + +#define FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, validate_utf8) \ + int size = (uint8_t)ptr[0]; /* Could plumb through hasbits. 
*/ \ + ptr++; \ + if (size & 0x80) { \ + ptr = fastdecode_longsize(ptr, &size); \ + } \ + \ + if (UPB_UNLIKELY(!upb_EpsCopyInputStream_CheckSize(&d->input, ptr, size))) { \ + dst->size = 0; \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ + } \ + \ + const char* s_ptr = ptr; \ + ptr = upb_EpsCopyInputStream_ReadString(&d->input, &s_ptr, size, &d->arena); \ + if (!ptr) _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); \ + dst->data = s_ptr; \ + dst->size = size; \ + \ + if (validate_utf8) { \ + data = (uint64_t)dst; \ + UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ + } else { \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \ + } + +UPB_NOINLINE +static const char* fastdecode_longstring_utf8(struct upb_Decoder* d, + const char* ptr, upb_Message* msg, + intptr_t table, uint64_t hasbits, + uint64_t data) { + upb_StringView* dst = (upb_StringView*)data; + FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, true); +} + +UPB_NOINLINE +static const char* fastdecode_longstring_noutf8( + struct upb_Decoder* d, const char* ptr, upb_Message* msg, intptr_t table, + uint64_t hasbits, uint64_t data) { + upb_StringView* dst = (upb_StringView*)data; + FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, false); +} + +UPB_FORCEINLINE +static void fastdecode_docopy(upb_Decoder* d, const char* ptr, uint32_t size, + int copy, char* data, upb_StringView* dst) { + d->arena.head.ptr += copy; + dst->data = data; + UPB_UNPOISON_MEMORY_REGION(data, copy); + memcpy(data, ptr, copy); + UPB_POISON_MEMORY_REGION(data + size, copy - size); +} + +#define FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes, \ + card, validate_utf8) \ + upb_StringView* dst; \ + fastdecode_arr farr; \ + int64_t size; \ + size_t arena_has; \ + size_t common_has; \ + char* buf; \ + \ + UPB_ASSERT(!upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, 0)); \ + UPB_ASSERT(fastdecode_checktag(data, tagbytes)); \ + \ + dst = 
fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ + sizeof(upb_StringView), card); \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_StringView)); \ + } \ + \ + size = (uint8_t)ptr[tagbytes]; \ + ptr += tagbytes + 1; \ + dst->size = size; \ + \ + buf = d->arena.head.ptr; \ + arena_has = _upb_ArenaHas(&d->arena); \ + common_has = UPB_MIN(arena_has, \ + upb_EpsCopyInputStream_BytesAvailable(&d->input, ptr)); \ + \ + if (UPB_LIKELY(size <= 15 - tagbytes)) { \ + if (arena_has < 16) goto longstr; \ + d->arena.head.ptr += 16; \ + memcpy(buf, ptr - tagbytes - 1, 16); \ + dst->data = buf + tagbytes + 1; \ + } else if (UPB_LIKELY(size <= 32)) { \ + if (UPB_UNLIKELY(common_has < 32)) goto longstr; \ + fastdecode_docopy(d, ptr, size, 32, buf, dst); \ + } else if (UPB_LIKELY(size <= 64)) { \ + if (UPB_UNLIKELY(common_has < 64)) goto longstr; \ + fastdecode_docopy(d, ptr, size, 64, buf, dst); \ + } else if (UPB_LIKELY(size < 128)) { \ + if (UPB_UNLIKELY(common_has < 128)) goto longstr; \ + fastdecode_docopy(d, ptr, size, 128, buf, dst); \ + } else { \ + goto longstr; \ + } \ + \ + ptr += size; \ + \ + if (card == CARD_r) { \ + if (validate_utf8 && \ + !_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) { \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); \ + } \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_StringView)); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + data = ret.tag; \ + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + return ptr; \ + } \ + } \ + \ + if (card != CARD_r && validate_utf8) { \ + data = (uint64_t)dst; \ + UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \ + \ + longstr: \ + if (card == CARD_r) { \ + fastdecode_commitarr(dst + 
1, &farr, sizeof(upb_StringView)); \ + } \ + ptr--; \ + if (validate_utf8) { \ + UPB_MUSTTAIL return fastdecode_longstring_utf8(d, ptr, msg, table, \ + hasbits, (uint64_t)dst); \ + } else { \ + UPB_MUSTTAIL return fastdecode_longstring_noutf8(d, ptr, msg, table, \ + hasbits, (uint64_t)dst); \ + } + +#define FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, card, \ + copyfunc, validate_utf8) \ + upb_StringView* dst; \ + fastdecode_arr farr; \ + int64_t size; \ + \ + if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ + RETURN_GENERIC("string field tag mismatch\n"); \ + } \ + \ + if (UPB_UNLIKELY( \ + !upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, 0))) { \ + UPB_MUSTTAIL return copyfunc(UPB_PARSE_ARGS); \ + } \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ + sizeof(upb_StringView), card); \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_StringView)); \ + } \ + \ + size = (int8_t)ptr[tagbytes]; \ + ptr += tagbytes + 1; \ + \ + if (UPB_UNLIKELY( \ + !upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, size))) { \ + ptr--; \ + if (validate_utf8) { \ + return fastdecode_longstring_utf8(d, ptr, msg, table, hasbits, \ + (uint64_t)dst); \ + } else { \ + return fastdecode_longstring_noutf8(d, ptr, msg, table, hasbits, \ + (uint64_t)dst); \ + } \ + } \ + \ + dst->data = ptr; \ + dst->size = size; \ + ptr = upb_EpsCopyInputStream_ReadStringAliased(&d->input, &dst->data, \ + dst->size); \ + \ + if (card == CARD_r) { \ + if (validate_utf8 && \ + !_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) { \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); \ + } \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_StringView)); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + data = ret.tag; \ + UPB_MUSTTAIL return 
_upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + return ptr; \ + } \ + } \ + \ + if (card != CARD_r && validate_utf8) { \ + data = (uint64_t)dst; \ + UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); + +/* Generate all combinations: + * {p,c} x {s,o,r} x {s, b} x {1bt,2bt} */ + +#define s_VALIDATE true +#define b_VALIDATE false + +#define F(card, tagbytes, type) \ + UPB_NOINLINE \ + const char* upb_c##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ + FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes, \ + CARD_##card, type##_VALIDATE); \ + } \ + const char* upb_p##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ + FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, \ + CARD_##card, upb_c##card##type##_##tagbytes##bt, \ + type##_VALIDATE); \ + } + +#define UTF8(card, tagbytes) \ + F(card, tagbytes, s) \ + F(card, tagbytes, b) + +#define TAGBYTES(card) \ + UTF8(card, 1) \ + UTF8(card, 2) + +TAGBYTES(s) +TAGBYTES(o) +TAGBYTES(r) + +#undef s_VALIDATE +#undef b_VALIDATE +#undef F +#undef TAGBYTES +#undef FASTDECODE_LONGSTRING +#undef FASTDECODE_COPYSTRING +#undef FASTDECODE_STRING + +/* message fields *************************************************************/ + +UPB_INLINE +upb_Message* decode_newmsg_ceil(upb_Decoder* d, const upb_MiniTable* l, + int msg_ceil_bytes) { + size_t size = l->size + sizeof(upb_Message_Internal); + char* msg_data; + if (UPB_LIKELY(msg_ceil_bytes > 0 && + _upb_ArenaHas(&d->arena) >= msg_ceil_bytes)) { + UPB_ASSERT(size <= (size_t)msg_ceil_bytes); + msg_data = d->arena.head.ptr; + d->arena.head.ptr += size; + UPB_UNPOISON_MEMORY_REGION(msg_data, msg_ceil_bytes); + memset(msg_data, 0, msg_ceil_bytes); + UPB_POISON_MEMORY_REGION(msg_data + size, msg_ceil_bytes - size); } else { - if (!d->table) { - upb_MdDecoder_ErrorJmp(&d->base, - "Extensions cannot have message modifiers"); - } - *msg_modifiers = mod; + 
msg_data = (char*)upb_Arena_Malloc(&d->arena, size); + memset(msg_data, 0, size); } + return msg_data + sizeof(upb_Message_Internal); +} + +typedef struct { + intptr_t table; + upb_Message* msg; +} fastdecode_submsgdata; +UPB_FORCEINLINE +static const char* fastdecode_tosubmsg(upb_EpsCopyInputStream* e, + const char* ptr, void* ctx) { + upb_Decoder* d = (upb_Decoder*)e; + fastdecode_submsgdata* submsg = ctx; + ptr = fastdecode_dispatch(d, ptr, submsg->msg, submsg->table, 0, 0); + UPB_ASSUME(ptr != NULL); return ptr; } -static void upb_MtDecoder_AllocateSubs(upb_MtDecoder* d, - upb_SubCounts sub_counts) { - uint32_t total_count = sub_counts.submsg_count + sub_counts.subenum_count; - size_t subs_bytes = sizeof(*d->table->subs) * total_count; - upb_MiniTableSub* subs = upb_Arena_Malloc(d->arena, subs_bytes); - upb_MdDecoder_CheckOutOfMemory(&d->base, subs); - uint32_t i = 0; - for (; i < sub_counts.submsg_count; i++) { - subs[i].submsg = &_kUpb_MiniTable_Empty; - } - if (sub_counts.subenum_count) { - upb_MiniTableField* f = d->fields; - upb_MiniTableField* end_f = f + d->table->field_count; - for (; f < end_f; f++) { - if (f->UPB_PRIVATE(descriptortype) == kUpb_FieldType_Enum) { - f->UPB_PRIVATE(submsg_index) += sub_counts.submsg_count; - } - } - for (; i < sub_counts.submsg_count + sub_counts.subenum_count; i++) { - subs[i].subenum = NULL; - } +#define FASTDECODE_SUBMSG(d, ptr, msg, table, hasbits, data, tagbytes, \ + msg_ceil_bytes, card) \ + \ + if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ + RETURN_GENERIC("submessage field tag mismatch\n"); \ + } \ + \ + if (--d->depth == 0) { \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_MaxDepthExceeded); \ + } \ + \ + upb_Message** dst; \ + uint32_t submsg_idx = (data >> 16) & 0xff; \ + const upb_MiniTable* tablep = decode_totablep(table); \ + const upb_MiniTable* subtablep = tablep->subs[submsg_idx].submsg; \ + fastdecode_submsgdata submsg = {decode_totable(subtablep)}; \ + fastdecode_arr farr; \ + \ + if 
(subtablep->table_mask == (uint8_t)-1) { \ + RETURN_GENERIC("submessage doesn't have fast tables."); \ + } \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ + sizeof(upb_Message*), card); \ + \ + if (card == CARD_s) { \ + *(uint32_t*)msg |= hasbits; \ + hasbits = 0; \ + } \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_Message*)); \ + } \ + \ + submsg.msg = *dst; \ + \ + if (card == CARD_r || UPB_LIKELY(!submsg.msg)) { \ + *dst = submsg.msg = decode_newmsg_ceil(d, subtablep, msg_ceil_bytes); \ + } \ + \ + ptr += tagbytes; \ + ptr = fastdecode_delimited(d, ptr, fastdecode_tosubmsg, &submsg); \ + \ + if (UPB_UNLIKELY(ptr == NULL || d->end_group != DECODE_NOGROUP)) { \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ + } \ + \ + if (card == CARD_r) { \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_Message*)); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + d->depth++; \ + data = ret.tag; \ + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + d->depth++; \ + return ptr; \ + } \ + } \ + \ + d->depth++; \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); + +#define F(card, tagbytes, size_ceil, ceil_arg) \ + const char* upb_p##card##m_##tagbytes##bt_max##size_ceil##b( \ + UPB_PARSE_PARAMS) { \ + FASTDECODE_SUBMSG(d, ptr, msg, table, hasbits, data, tagbytes, ceil_arg, \ + CARD_##card); \ } - d->table->subs = subs; -} -static const char* upb_MtDecoder_Parse(upb_MtDecoder* d, const char* ptr, - size_t len, void* fields, - size_t field_size, uint16_t* field_count, - upb_SubCounts* sub_counts) { - uint64_t msg_modifiers = 0; - uint32_t last_field_number = 0; - upb_MiniTableField* last_field = NULL; - bool need_dense_below = d->table != NULL; +#define SIZES(card, tagbytes) \ + F(card, tagbytes, 64, 64) \ + 
F(card, tagbytes, 128, 128) \ + F(card, tagbytes, 192, 192) \ + F(card, tagbytes, 256, 256) \ + F(card, tagbytes, max, -1) - d->base.end = UPB_PTRADD(ptr, len); +#define TAGBYTES(card) \ + SIZES(card, 1) \ + SIZES(card, 2) - while (ptr < d->base.end) { - char ch = *ptr++; - if (ch <= kUpb_EncodedValue_MaxField) { - if (!d->table && last_field) { - // For extensions, consume only a single field and then return. - return --ptr; - } - upb_MiniTableField* field = fields; - *field_count += 1; - fields = (char*)fields + field_size; - field->number = ++last_field_number; - last_field = field; - upb_MiniTable_SetField(d, ch, field, msg_modifiers, sub_counts); - } else if (kUpb_EncodedValue_MinModifier <= ch && - ch <= kUpb_EncodedValue_MaxModifier) { - ptr = upb_MtDecoder_ParseModifier(d, ptr, ch, last_field, &msg_modifiers); - if (msg_modifiers & kUpb_MessageModifier_IsExtendable) { - d->table->ext |= kUpb_ExtMode_Extendable; - } - } else if (ch == kUpb_EncodedValue_End) { - if (!d->table) { - upb_MdDecoder_ErrorJmp(&d->base, "Extensions cannot have oneofs."); - } - ptr = upb_MtDecoder_DecodeOneofs(d, ptr); - } else if (kUpb_EncodedValue_MinSkip <= ch && - ch <= kUpb_EncodedValue_MaxSkip) { - if (need_dense_below) { - d->table->dense_below = d->table->field_count; - need_dense_below = false; - } - uint32_t skip; - ptr = upb_MdDecoder_DecodeBase92Varint(&d->base, ptr, ch, - kUpb_EncodedValue_MinSkip, - kUpb_EncodedValue_MaxSkip, &skip); - last_field_number += skip; - last_field_number--; // Next field seen will increment. 
- } else { - upb_MdDecoder_ErrorJmp(&d->base, "Invalid char: %c", ch); - } - } +TAGBYTES(s) +TAGBYTES(o) +TAGBYTES(r) - if (need_dense_below) { - d->table->dense_below = d->table->field_count; - } +#undef TAGBYTES +#undef SIZES +#undef F +#undef FASTDECODE_SUBMSG - return ptr; -} +#endif /* UPB_FASTTABLE */ -static void upb_MtDecoder_ParseMessage(upb_MtDecoder* d, const char* data, - size_t len) { - // Buffer length is an upper bound on the number of fields. We will return - // what we don't use. - d->fields = upb_Arena_Malloc(d->arena, sizeof(*d->fields) * len); - upb_MdDecoder_CheckOutOfMemory(&d->base, d->fields); +// We encode backwards, to avoid pre-computing lengths (one-pass encode). - upb_SubCounts sub_counts = {0, 0}; - d->table->field_count = 0; - d->table->fields = d->fields; - upb_MtDecoder_Parse(d, data, len, d->fields, sizeof(*d->fields), - &d->table->field_count, &sub_counts); - upb_Arena_ShrinkLast(d->arena, d->fields, sizeof(*d->fields) * len, - sizeof(*d->fields) * d->table->field_count); - d->table->fields = d->fields; - upb_MtDecoder_AllocateSubs(d, sub_counts); -} +#include -int upb_MtDecoder_CompareFields(const void* _a, const void* _b) { - const upb_LayoutItem* a = _a; - const upb_LayoutItem* b = _b; - // Currently we just sort by: - // 1. rep (smallest fields first) - // 2. type (oneof cases first) - // 2. field_index (smallest numbers first) - // The main goal of this is to reduce space lost to padding. - // Later we may have more subtle reasons to prefer a different ordering. 
- const int rep_bits = upb_Log2Ceiling(kUpb_FieldRep_Max); - const int type_bits = upb_Log2Ceiling(kUpb_LayoutItemType_Max); - const int idx_bits = (sizeof(a->field_index) * 8); - UPB_ASSERT(idx_bits + rep_bits + type_bits < 32); -#define UPB_COMBINE(rep, ty, idx) (((rep << type_bits) | ty) << idx_bits) | idx - uint32_t a_packed = UPB_COMBINE(a->rep, a->type, a->field_index); - uint32_t b_packed = UPB_COMBINE(b->rep, b->type, b->field_index); - assert(a_packed != b_packed); -#undef UPB_COMBINE - return a_packed < b_packed ? -1 : 1; -} -static bool upb_MtDecoder_SortLayoutItems(upb_MtDecoder* d) { - // Add items for all non-oneof fields (oneofs were already added). - int n = d->table->field_count; - for (int i = 0; i < n; i++) { - upb_MiniTableField* f = &d->fields[i]; - if (f->offset >= kOneofBase) continue; - upb_LayoutItem item = {.field_index = i, - .rep = f->mode >> kUpb_FieldRep_Shift, - .type = kUpb_LayoutItemType_Field}; - upb_MtDecoder_PushItem(d, item); - } +// Must be last. + +#define UPB_PB_VARINT_MAX_LEN 10 + +UPB_NOINLINE +static size_t encode_varint64(uint64_t val, char* buf) { + size_t i = 0; + do { + uint8_t byte = val & 0x7fU; + val >>= 7; + if (val) byte |= 0x80U; + buf[i++] = byte; + } while (val); + return i; +} - if (d->vec.size) { - qsort(d->vec.data, d->vec.size, sizeof(*d->vec.data), - upb_MtDecoder_CompareFields); - } +static uint32_t encode_zz32(int32_t n) { + return ((uint32_t)n << 1) ^ (n >> 31); +} +static uint64_t encode_zz64(int64_t n) { + return ((uint64_t)n << 1) ^ (n >> 63); +} - return true; +typedef struct { + upb_EncodeStatus status; + jmp_buf err; + upb_Arena* arena; + char *buf, *ptr, *limit; + int options; + int depth; + _upb_mapsorter sorter; +} upb_encstate; + +static size_t upb_roundup_pow2(size_t bytes) { + size_t ret = 128; + while (ret < bytes) { + ret *= 2; + } + return ret; } -static size_t upb_MiniTable_DivideRoundUp(size_t n, size_t d) { - return (n + d - 1) / d; +UPB_NORETURN static void encode_err(upb_encstate* e, 
upb_EncodeStatus s) { + UPB_ASSERT(s != kUpb_EncodeStatus_Ok); + e->status = s; + UPB_LONGJMP(e->err, 1); } -static void upb_MtDecoder_AssignHasbits(upb_MtDecoder* d) { - upb_MiniTable* ret = d->table; - int n = ret->field_count; - int last_hasbit = 0; // 0 cannot be used. +UPB_NOINLINE +static void encode_growbuffer(upb_encstate* e, size_t bytes) { + size_t old_size = e->limit - e->buf; + size_t new_size = upb_roundup_pow2(bytes + (e->limit - e->ptr)); + char* new_buf = upb_Arena_Realloc(e->arena, e->buf, old_size, new_size); - // First assign required fields, which must have the lowest hasbits. - for (int i = 0; i < n; i++) { - upb_MiniTableField* field = (upb_MiniTableField*)&ret->fields[i]; - if (field->offset == kRequiredPresence) { - field->presence = ++last_hasbit; - } else if (field->offset == kNoPresence) { - field->presence = 0; - } - } - ret->required_count = last_hasbit; + if (!new_buf) encode_err(e, kUpb_EncodeStatus_OutOfMemory); - if (ret->required_count > 63) { - upb_MdDecoder_ErrorJmp(&d->base, "Too many required fields"); + // We want previous data at the end, realloc() put it at the beginning. + // TODO(salo): This is somewhat inefficient since we are copying twice. + // Maybe create a realloc() that copies to the end of the new buffer? + if (old_size > 0) { + memmove(new_buf + new_size - old_size, e->buf, old_size); } - // Next assign non-required hasbit fields. - for (int i = 0; i < n; i++) { - upb_MiniTableField* field = (upb_MiniTableField*)&ret->fields[i]; - if (field->offset == kHasbitPresence) { - field->presence = ++last_hasbit; - } - } + e->ptr = new_buf + new_size - (e->limit - e->ptr); + e->limit = new_buf + new_size; + e->buf = new_buf; - ret->size = last_hasbit ? 
upb_MiniTable_DivideRoundUp(last_hasbit + 1, 8) : 0; + e->ptr -= bytes; } -size_t upb_MtDecoder_Place(upb_MtDecoder* d, upb_FieldRep rep) { - size_t size = upb_MtDecoder_SizeOfRep(rep, d->platform); - size_t align = upb_MtDecoder_AlignOfRep(rep, d->platform); - size_t ret = UPB_ALIGN_UP(d->table->size, align); - static const size_t max = UINT16_MAX; - size_t new_size = ret + size; - if (new_size > max) { - upb_MdDecoder_ErrorJmp( - &d->base, "Message size exceeded maximum size of %zu bytes", max); +/* Call to ensure that at least "bytes" bytes are available for writing at + * e->ptr. Returns false if the bytes could not be allocated. */ +UPB_FORCEINLINE +static void encode_reserve(upb_encstate* e, size_t bytes) { + if ((size_t)(e->ptr - e->buf) < bytes) { + encode_growbuffer(e, bytes); + return; } - d->table->size = new_size; - return ret; -} - -static void upb_MtDecoder_AssignOffsets(upb_MtDecoder* d) { - upb_LayoutItem* end = UPB_PTRADD(d->vec.data, d->vec.size); - // Compute offsets. - for (upb_LayoutItem* item = d->vec.data; item < end; item++) { - item->offset = upb_MtDecoder_Place(d, item->rep); - } + e->ptr -= bytes; +} - // Assign oneof case offsets. We must do these first, since assigning - // actual offsets will overwrite the links of the linked list. - for (upb_LayoutItem* item = d->vec.data; item < end; item++) { - if (item->type != kUpb_LayoutItemType_OneofCase) continue; - upb_MiniTableField* f = &d->fields[item->field_index]; - while (true) { - f->presence = ~item->offset; - if (f->offset == kUpb_LayoutItem_IndexSentinel) break; - UPB_ASSERT(f->offset - kOneofBase < d->table->field_count); - f = &d->fields[f->offset - kOneofBase]; - } - } +/* Writes the given bytes to the buffer, handling reserve/advance. */ +static void encode_bytes(upb_encstate* e, const void* data, size_t len) { + if (len == 0) return; /* memcpy() with zero size is UB */ + encode_reserve(e, len); + memcpy(e->ptr, data, len); +} - // Assign offsets. 
- for (upb_LayoutItem* item = d->vec.data; item < end; item++) { - upb_MiniTableField* f = &d->fields[item->field_index]; - switch (item->type) { - case kUpb_LayoutItemType_OneofField: - while (true) { - uint16_t next_offset = f->offset; - f->offset = item->offset; - if (next_offset == kUpb_LayoutItem_IndexSentinel) break; - f = &d->fields[next_offset - kOneofBase]; - } - break; - case kUpb_LayoutItemType_Field: - f->offset = item->offset; - break; - default: - break; - } - } +static void encode_fixed64(upb_encstate* e, uint64_t val) { + val = _upb_BigEndian_Swap64(val); + encode_bytes(e, &val, sizeof(uint64_t)); +} - // The fasttable parser (supported on 64-bit only) depends on this being a - // multiple of 8 in order to satisfy UPB_MALLOC_ALIGN, which is also 8. - // - // On 32-bit we could potentially make this smaller, but there is no - // compelling reason to optimize this right now. - d->table->size = UPB_ALIGN_UP(d->table->size, 8); +static void encode_fixed32(upb_encstate* e, uint32_t val) { + val = _upb_BigEndian_Swap32(val); + encode_bytes(e, &val, sizeof(uint32_t)); } -static void upb_MtDecoder_ValidateEntryField(upb_MtDecoder* d, - const upb_MiniTableField* f, - uint32_t expected_num) { - const char* name = expected_num == 1 ? 
"key" : "val"; - if (f->number != expected_num) { - upb_MdDecoder_ErrorJmp(&d->base, - "map %s did not have expected number (%d vs %d)", - name, expected_num, (int)f->number); - } +UPB_NOINLINE +static void encode_longvarint(upb_encstate* e, uint64_t val) { + size_t len; + char* start; - if (upb_IsRepeatedOrMap(f)) { - upb_MdDecoder_ErrorJmp( - &d->base, "map %s cannot be repeated or map, or be in oneof", name); - } + encode_reserve(e, UPB_PB_VARINT_MAX_LEN); + len = encode_varint64(val, e->ptr); + start = e->ptr + UPB_PB_VARINT_MAX_LEN - len; + memmove(start, e->ptr, len); + e->ptr = start; +} - uint32_t not_ok_types; - if (expected_num == 1) { - not_ok_types = (1 << kUpb_FieldType_Float) | (1 << kUpb_FieldType_Double) | - (1 << kUpb_FieldType_Message) | (1 << kUpb_FieldType_Group) | - (1 << kUpb_FieldType_Bytes) | (1 << kUpb_FieldType_Enum); +UPB_FORCEINLINE +static void encode_varint(upb_encstate* e, uint64_t val) { + if (val < 128 && e->ptr != e->buf) { + --e->ptr; + *e->ptr = val; } else { - not_ok_types = 1 << kUpb_FieldType_Group; - } - - if ((1 << upb_MiniTableField_Type(f)) & not_ok_types) { - upb_MdDecoder_ErrorJmp(&d->base, "map %s cannot have type %d", name, - (int)f->UPB_PRIVATE(descriptortype)); + encode_longvarint(e, val); } } -static void upb_MtDecoder_ParseMap(upb_MtDecoder* d, const char* data, - size_t len) { - upb_MtDecoder_ParseMessage(d, data, len); - upb_MtDecoder_AssignHasbits(d); +static void encode_double(upb_encstate* e, double d) { + uint64_t u64; + UPB_ASSERT(sizeof(double) == sizeof(uint64_t)); + memcpy(&u64, &d, sizeof(uint64_t)); + encode_fixed64(e, u64); +} - if (UPB_UNLIKELY(d->table->field_count != 2)) { - upb_MdDecoder_ErrorJmp(&d->base, "%hu fields in map", - d->table->field_count); - UPB_UNREACHABLE(); - } +static void encode_float(upb_encstate* e, float d) { + uint32_t u32; + UPB_ASSERT(sizeof(float) == sizeof(uint32_t)); + memcpy(&u32, &d, sizeof(uint32_t)); + encode_fixed32(e, u32); +} - upb_LayoutItem* end = 
UPB_PTRADD(d->vec.data, d->vec.size); - for (upb_LayoutItem* item = d->vec.data; item < end; item++) { - if (item->type == kUpb_LayoutItemType_OneofCase) { - upb_MdDecoder_ErrorJmp(&d->base, "Map entry cannot have oneof"); - } - } +static void encode_tag(upb_encstate* e, uint32_t field_number, + uint8_t wire_type) { + encode_varint(e, (field_number << 3) | wire_type); +} - upb_MtDecoder_ValidateEntryField(d, &d->table->fields[0], 1); - upb_MtDecoder_ValidateEntryField(d, &d->table->fields[1], 2); +static void encode_fixedarray(upb_encstate* e, const upb_Array* arr, + size_t elem_size, uint32_t tag) { + size_t bytes = arr->size * elem_size; + const char* data = _upb_array_constptr(arr); + const char* ptr = data + bytes - elem_size; - // Map entries have a pre-determined layout, regardless of types. - // NOTE: sync with mini_table/message_internal.h. - const size_t kv_size = d->platform == kUpb_MiniTablePlatform_32Bit ? 8 : 16; - const size_t hasbit_size = 8; - d->fields[0].offset = hasbit_size; - d->fields[1].offset = hasbit_size + kv_size; - d->table->size = UPB_ALIGN_UP(hasbit_size + kv_size + kv_size, 8); + if (tag || !_upb_IsLittleEndian()) { + while (true) { + if (elem_size == 4) { + uint32_t val; + memcpy(&val, ptr, sizeof(val)); + val = _upb_BigEndian_Swap32(val); + encode_bytes(e, &val, elem_size); + } else { + UPB_ASSERT(elem_size == 8); + uint64_t val; + memcpy(&val, ptr, sizeof(val)); + val = _upb_BigEndian_Swap64(val); + encode_bytes(e, &val, elem_size); + } - // Map entries have a special bit set to signal it's a map entry, used in - // upb_MiniTable_SetSubMessage() below. 
- d->table->ext |= kUpb_ExtMode_IsMapEntry; + if (tag) encode_varint(e, tag); + if (ptr == data) break; + ptr -= elem_size; + } + } else { + encode_bytes(e, data, bytes); + } } -static void upb_MtDecoder_ParseMessageSet(upb_MtDecoder* d, const char* data, - size_t len) { - if (len > 0) { - upb_MdDecoder_ErrorJmp(&d->base, "Invalid message set encode length: %zu", - len); - } +static void encode_message(upb_encstate* e, const upb_Message* msg, + const upb_MiniTable* m, size_t* size); - upb_MiniTable* ret = d->table; - ret->size = 0; - ret->field_count = 0; - ret->ext = kUpb_ExtMode_IsMessageSet; - ret->dense_below = 0; - ret->table_mask = -1; - ret->required_count = 0; +static void encode_TaggedMessagePtr(upb_encstate* e, + upb_TaggedMessagePtr tagged, + const upb_MiniTable* m, size_t* size) { + if (upb_TaggedMessagePtr_IsEmpty(tagged)) { + m = &_kUpb_MiniTable_Empty; + } + encode_message(e, _upb_TaggedMessagePtr_GetMessage(tagged), m, size); } -static upb_MiniTable* upb_MtDecoder_DoBuildMiniTableWithBuf( - upb_MtDecoder* decoder, const char* data, size_t len, void** buf, - size_t* buf_size) { - upb_MdDecoder_CheckOutOfMemory(&decoder->base, decoder->table); - - decoder->table->size = 0; - decoder->table->field_count = 0; - decoder->table->ext = kUpb_ExtMode_NonExtendable; - decoder->table->dense_below = 0; - decoder->table->table_mask = -1; - decoder->table->required_count = 0; +static void encode_scalar(upb_encstate* e, const void* _field_mem, + const upb_MiniTableSub* subs, + const upb_MiniTableField* f) { + const char* field_mem = _field_mem; + int wire_type; - // Strip off and verify the version tag. 
- if (!len--) goto done; - const char vers = *data++; +#define CASE(ctype, type, wtype, encodeval) \ + { \ + ctype val = *(ctype*)field_mem; \ + encode_##type(e, encodeval); \ + wire_type = wtype; \ + break; \ + } - switch (vers) { - case kUpb_EncodedVersion_MapV1: - upb_MtDecoder_ParseMap(decoder, data, len); + switch (f->UPB_PRIVATE(descriptortype)) { + case kUpb_FieldType_Double: + CASE(double, double, kUpb_WireType_64Bit, val); + case kUpb_FieldType_Float: + CASE(float, float, kUpb_WireType_32Bit, val); + case kUpb_FieldType_Int64: + case kUpb_FieldType_UInt64: + CASE(uint64_t, varint, kUpb_WireType_Varint, val); + case kUpb_FieldType_UInt32: + CASE(uint32_t, varint, kUpb_WireType_Varint, val); + case kUpb_FieldType_Int32: + case kUpb_FieldType_Enum: + CASE(int32_t, varint, kUpb_WireType_Varint, (int64_t)val); + case kUpb_FieldType_SFixed64: + case kUpb_FieldType_Fixed64: + CASE(uint64_t, fixed64, kUpb_WireType_64Bit, val); + case kUpb_FieldType_Fixed32: + case kUpb_FieldType_SFixed32: + CASE(uint32_t, fixed32, kUpb_WireType_32Bit, val); + case kUpb_FieldType_Bool: + CASE(bool, varint, kUpb_WireType_Varint, val); + case kUpb_FieldType_SInt32: + CASE(int32_t, varint, kUpb_WireType_Varint, encode_zz32(val)); + case kUpb_FieldType_SInt64: + CASE(int64_t, varint, kUpb_WireType_Varint, encode_zz64(val)); + case kUpb_FieldType_String: + case kUpb_FieldType_Bytes: { + upb_StringView view = *(upb_StringView*)field_mem; + encode_bytes(e, view.data, view.size); + encode_varint(e, view.size); + wire_type = kUpb_WireType_Delimited; break; - - case kUpb_EncodedVersion_MessageV1: - upb_MtDecoder_ParseMessage(decoder, data, len); - upb_MtDecoder_AssignHasbits(decoder); - upb_MtDecoder_SortLayoutItems(decoder); - upb_MtDecoder_AssignOffsets(decoder); + } + case kUpb_FieldType_Group: { + size_t size; + upb_TaggedMessagePtr submsg = *(upb_TaggedMessagePtr*)field_mem; + const upb_MiniTable* subm = subs[f->UPB_PRIVATE(submsg_index)].submsg; + if (submsg == 0) { + return; + } + if 
(--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded); + encode_tag(e, f->number, kUpb_WireType_EndGroup); + encode_TaggedMessagePtr(e, submsg, subm, &size); + wire_type = kUpb_WireType_StartGroup; + e->depth++; break; - - case kUpb_EncodedVersion_MessageSetV1: - upb_MtDecoder_ParseMessageSet(decoder, data, len); + } + case kUpb_FieldType_Message: { + size_t size; + upb_TaggedMessagePtr submsg = *(upb_TaggedMessagePtr*)field_mem; + const upb_MiniTable* subm = subs[f->UPB_PRIVATE(submsg_index)].submsg; + if (submsg == 0) { + return; + } + if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded); + encode_TaggedMessagePtr(e, submsg, subm, &size); + encode_varint(e, size); + wire_type = kUpb_WireType_Delimited; + e->depth++; break; - + } default: - upb_MdDecoder_ErrorJmp(&decoder->base, "Invalid message version: %c", - vers); + UPB_UNREACHABLE(); } +#undef CASE -done: - *buf = decoder->vec.data; - *buf_size = decoder->vec.capacity * sizeof(*decoder->vec.data); - return decoder->table; + encode_tag(e, f->number, wire_type); } -static upb_MiniTable* upb_MtDecoder_BuildMiniTableWithBuf( - upb_MtDecoder* const decoder, const char* const data, const size_t len, - void** const buf, size_t* const buf_size) { - if (UPB_SETJMP(decoder->base.err) != 0) { - *buf = decoder->vec.data; - *buf_size = decoder->vec.capacity * sizeof(*decoder->vec.data); - return NULL; - } +static void encode_array(upb_encstate* e, const upb_Message* msg, + const upb_MiniTableSub* subs, + const upb_MiniTableField* f) { + const upb_Array* arr = *UPB_PTR_AT(msg, f->offset, upb_Array*); + bool packed = f->mode & kUpb_LabelFlags_IsPacked; + size_t pre_len = e->limit - e->ptr; - return upb_MtDecoder_DoBuildMiniTableWithBuf(decoder, data, len, buf, - buf_size); -} + if (arr == NULL || arr->size == 0) { + return; + } -upb_MiniTable* upb_MiniTable_BuildWithBuf(const char* data, size_t len, - upb_MiniTablePlatform platform, - upb_Arena* arena, void** buf, - size_t* buf_size, - 
upb_Status* status) { - upb_MtDecoder decoder = { - .base = {.status = status}, - .platform = platform, - .vec = - { - .data = *buf, - .capacity = *buf_size / sizeof(*decoder.vec.data), - .size = 0, - }, - .arena = arena, - .table = upb_Arena_Malloc(arena, sizeof(*decoder.table)), - }; +#define VARINT_CASE(ctype, encode) \ + { \ + const ctype* start = _upb_array_constptr(arr); \ + const ctype* ptr = start + arr->size; \ + uint32_t tag = packed ? 0 : (f->number << 3) | kUpb_WireType_Varint; \ + do { \ + ptr--; \ + encode_varint(e, encode); \ + if (tag) encode_varint(e, tag); \ + } while (ptr != start); \ + } \ + break; - return upb_MtDecoder_BuildMiniTableWithBuf(&decoder, data, len, buf, - buf_size); -} +#define TAG(wire_type) (packed ? 0 : (f->number << 3 | wire_type)) -static const char* upb_MtDecoder_DoBuildMiniTableExtension( - upb_MtDecoder* decoder, const char* data, size_t len, - upb_MiniTableExtension* ext, const upb_MiniTable* extendee, - upb_MiniTableSub sub) { - // If the string is non-empty then it must begin with a version tag. 
- if (len) { - if (*data != kUpb_EncodedVersion_ExtensionV1) { - upb_MdDecoder_ErrorJmp(&decoder->base, "Invalid ext version: %c", *data); + switch (f->UPB_PRIVATE(descriptortype)) { + case kUpb_FieldType_Double: + encode_fixedarray(e, arr, sizeof(double), TAG(kUpb_WireType_64Bit)); + break; + case kUpb_FieldType_Float: + encode_fixedarray(e, arr, sizeof(float), TAG(kUpb_WireType_32Bit)); + break; + case kUpb_FieldType_SFixed64: + case kUpb_FieldType_Fixed64: + encode_fixedarray(e, arr, sizeof(uint64_t), TAG(kUpb_WireType_64Bit)); + break; + case kUpb_FieldType_Fixed32: + case kUpb_FieldType_SFixed32: + encode_fixedarray(e, arr, sizeof(uint32_t), TAG(kUpb_WireType_32Bit)); + break; + case kUpb_FieldType_Int64: + case kUpb_FieldType_UInt64: + VARINT_CASE(uint64_t, *ptr); + case kUpb_FieldType_UInt32: + VARINT_CASE(uint32_t, *ptr); + case kUpb_FieldType_Int32: + case kUpb_FieldType_Enum: + VARINT_CASE(int32_t, (int64_t)*ptr); + case kUpb_FieldType_Bool: + VARINT_CASE(bool, *ptr); + case kUpb_FieldType_SInt32: + VARINT_CASE(int32_t, encode_zz32(*ptr)); + case kUpb_FieldType_SInt64: + VARINT_CASE(int64_t, encode_zz64(*ptr)); + case kUpb_FieldType_String: + case kUpb_FieldType_Bytes: { + const upb_StringView* start = _upb_array_constptr(arr); + const upb_StringView* ptr = start + arr->size; + do { + ptr--; + encode_bytes(e, ptr->data, ptr->size); + encode_varint(e, ptr->size); + encode_tag(e, f->number, kUpb_WireType_Delimited); + } while (ptr != start); + return; + } + case kUpb_FieldType_Group: { + const upb_TaggedMessagePtr* start = _upb_array_constptr(arr); + const upb_TaggedMessagePtr* ptr = start + arr->size; + const upb_MiniTable* subm = subs[f->UPB_PRIVATE(submsg_index)].submsg; + if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded); + do { + size_t size; + ptr--; + encode_tag(e, f->number, kUpb_WireType_EndGroup); + encode_TaggedMessagePtr(e, *ptr, subm, &size); + encode_tag(e, f->number, kUpb_WireType_StartGroup); + } while (ptr != start); + 
e->depth++; + return; + } + case kUpb_FieldType_Message: { + const upb_TaggedMessagePtr* start = _upb_array_constptr(arr); + const upb_TaggedMessagePtr* ptr = start + arr->size; + const upb_MiniTable* subm = subs[f->UPB_PRIVATE(submsg_index)].submsg; + if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded); + do { + size_t size; + ptr--; + encode_TaggedMessagePtr(e, *ptr, subm, &size); + encode_varint(e, size); + encode_tag(e, f->number, kUpb_WireType_Delimited); + } while (ptr != start); + e->depth++; + return; } - data++; - len--; } +#undef VARINT_CASE - uint16_t count = 0; - upb_SubCounts sub_counts = {0, 0}; - const char* ret = upb_MtDecoder_Parse(decoder, data, len, ext, sizeof(*ext), - &count, &sub_counts); - if (!ret || count != 1) return NULL; - - upb_MiniTableField* f = &ext->field; - - f->mode |= kUpb_LabelFlags_IsExtension; - f->offset = 0; - f->presence = 0; - - if (extendee->ext & kUpb_ExtMode_IsMessageSet) { - // Extensions of MessageSet must be messages. - if (!upb_IsSubMessage(f)) return NULL; - - // Extensions of MessageSet must be non-repeating. 
- if ((f->mode & kUpb_FieldMode_Mask) == kUpb_FieldMode_Array) return NULL; + if (packed) { + encode_varint(e, e->limit - e->ptr - pre_len); + encode_tag(e, f->number, kUpb_WireType_Delimited); } - - ext->extendee = extendee; - ext->sub = sub; - - return ret; -} - -static const char* upb_MtDecoder_BuildMiniTableExtension( - upb_MtDecoder* const decoder, const char* const data, const size_t len, - upb_MiniTableExtension* const ext, const upb_MiniTable* const extendee, - const upb_MiniTableSub sub) { - if (UPB_SETJMP(decoder->base.err) != 0) return NULL; - return upb_MtDecoder_DoBuildMiniTableExtension(decoder, data, len, ext, - extendee, sub); } -const char* _upb_MiniTableExtension_Init(const char* data, size_t len, - upb_MiniTableExtension* ext, - const upb_MiniTable* extendee, - upb_MiniTableSub sub, - upb_MiniTablePlatform platform, - upb_Status* status) { - upb_MtDecoder decoder = { - .base = {.status = status}, - .arena = NULL, - .table = NULL, - .platform = platform, - }; - - return upb_MtDecoder_BuildMiniTableExtension(&decoder, data, len, ext, - extendee, sub); +static void encode_mapentry(upb_encstate* e, uint32_t number, + const upb_MiniTable* layout, + const upb_MapEntry* ent) { + const upb_MiniTableField* key_field = &layout->fields[0]; + const upb_MiniTableField* val_field = &layout->fields[1]; + size_t pre_len = e->limit - e->ptr; + size_t size; + encode_scalar(e, &ent->data.v, layout->subs, val_field); + encode_scalar(e, &ent->data.k, layout->subs, key_field); + size = (e->limit - e->ptr) - pre_len; + encode_varint(e, size); + encode_tag(e, number, kUpb_WireType_Delimited); } -upb_MiniTableExtension* _upb_MiniTableExtension_Build( - const char* data, size_t len, const upb_MiniTable* extendee, - upb_MiniTableSub sub, upb_MiniTablePlatform platform, upb_Arena* arena, - upb_Status* status) { - upb_MiniTableExtension* ext = - upb_Arena_Malloc(arena, sizeof(upb_MiniTableExtension)); - if (UPB_UNLIKELY(!ext)) return NULL; +static void 
encode_map(upb_encstate* e, const upb_Message* msg, + const upb_MiniTableSub* subs, + const upb_MiniTableField* f) { + const upb_Map* map = *UPB_PTR_AT(msg, f->offset, const upb_Map*); + const upb_MiniTable* layout = subs[f->UPB_PRIVATE(submsg_index)].submsg; + UPB_ASSERT(layout->field_count == 2); - const char* ptr = _upb_MiniTableExtension_Init(data, len, ext, extendee, sub, - platform, status); - if (UPB_UNLIKELY(!ptr)) return NULL; + if (map == NULL) return; - return ext; + if (e->options & kUpb_EncodeOption_Deterministic) { + _upb_sortedmap sorted; + _upb_mapsorter_pushmap(&e->sorter, + layout->fields[0].UPB_PRIVATE(descriptortype), map, + &sorted); + upb_MapEntry ent; + while (_upb_sortedmap_next(&e->sorter, map, &sorted, &ent)) { + encode_mapentry(e, f->number, layout, &ent); + } + _upb_mapsorter_popmap(&e->sorter, &sorted); + } else { + intptr_t iter = UPB_STRTABLE_BEGIN; + upb_StringView key; + upb_value val; + while (upb_strtable_next2(&map->table, &key, &val, &iter)) { + upb_MapEntry ent; + _upb_map_fromkey(key, &ent.data.k, map->key_size); + _upb_map_fromvalue(val, &ent.data.v, map->val_size); + encode_mapentry(e, f->number, layout, &ent); + } + } } -upb_MiniTable* _upb_MiniTable_Build(const char* data, size_t len, - upb_MiniTablePlatform platform, - upb_Arena* arena, upb_Status* status) { - void* buf = NULL; - size_t size = 0; - upb_MiniTable* ret = upb_MiniTable_BuildWithBuf(data, len, platform, arena, - &buf, &size, status); - free(buf); - return ret; +static bool encode_shouldencode(upb_encstate* e, const upb_Message* msg, + const upb_MiniTableSub* subs, + const upb_MiniTableField* f) { + if (f->presence == 0) { + /* Proto3 presence or map/array. 
*/ + const void* mem = UPB_PTR_AT(msg, f->offset, void); + switch (_upb_MiniTableField_GetRep(f)) { + case kUpb_FieldRep_1Byte: { + char ch; + memcpy(&ch, mem, 1); + return ch != 0; + } + case kUpb_FieldRep_4Byte: { + uint32_t u32; + memcpy(&u32, mem, 4); + return u32 != 0; + } + case kUpb_FieldRep_8Byte: { + uint64_t u64; + memcpy(&u64, mem, 8); + return u64 != 0; + } + case kUpb_FieldRep_StringView: { + const upb_StringView* str = (const upb_StringView*)mem; + return str->size != 0; + } + default: + UPB_UNREACHABLE(); + } + } else if (f->presence > 0) { + /* Proto2 presence: hasbit. */ + return _upb_hasbit_field(msg, f); + } else { + /* Field is in a oneof. */ + return _upb_getoneofcase_field(msg, f) == f->number; + } } - -// Must be last. - -bool upb_MiniTable_SetSubMessage(upb_MiniTable* table, - upb_MiniTableField* field, - const upb_MiniTable* sub) { - UPB_ASSERT((uintptr_t)table->fields <= (uintptr_t)field && - (uintptr_t)field < - (uintptr_t)(table->fields + table->field_count)); - UPB_ASSERT(sub); - - const bool sub_is_map = sub->ext & kUpb_ExtMode_IsMapEntry; - - switch (field->UPB_PRIVATE(descriptortype)) { - case kUpb_FieldType_Message: - if (sub_is_map) { - const bool table_is_map = table->ext & kUpb_ExtMode_IsMapEntry; - if (UPB_UNLIKELY(table_is_map)) return false; - - field->mode = (field->mode & ~kUpb_FieldMode_Mask) | kUpb_FieldMode_Map; - } +static void encode_field(upb_encstate* e, const upb_Message* msg, + const upb_MiniTableSub* subs, + const upb_MiniTableField* field) { + switch (upb_FieldMode_Get(field)) { + case kUpb_FieldMode_Array: + encode_array(e, msg, subs, field); break; - - case kUpb_FieldType_Group: - if (UPB_UNLIKELY(sub_is_map)) return false; + case kUpb_FieldMode_Map: + encode_map(e, msg, subs, field); + break; + case kUpb_FieldMode_Scalar: + encode_scalar(e, UPB_PTR_AT(msg, field->offset, void), subs, field); break; - default: - return false; + UPB_UNREACHABLE(); } - - upb_MiniTableSub* table_sub = - 
(void*)&table->subs[field->UPB_PRIVATE(submsg_index)]; - // TODO(haberman): Add this assert back once YouTube is updated to not call - // this function repeatedly. - // UPB_ASSERT(table_sub->submsg == &_kUpb_MiniTable_Empty); - table_sub->submsg = sub; - return true; } -bool upb_MiniTable_SetSubEnum(upb_MiniTable* table, upb_MiniTableField* field, - const upb_MiniTableEnum* sub) { - UPB_ASSERT((uintptr_t)table->fields <= (uintptr_t)field && - (uintptr_t)field < - (uintptr_t)(table->fields + table->field_count)); - UPB_ASSERT(sub); - - upb_MiniTableSub* table_sub = - (void*)&table->subs[field->UPB_PRIVATE(submsg_index)]; - table_sub->subenum = sub; - return true; +static void encode_msgset_item(upb_encstate* e, + const upb_Message_Extension* ext) { + size_t size; + encode_tag(e, kUpb_MsgSet_Item, kUpb_WireType_EndGroup); + encode_message(e, ext->data.ptr, ext->ext->sub.submsg, &size); + encode_varint(e, size); + encode_tag(e, kUpb_MsgSet_Message, kUpb_WireType_Delimited); + encode_varint(e, ext->ext->field.number); + encode_tag(e, kUpb_MsgSet_TypeId, kUpb_WireType_Varint); + encode_tag(e, kUpb_MsgSet_Item, kUpb_WireType_StartGroup); } -uint32_t upb_MiniTable_GetSubList(const upb_MiniTable* mt, - const upb_MiniTableField** subs) { - uint32_t msg_count = 0; - uint32_t enum_count = 0; - - for (int i = 0; i < mt->field_count; i++) { - const upb_MiniTableField* f = &mt->fields[i]; - if (upb_MiniTableField_CType(f) == kUpb_CType_Message) { - *subs = f; - ++subs; - msg_count++; - } +static void encode_ext(upb_encstate* e, const upb_Message_Extension* ext, + bool is_message_set) { + if (UPB_UNLIKELY(is_message_set)) { + encode_msgset_item(e, ext); + } else { + encode_field(e, &ext->data, &ext->ext->sub, &ext->ext->field); } +} - for (int i = 0; i < mt->field_count; i++) { - const upb_MiniTableField* f = &mt->fields[i]; - if (upb_MiniTableField_CType(f) == kUpb_CType_Enum) { - *subs = f; - ++subs; - enum_count++; +static void encode_message(upb_encstate* e, const 
upb_Message* msg, + const upb_MiniTable* m, size_t* size) { + size_t pre_len = e->limit - e->ptr; + + if ((e->options & kUpb_EncodeOption_CheckRequired) && m->required_count) { + uint64_t msg_head; + memcpy(&msg_head, msg, 8); + msg_head = _upb_BigEndian_Swap64(msg_head); + if (upb_MiniTable_requiredmask(m) & ~msg_head) { + encode_err(e, kUpb_EncodeStatus_MissingRequired); } } - return (msg_count << 16) | enum_count; -} + if ((e->options & kUpb_EncodeOption_SkipUnknown) == 0) { + size_t unknown_size; + const char* unknown = upb_Message_GetUnknown(msg, &unknown_size); -// The list of sub_tables and sub_enums must exactly match the number and order -// of sub-message fields and sub-enum fields given by upb_MiniTable_GetSubList() -// above. -bool upb_MiniTable_Link(upb_MiniTable* mt, const upb_MiniTable** sub_tables, - size_t sub_table_count, - const upb_MiniTableEnum** sub_enums, - size_t sub_enum_count) { - uint32_t msg_count = 0; - uint32_t enum_count = 0; + if (unknown) { + encode_bytes(e, unknown, unknown_size); + } + } - for (int i = 0; i < mt->field_count; i++) { - upb_MiniTableField* f = (upb_MiniTableField*)&mt->fields[i]; - if (upb_MiniTableField_CType(f) == kUpb_CType_Message) { - const upb_MiniTable* sub = sub_tables[msg_count++]; - if (msg_count > sub_table_count) return false; - if (sub != NULL) { - if (!upb_MiniTable_SetSubMessage(mt, f, sub)) return false; + if (m->ext != kUpb_ExtMode_NonExtendable) { + /* Encode all extensions together. Unlike C++, we do not attempt to keep + * these in field number order relative to normal fields or even to each + * other. 
*/ + size_t ext_count; + const upb_Message_Extension* ext = _upb_Message_Getexts(msg, &ext_count); + if (ext_count) { + if (e->options & kUpb_EncodeOption_Deterministic) { + _upb_sortedmap sorted; + _upb_mapsorter_pushexts(&e->sorter, ext, ext_count, &sorted); + while (_upb_sortedmap_nextext(&e->sorter, &sorted, &ext)) { + encode_ext(e, ext, m->ext == kUpb_ExtMode_IsMessageSet); + } + _upb_mapsorter_popmap(&e->sorter, &sorted); + } else { + const upb_Message_Extension* end = ext + ext_count; + for (; ext != end; ext++) { + encode_ext(e, ext, m->ext == kUpb_ExtMode_IsMessageSet); + } } } } - for (int i = 0; i < mt->field_count; i++) { - upb_MiniTableField* f = (upb_MiniTableField*)&mt->fields[i]; - if (upb_MiniTableField_IsClosedEnum(f)) { - const upb_MiniTableEnum* sub = sub_enums[enum_count++]; - if (enum_count > sub_enum_count) return false; - if (sub != NULL) { - if (!upb_MiniTable_SetSubEnum(mt, f, sub)) return false; + if (m->field_count) { + const upb_MiniTableField* f = &m->fields[m->field_count]; + const upb_MiniTableField* first = &m->fields[0]; + while (f != first) { + f--; + if (encode_shouldencode(e, msg, m->subs, f)) { + encode_field(e, msg, m->subs, f); } } } - return true; -} - - -const struct upb_MiniTable _kUpb_MiniTable_Empty = { - .subs = NULL, - .fields = NULL, - .size = 0, - .field_count = 0, - .ext = kUpb_ExtMode_NonExtendable, - .dense_below = 0, - .table_mask = -1, - .required_count = 0, -}; - - - -// Must be last. - -#define EXTREG_KEY_SIZE (sizeof(upb_MiniTable*) + sizeof(uint32_t)) - -struct upb_ExtensionRegistry { - upb_Arena* arena; - upb_strtable exts; // Key is upb_MiniTable* concatenated with fieldnum. 
-}; - -static void extreg_key(char* buf, const upb_MiniTable* l, uint32_t fieldnum) { - memcpy(buf, &l, sizeof(l)); - memcpy(buf + sizeof(l), &fieldnum, sizeof(fieldnum)); + *size = (e->limit - e->ptr) - pre_len; } -upb_ExtensionRegistry* upb_ExtensionRegistry_New(upb_Arena* arena) { - upb_ExtensionRegistry* r = upb_Arena_Malloc(arena, sizeof(*r)); - if (!r) return NULL; - r->arena = arena; - if (!upb_strtable_init(&r->exts, 8, arena)) return NULL; - return r; -} +static upb_EncodeStatus upb_Encoder_Encode(upb_encstate* const encoder, + const void* const msg, + const upb_MiniTable* const l, + char** const buf, + size_t* const size) { + // Unfortunately we must continue to perform hackery here because there are + // code paths which blindly copy the returned pointer without bothering to + // check for errors until much later (b/235839510). So we still set *buf to + // NULL on error and we still set it to non-NULL on a successful empty result. + if (UPB_SETJMP(encoder->err) == 0) { + encode_message(encoder, msg, l, size); + *size = encoder->limit - encoder->ptr; + if (*size == 0) { + static char ch; + *buf = &ch; + } else { + UPB_ASSERT(encoder->ptr); + *buf = encoder->ptr; + } + } else { + UPB_ASSERT(encoder->status != kUpb_EncodeStatus_Ok); + *buf = NULL; + *size = 0; + } -UPB_API bool upb_ExtensionRegistry_Add(upb_ExtensionRegistry* r, - const upb_MiniTableExtension* e) { - char buf[EXTREG_KEY_SIZE]; - extreg_key(buf, e->extendee, e->field.number); - if (upb_strtable_lookup2(&r->exts, buf, EXTREG_KEY_SIZE, NULL)) return false; - return upb_strtable_insert(&r->exts, buf, EXTREG_KEY_SIZE, - upb_value_constptr(e), r->arena); + _upb_mapsorter_destroy(&encoder->sorter); + return encoder->status; } -bool upb_ExtensionRegistry_AddArray(upb_ExtensionRegistry* r, - const upb_MiniTableExtension** e, - size_t count) { - const upb_MiniTableExtension** start = e; - const upb_MiniTableExtension** end = UPB_PTRADD(e, count); - for (; e < end; e++) { - if 
(!upb_ExtensionRegistry_Add(r, *e)) goto failure; - } - return true; +upb_EncodeStatus upb_Encode(const void* msg, const upb_MiniTable* l, + int options, upb_Arena* arena, char** buf, + size_t* size) { + upb_encstate e; + unsigned depth = (unsigned)options >> 16; -failure: - // Back out the entries previously added. - for (end = e, e = start; e < end; e++) { - const upb_MiniTableExtension* ext = *e; - char buf[EXTREG_KEY_SIZE]; - extreg_key(buf, ext->extendee, ext->field.number); - upb_strtable_remove2(&r->exts, buf, EXTREG_KEY_SIZE, NULL); - } - return false; -} + e.status = kUpb_EncodeStatus_Ok; + e.arena = arena; + e.buf = NULL; + e.limit = NULL; + e.ptr = NULL; + e.depth = depth ? depth : kUpb_WireFormat_DefaultDepthLimit; + e.options = options; + _upb_mapsorter_init(&e.sorter); -const upb_MiniTableExtension* upb_ExtensionRegistry_Lookup( - const upb_ExtensionRegistry* r, const upb_MiniTable* t, uint32_t num) { - char buf[EXTREG_KEY_SIZE]; - upb_value v; - extreg_key(buf, t, num); - if (upb_strtable_lookup2(&r->exts, buf, EXTREG_KEY_SIZE, &v)) { - return upb_value_getconstptr(v); - } else { - return NULL; - } + return upb_Encoder_Encode(&e, msg, l, buf, size); } -#include - // Must be last. 
-const upb_MiniTableField* upb_MiniTable_FindFieldByNumber( - const upb_MiniTable* t, uint32_t number) { - const size_t i = ((size_t)number) - 1; // 0 wraps to SIZE_MAX - - // Ideal case: index into dense fields - if (i < t->dense_below) { - UPB_ASSERT(t->fields[i].number == number); - return &t->fields[i]; - } - - // Slow case: binary search - int lo = t->dense_below; - int hi = t->field_count - 1; - while (lo <= hi) { - int mid = (lo + hi) / 2; - uint32_t num = t->fields[mid].number; - if (num < number) { - lo = mid + 1; - continue; - } - if (num > number) { - hi = mid - 1; - continue; - } - return &t->fields[mid]; - } - return NULL; -} - -static bool upb_MiniTable_Is_Oneof(const upb_MiniTableField* f) { - return f->presence < 0; -} - -const upb_MiniTableField* upb_MiniTable_GetOneof(const upb_MiniTable* m, - const upb_MiniTableField* f) { - if (UPB_UNLIKELY(!upb_MiniTable_Is_Oneof(f))) { - return NULL; - } - const upb_MiniTableField* ptr = &m->fields[0]; - const upb_MiniTableField* end = &m->fields[m->field_count]; - while (++ptr < end) { - if (ptr->presence == (*f).presence) { - return ptr; +UPB_NOINLINE _upb_WireReader_ReadLongVarintRet +_upb_WireReader_ReadLongVarint(const char* ptr, uint64_t val) { + _upb_WireReader_ReadLongVarintRet ret = {NULL, 0}; + uint64_t byte; + int i; + for (i = 1; i < 10; i++) { + byte = (uint8_t)ptr[i]; + val += (byte - 1) << (i * 7); + if (!(byte & 0x80)) { + ret.ptr = ptr + i + 1; + ret.val = val; + return ret; } } - return NULL; + return ret; } -bool upb_MiniTable_NextOneofField(const upb_MiniTable* m, - const upb_MiniTableField** f) { - const upb_MiniTableField* ptr = *f; - const upb_MiniTableField* end = &m->fields[m->field_count]; - while (++ptr < end) { - if (ptr->presence == (*f)->presence) { - *f = ptr; - return true; - } +const char* _upb_WireReader_SkipGroup(const char* ptr, uint32_t tag, + int depth_limit, + upb_EpsCopyInputStream* stream) { + if (--depth_limit == 0) return NULL; + uint32_t end_group_tag = (tag & ~7ULL) 
| kUpb_WireType_EndGroup; + while (!upb_EpsCopyInputStream_IsDone(stream, &ptr)) { + uint32_t tag; + ptr = upb_WireReader_ReadTag(ptr, &tag); + if (!ptr) return NULL; + if (tag == end_group_tag) return ptr; + ptr = _upb_WireReader_SkipValue(ptr, tag, depth_limit, stream); + if (!ptr) return NULL; } - return false; + return ptr; } // This should #undef all macros #defined in def.inc diff --git a/ruby/ext/google/protobuf_c/ruby-upb.h b/ruby/ext/google/protobuf_c/ruby-upb.h index 41be731db1..b9f1b6b3d0 100755 --- a/ruby/ext/google/protobuf_c/ruby-upb.h +++ b/ruby/ext/google/protobuf_c/ruby-upb.h @@ -359,8 +359,8 @@ void upb_Status_VAppendErrorFormat(upb_Status* status, const char* fmt, #endif /* UPB_BASE_STATUS_H_ */ -#ifndef UPB_INTERNAL_ARRAY_INTERNAL_H_ -#define UPB_INTERNAL_ARRAY_INTERNAL_H_ +#ifndef UPB_COLLECTIONS_INTERNAL_ARRAY_H_ +#define UPB_COLLECTIONS_INTERNAL_ARRAY_H_ #include @@ -1366,7 +1366,7 @@ UPB_INLINE void _upb_array_detach(const void* msg, size_t ofs) { #endif -#endif /* UPB_INTERNAL_ARRAY_INTERNAL_H_ */ +#endif /* UPB_COLLECTIONS_INTERNAL_ARRAY_H_ */ #ifndef UPB_COLLECTIONS_MAP_H_ #define UPB_COLLECTIONS_MAP_H_ @@ -1479,8 +1479,8 @@ UPB_API upb_MessageValue upb_MapIterator_Value(const upb_Map* map, size_t iter); // EVERYTHING BELOW THIS LINE IS INTERNAL - DO NOT USE ///////////////////////// -#ifndef UPB_COLLECTIONS_MAP_INTERNAL_H_ -#define UPB_COLLECTIONS_MAP_INTERNAL_H_ +#ifndef UPB_COLLECTIONS_INTERNAL_MAP_H_ +#define UPB_COLLECTIONS_INTERNAL_MAP_H_ #ifndef UPB_HASH_STR_TABLE_H_ @@ -1916,47 +1916,18 @@ upb_Map* _upb_Map_New(upb_Arena* a, size_t key_size, size_t value_size); #endif -#endif /* UPB_COLLECTIONS_MAP_INTERNAL_H_ */ - -#ifndef UPB_BASE_LOG2_H_ -#define UPB_BASE_LOG2_H_ - -// Must be last. 
- -#ifdef __cplusplus -extern "C" { -#endif - -UPB_INLINE int upb_Log2Ceiling(int x) { - if (x <= 1) return 0; -#ifdef __GNUC__ - return 32 - __builtin_clz(x - 1); -#else - int lg2 = 0; - while ((1 << lg2) < x) lg2++; - return lg2; -#endif -} - -UPB_INLINE int upb_Log2CeilingSize(int x) { return 1 << upb_Log2Ceiling(x); } - -#ifdef __cplusplus -} /* extern "C" */ -#endif - - -#endif /* UPB_BASE_LOG2_H_ */ +#endif /* UPB_COLLECTIONS_INTERNAL_MAP_H_ */ // EVERYTHING BELOW THIS LINE IS INTERNAL - DO NOT USE ///////////////////////// -#ifndef UPB_COLLECTIONS_MAP_SORTER_INTERNAL_H_ -#define UPB_COLLECTIONS_MAP_SORTER_INTERNAL_H_ +#ifndef UPB_COLLECTIONS_INTERNAL_MAP_SORTER_H_ +#define UPB_COLLECTIONS_INTERNAL_MAP_SORTER_H_ #include -#ifndef UPB_MESSAGE_EXTENSION_INTERNAL_H_ -#define UPB_MESSAGE_EXTENSION_INTERNAL_H_ +#ifndef UPB_MESSAGE_INTERNAL_EXTENSION_H_ +#define UPB_MESSAGE_INTERNAL_EXTENSION_H_ // Public APIs for message operations that do not depend on the schema. @@ -2070,7 +2041,7 @@ const upb_Message_Extension* _upb_Message_Getext( #endif -#endif /* UPB_MESSAGE_EXTENSION_INTERNAL_H_ */ +#endif /* UPB_MESSAGE_INTERNAL_EXTENSION_H_ */ #ifndef UPB_MINI_TABLE_INTERNAL_MAP_ENTRY_DATA_H_ #define UPB_MINI_TABLE_INTERNAL_MAP_ENTRY_DATA_H_ @@ -2110,7 +2081,7 @@ typedef struct { // require 8-byte alignment. double d; }; - // LINT.ThenChange(//depot/google3/third_party/upb/upb/message/internal.h:internal_layout) + // LINT.ThenChange(//depot/google3/third_party/upb/upb/message/internal/message.h:internal_layout) upb_MapEntryData data; } upb_MapEntry; @@ -2184,7 +2155,36 @@ bool _upb_mapsorter_pushexts(_upb_mapsorter* s, #endif -#endif /* UPB_COLLECTIONS_MAP_SORTER_INTERNAL_H_ */ +#endif /* UPB_COLLECTIONS_INTERNAL_MAP_SORTER_H_ */ + +#ifndef UPB_BASE_LOG2_H_ +#define UPB_BASE_LOG2_H_ + +// Must be last. 
+ +#ifdef __cplusplus +extern "C" { +#endif + +UPB_INLINE int upb_Log2Ceiling(int x) { + if (x <= 1) return 0; +#ifdef __GNUC__ + return 32 - __builtin_clz(x - 1); +#else + int lg2 = 0; + while ((1 << lg2) < x) lg2++; + return lg2; +#endif +} + +UPB_INLINE int upb_Log2CeilingSize(int x) { return 1 << upb_Log2Ceiling(x); } + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* UPB_BASE_LOG2_H_ */ #ifndef UPB_GENERATED_CODE_SUPPORT_H_ #define UPB_GENERATED_CODE_SUPPORT_H_ @@ -2244,6 +2244,10 @@ UPB_INLINE void _upb_msg_map_set_value(void* msg, const void* val, #define UPB_MESSAGE_ACCESSORS_H_ +#ifndef UPB_MESSAGE_INTERNAL_ACCESSORS_H_ +#define UPB_MESSAGE_INTERNAL_ACCESSORS_H_ + + /* ** Our memory representation for parsing tables and messages themselves. ** Functions in this file are used by generated code and possibly reflection. @@ -2423,10 +2427,6 @@ bool _upb_Message_AddUnknown(upb_Message* msg, const char* data, size_t len, #endif /* UPB_MESSAGE_INTERNAL_H_ */ -#ifndef UPB_MESSAGE_INTERNAL_ACCESSORS_H_ -#define UPB_MESSAGE_INTERNAL_ACCESSORS_H_ - - // Must be last. #if defined(__GNUC__) && !defined(__clang__) @@ -11732,12 +11732,12 @@ UPB_INLINE const char* upb_WireReader_SkipValue( #endif // UPB_WIRE_READER_H_ -#ifndef UPB_REFLECTION_DEF_BUILDER_INTERNAL_H_ -#define UPB_REFLECTION_DEF_BUILDER_INTERNAL_H_ +#ifndef UPB_MINI_DESCRIPTOR_INTERNAL_DECODER_H_ +#define UPB_MINI_DESCRIPTOR_INTERNAL_DECODER_H_ -#ifndef UPB_REFLECTION_DEF_POOL_INTERNAL_H_ -#define UPB_REFLECTION_DEF_POOL_INTERNAL_H_ +#ifndef UPB_MINI_DESCRIPTOR_INTERNAL_BASE92_H_ +#define UPB_MINI_DESCRIPTOR_INTERNAL_BASE92_H_ // Must be last. 
@@ -11746,117 +11746,377 @@ UPB_INLINE const char* upb_WireReader_SkipValue( extern "C" { #endif -upb_Arena* _upb_DefPool_Arena(const upb_DefPool* s); -size_t _upb_DefPool_BytesLoaded(const upb_DefPool* s); -upb_ExtensionRegistry* _upb_DefPool_ExtReg(const upb_DefPool* s); - -bool _upb_DefPool_InsertExt(upb_DefPool* s, const upb_MiniTableExtension* ext, - const upb_FieldDef* f); -bool _upb_DefPool_InsertSym(upb_DefPool* s, upb_StringView sym, upb_value v, - upb_Status* status); -bool _upb_DefPool_LookupSym(const upb_DefPool* s, const char* sym, size_t size, - upb_value* v); - -void** _upb_DefPool_ScratchData(const upb_DefPool* s); -size_t* _upb_DefPool_ScratchSize(const upb_DefPool* s); -void _upb_DefPool_SetPlatform(upb_DefPool* s, upb_MiniTablePlatform platform); - -// For generated code only: loads a generated descriptor. -typedef struct _upb_DefPool_Init { - struct _upb_DefPool_Init** deps; // Dependencies of this file. - const upb_MiniTableFile* layout; - const char* filename; - upb_StringView descriptor; // Serialized descriptor. -} _upb_DefPool_Init; +UPB_INLINE char _upb_ToBase92(int8_t ch) { + extern const char _kUpb_ToBase92[]; + UPB_ASSERT(0 <= ch && ch < 92); + return _kUpb_ToBase92[ch]; +} -bool _upb_DefPool_LoadDefInit(upb_DefPool* s, const _upb_DefPool_Init* init); +UPB_INLINE char _upb_FromBase92(uint8_t ch) { + extern const int8_t _kUpb_FromBase92[]; + if (' ' > ch || ch > '~') return -1; + return _kUpb_FromBase92[ch - ' ']; +} -// Should only be directly called by tests. This variant lets us suppress -// the use of compiled-in tables, forcing a rebuild of the tables at runtime. 
-bool _upb_DefPool_LoadDefInitEx(upb_DefPool* s, const _upb_DefPool_Init* init, - bool rebuild_minitable); +UPB_INLINE const char* _upb_Base92_DecodeVarint(const char* ptr, + const char* end, char first_ch, + uint8_t min, uint8_t max, + uint32_t* out_val) { + uint32_t val = 0; + uint32_t shift = 0; + const int bits_per_char = + upb_Log2Ceiling(_upb_FromBase92(max) - _upb_FromBase92(min)); + char ch = first_ch; + while (1) { + uint32_t bits = _upb_FromBase92(ch) - _upb_FromBase92(min); + val |= bits << shift; + if (ptr == end || *ptr < min || max < *ptr) { + *out_val = val; + UPB_ASSUME(ptr != NULL); + return ptr; + } + ch = *ptr++; + shift += bits_per_char; + if (shift >= 32) return NULL; + } +} #ifdef __cplusplus } /* extern "C" */ #endif -#endif /* UPB_REFLECTION_DEF_POOL_INTERNAL_H_ */ +#endif // UPB_MINI_DESCRIPTOR_INTERNAL_BASE92_H_ // Must be last. -// We want to copy the options verbatim into the destination options proto. -// We use serialize+parse as our deep copy. -#define UPB_DEF_SET_OPTIONS(target, desc_type, options_type, proto) \ - if (UPB_DESC(desc_type##_has_options)(proto)) { \ - size_t size; \ - char* pb = UPB_DESC(options_type##_serialize)( \ - UPB_DESC(desc_type##_options)(proto), ctx->tmp_arena, &size); \ - if (!pb) _upb_DefBuilder_OomErr(ctx); \ - target = \ - UPB_DESC(options_type##_parse)(pb, size, _upb_DefBuilder_Arena(ctx)); \ - if (!target) _upb_DefBuilder_OomErr(ctx); \ - } else { \ - target = (const UPB_DESC(options_type)*)kUpbDefOptDefault; \ - } +// upb_MdDecoder: used internally for decoding MiniDescriptors for messages, +// extensions, and enums. +typedef struct { + const char* end; + upb_Status* status; + jmp_buf err; +} upb_MdDecoder; -#ifdef __cplusplus -extern "C" { -#endif +UPB_PRINTF(2, 3) +UPB_NORETURN UPB_INLINE void upb_MdDecoder_ErrorJmp(upb_MdDecoder* d, + const char* fmt, ...) 
{ + if (d->status) { + va_list argp; + upb_Status_SetErrorMessage(d->status, "Error building mini table: "); + va_start(argp, fmt); + upb_Status_VAppendErrorFormat(d->status, fmt, argp); + va_end(argp); + } + UPB_LONGJMP(d->err, 1); +} -struct upb_DefBuilder { - upb_DefPool* symtab; - upb_FileDef* file; // File we are building. - upb_Arena* arena; // Allocate defs here. - upb_Arena* tmp_arena; // For temporary allocations. - upb_Status* status; // Record errors here. - const upb_MiniTableFile* layout; // NULL if we should build layouts. - upb_MiniTablePlatform platform; // Platform we are targeting. - int enum_count; // Count of enums built so far. - int msg_count; // Count of messages built so far. - int ext_count; // Count of extensions built so far. - jmp_buf err; // longjmp() on error. -}; +UPB_INLINE void upb_MdDecoder_CheckOutOfMemory(upb_MdDecoder* d, + const void* ptr) { + if (!ptr) upb_MdDecoder_ErrorJmp(d, "Out of memory"); +} -extern const char* kUpbDefOptDefault; +UPB_INLINE const char* upb_MdDecoder_DecodeBase92Varint( + upb_MdDecoder* d, const char* ptr, char first_ch, uint8_t min, uint8_t max, + uint32_t* out_val) { + ptr = _upb_Base92_DecodeVarint(ptr, d->end, first_ch, min, max, out_val); + if (!ptr) upb_MdDecoder_ErrorJmp(d, "Overlong varint"); + return ptr; +} -// ctx->status has already been set elsewhere so just fail/longjmp() -UPB_NORETURN void _upb_DefBuilder_FailJmp(upb_DefBuilder* ctx); -UPB_NORETURN void _upb_DefBuilder_Errf(upb_DefBuilder* ctx, const char* fmt, - ...) UPB_PRINTF(2, 3); -UPB_NORETURN void _upb_DefBuilder_OomErr(upb_DefBuilder* ctx); +#endif // UPB_MINI_DESCRIPTOR_INTERNAL_DECODER_H_ -const char* _upb_DefBuilder_MakeFullName(upb_DefBuilder* ctx, - const char* prefix, - upb_StringView name); +#ifndef UPB_MINI_DESCRIPTOR_INTERNAL_WIRE_CONSTANTS_H_ +#define UPB_MINI_DESCRIPTOR_INTERNAL_WIRE_CONSTANTS_H_ -// Given a symbol and the base symbol inside which it is defined, -// find the symbol's definition. 
-const void* _upb_DefBuilder_ResolveAny(upb_DefBuilder* ctx, - const char* from_name_dbg, - const char* base, upb_StringView sym, - upb_deftype_t* type); -const void* _upb_DefBuilder_Resolve(upb_DefBuilder* ctx, - const char* from_name_dbg, const char* base, - upb_StringView sym, upb_deftype_t type); +// Must be last. -char _upb_DefBuilder_ParseEscape(upb_DefBuilder* ctx, const upb_FieldDef* f, - const char** src, const char* end); +typedef enum { + kUpb_EncodedType_Double = 0, + kUpb_EncodedType_Float = 1, + kUpb_EncodedType_Fixed32 = 2, + kUpb_EncodedType_Fixed64 = 3, + kUpb_EncodedType_SFixed32 = 4, + kUpb_EncodedType_SFixed64 = 5, + kUpb_EncodedType_Int32 = 6, + kUpb_EncodedType_UInt32 = 7, + kUpb_EncodedType_SInt32 = 8, + kUpb_EncodedType_Int64 = 9, + kUpb_EncodedType_UInt64 = 10, + kUpb_EncodedType_SInt64 = 11, + kUpb_EncodedType_OpenEnum = 12, + kUpb_EncodedType_Bool = 13, + kUpb_EncodedType_Bytes = 14, + kUpb_EncodedType_String = 15, + kUpb_EncodedType_Group = 16, + kUpb_EncodedType_Message = 17, + kUpb_EncodedType_ClosedEnum = 18, -const char* _upb_DefBuilder_FullToShort(const char* fullname); + kUpb_EncodedType_RepeatedBase = 20, +} upb_EncodedType; -UPB_INLINE void* _upb_DefBuilder_Alloc(upb_DefBuilder* ctx, size_t bytes) { - if (bytes == 0) return NULL; - void* ret = upb_Arena_Malloc(ctx->arena, bytes); - if (!ret) _upb_DefBuilder_OomErr(ctx); - return ret; -} +typedef enum { + kUpb_EncodedFieldModifier_FlipPacked = 1 << 0, + kUpb_EncodedFieldModifier_IsRequired = 1 << 1, + kUpb_EncodedFieldModifier_IsProto3Singular = 1 << 2, +} upb_EncodedFieldModifier; -// Adds a symbol |v| to the symtab, which must be a def pointer previously -// packed with pack_def(). The def's pointer to upb_FileDef* must be set before -// adding, so we know which entries to remove if building this file fails. 
+enum { + kUpb_EncodedValue_MinField = ' ', + kUpb_EncodedValue_MaxField = 'I', + kUpb_EncodedValue_MinModifier = 'L', + kUpb_EncodedValue_MaxModifier = '[', + kUpb_EncodedValue_End = '^', + kUpb_EncodedValue_MinSkip = '_', + kUpb_EncodedValue_MaxSkip = '~', + kUpb_EncodedValue_OneofSeparator = '~', + kUpb_EncodedValue_FieldSeparator = '|', + kUpb_EncodedValue_MinOneofField = ' ', + kUpb_EncodedValue_MaxOneofField = 'b', + kUpb_EncodedValue_MaxEnumMask = 'A', +}; + +enum { + kUpb_EncodedVersion_EnumV1 = '!', + kUpb_EncodedVersion_ExtensionV1 = '#', + kUpb_EncodedVersion_MapV1 = '%', + kUpb_EncodedVersion_MessageV1 = '$', + kUpb_EncodedVersion_MessageSetV1 = '&', +}; + + +#endif // UPB_MINI_DESCRIPTOR_INTERNAL_WIRE_CONSTANTS_H_ + +#ifndef UPB_MINI_DESCRIPTOR_INTERNAL_MODIFIERS_H_ +#define UPB_MINI_DESCRIPTOR_INTERNAL_MODIFIERS_H_ + +// Must be last. + +typedef enum { + kUpb_FieldModifier_IsRepeated = 1 << 0, + kUpb_FieldModifier_IsPacked = 1 << 1, + kUpb_FieldModifier_IsClosedEnum = 1 << 2, + kUpb_FieldModifier_IsProto3Singular = 1 << 3, + kUpb_FieldModifier_IsRequired = 1 << 4, +} kUpb_FieldModifier; + +typedef enum { + kUpb_MessageModifier_ValidateUtf8 = 1 << 0, + kUpb_MessageModifier_DefaultIsPacked = 1 << 1, + kUpb_MessageModifier_IsExtendable = 1 << 2, +} kUpb_MessageModifier; + + +#endif // UPB_MINI_DESCRIPTOR_INTERNAL_MODIFIERS_H_ + +#ifndef UPB_MINI_DESCRIPTOR_INTERNAL_ENCODE_H_ +#define UPB_MINI_DESCRIPTOR_INTERNAL_ENCODE_H_ + + +// Must be last. + +// If the input buffer has at least this many bytes available, the encoder call +// is guaranteed to succeed (as long as field number order is maintained). +#define kUpb_MtDataEncoder_MinSize 16 + +typedef struct { + char* end; // Limit of the buffer passed as a parameter. + // Aliased to internal-only members in .cc. + char internal[32]; +} upb_MtDataEncoder; + +#ifdef __cplusplus +extern "C" { +#endif + +// Encodes field/oneof information for a given message. 
The sequence of calls +// should look like: +// +// upb_MtDataEncoder e; +// char buf[256]; +// char* ptr = buf; +// e.end = ptr + sizeof(buf); +// unit64_t msg_mod = ...; // bitwise & of kUpb_MessageModifiers or zero +// ptr = upb_MtDataEncoder_StartMessage(&e, ptr, msg_mod); +// // Fields *must* be in field number order. +// ptr = upb_MtDataEncoder_PutField(&e, ptr, ...); +// ptr = upb_MtDataEncoder_PutField(&e, ptr, ...); +// ptr = upb_MtDataEncoder_PutField(&e, ptr, ...); +// +// // If oneofs are present. Oneofs must be encoded after regular fields. +// ptr = upb_MiniTable_StartOneof(&e, ptr) +// ptr = upb_MiniTable_PutOneofField(&e, ptr, ...); +// ptr = upb_MiniTable_PutOneofField(&e, ptr, ...); +// +// ptr = upb_MiniTable_StartOneof(&e, ptr); +// ptr = upb_MiniTable_PutOneofField(&e, ptr, ...); +// ptr = upb_MiniTable_PutOneofField(&e, ptr, ...); +// +// Oneofs must be encoded after all regular fields. +char* upb_MtDataEncoder_StartMessage(upb_MtDataEncoder* e, char* ptr, + uint64_t msg_mod); +char* upb_MtDataEncoder_PutField(upb_MtDataEncoder* e, char* ptr, + upb_FieldType type, uint32_t field_num, + uint64_t field_mod); +char* upb_MtDataEncoder_StartOneof(upb_MtDataEncoder* e, char* ptr); +char* upb_MtDataEncoder_PutOneofField(upb_MtDataEncoder* e, char* ptr, + uint32_t field_num); + +// Encodes the set of values for a given enum. The values must be given in +// order (after casting to uint32_t), and repeats are not allowed. +char* upb_MtDataEncoder_StartEnum(upb_MtDataEncoder* e, char* ptr); +char* upb_MtDataEncoder_PutEnumValue(upb_MtDataEncoder* e, char* ptr, + uint32_t val); +char* upb_MtDataEncoder_EndEnum(upb_MtDataEncoder* e, char* ptr); + +// Encodes an entire mini descriptor for an extension. +char* upb_MtDataEncoder_EncodeExtension(upb_MtDataEncoder* e, char* ptr, + upb_FieldType type, uint32_t field_num, + uint64_t field_mod); + +// Encodes an entire mini descriptor for a map. 
+char* upb_MtDataEncoder_EncodeMap(upb_MtDataEncoder* e, char* ptr, + upb_FieldType key_type, + upb_FieldType value_type, uint64_t key_mod, + uint64_t value_mod); + +// Encodes an entire mini descriptor for a message set. +char* upb_MtDataEncoder_EncodeMessageSet(upb_MtDataEncoder* e, char* ptr); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* UPB_MINI_DESCRIPTOR_INTERNAL_ENCODE_H_ */ + +#ifndef UPB_REFLECTION_DEF_BUILDER_INTERNAL_H_ +#define UPB_REFLECTION_DEF_BUILDER_INTERNAL_H_ + + +#ifndef UPB_REFLECTION_DEF_POOL_INTERNAL_H_ +#define UPB_REFLECTION_DEF_POOL_INTERNAL_H_ + + +// Must be last. + +#ifdef __cplusplus +extern "C" { +#endif + +upb_Arena* _upb_DefPool_Arena(const upb_DefPool* s); +size_t _upb_DefPool_BytesLoaded(const upb_DefPool* s); +upb_ExtensionRegistry* _upb_DefPool_ExtReg(const upb_DefPool* s); + +bool _upb_DefPool_InsertExt(upb_DefPool* s, const upb_MiniTableExtension* ext, + const upb_FieldDef* f); +bool _upb_DefPool_InsertSym(upb_DefPool* s, upb_StringView sym, upb_value v, + upb_Status* status); +bool _upb_DefPool_LookupSym(const upb_DefPool* s, const char* sym, size_t size, + upb_value* v); + +void** _upb_DefPool_ScratchData(const upb_DefPool* s); +size_t* _upb_DefPool_ScratchSize(const upb_DefPool* s); +void _upb_DefPool_SetPlatform(upb_DefPool* s, upb_MiniTablePlatform platform); + +// For generated code only: loads a generated descriptor. +typedef struct _upb_DefPool_Init { + struct _upb_DefPool_Init** deps; // Dependencies of this file. + const upb_MiniTableFile* layout; + const char* filename; + upb_StringView descriptor; // Serialized descriptor. +} _upb_DefPool_Init; + +bool _upb_DefPool_LoadDefInit(upb_DefPool* s, const _upb_DefPool_Init* init); + +// Should only be directly called by tests. This variant lets us suppress +// the use of compiled-in tables, forcing a rebuild of the tables at runtime. 
+bool _upb_DefPool_LoadDefInitEx(upb_DefPool* s, const _upb_DefPool_Init* init, + bool rebuild_minitable); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* UPB_REFLECTION_DEF_POOL_INTERNAL_H_ */ + +// Must be last. + +// We want to copy the options verbatim into the destination options proto. +// We use serialize+parse as our deep copy. +#define UPB_DEF_SET_OPTIONS(target, desc_type, options_type, proto) \ + if (UPB_DESC(desc_type##_has_options)(proto)) { \ + size_t size; \ + char* pb = UPB_DESC(options_type##_serialize)( \ + UPB_DESC(desc_type##_options)(proto), ctx->tmp_arena, &size); \ + if (!pb) _upb_DefBuilder_OomErr(ctx); \ + target = \ + UPB_DESC(options_type##_parse)(pb, size, _upb_DefBuilder_Arena(ctx)); \ + if (!target) _upb_DefBuilder_OomErr(ctx); \ + } else { \ + target = (const UPB_DESC(options_type)*)kUpbDefOptDefault; \ + } + +#ifdef __cplusplus +extern "C" { +#endif + +struct upb_DefBuilder { + upb_DefPool* symtab; + upb_FileDef* file; // File we are building. + upb_Arena* arena; // Allocate defs here. + upb_Arena* tmp_arena; // For temporary allocations. + upb_Status* status; // Record errors here. + const upb_MiniTableFile* layout; // NULL if we should build layouts. + upb_MiniTablePlatform platform; // Platform we are targeting. + int enum_count; // Count of enums built so far. + int msg_count; // Count of messages built so far. + int ext_count; // Count of extensions built so far. + jmp_buf err; // longjmp() on error. +}; + +extern const char* kUpbDefOptDefault; + +// ctx->status has already been set elsewhere so just fail/longjmp() +UPB_NORETURN void _upb_DefBuilder_FailJmp(upb_DefBuilder* ctx); + +UPB_NORETURN void _upb_DefBuilder_Errf(upb_DefBuilder* ctx, const char* fmt, + ...) 
UPB_PRINTF(2, 3); +UPB_NORETURN void _upb_DefBuilder_OomErr(upb_DefBuilder* ctx); + +const char* _upb_DefBuilder_MakeFullName(upb_DefBuilder* ctx, + const char* prefix, + upb_StringView name); + +// Given a symbol and the base symbol inside which it is defined, +// find the symbol's definition. +const void* _upb_DefBuilder_ResolveAny(upb_DefBuilder* ctx, + const char* from_name_dbg, + const char* base, upb_StringView sym, + upb_deftype_t* type); + +const void* _upb_DefBuilder_Resolve(upb_DefBuilder* ctx, + const char* from_name_dbg, const char* base, + upb_StringView sym, upb_deftype_t type); + +char _upb_DefBuilder_ParseEscape(upb_DefBuilder* ctx, const upb_FieldDef* f, + const char** src, const char* end); + +const char* _upb_DefBuilder_FullToShort(const char* fullname); + +UPB_INLINE void* _upb_DefBuilder_Alloc(upb_DefBuilder* ctx, size_t bytes) { + if (bytes == 0) return NULL; + void* ret = upb_Arena_Malloc(ctx->arena, bytes); + if (!ret) _upb_DefBuilder_OomErr(ctx); + return ret; +} + +// Adds a symbol |v| to the symtab, which must be a def pointer previously +// packed with pack_def(). The def's pointer to upb_FileDef* must be set before +// adding, so we know which entries to remove if building this file fails. UPB_INLINE void _upb_DefBuilder_Add(upb_DefBuilder* ctx, const char* name, upb_value v) { upb_StringView sym = {.data = name, .size = strlen(name)}; @@ -12067,116 +12327,35 @@ upb_MessageDef* _upb_MessageDefs_New( #endif -#endif /* UPB_REFLECTION_MESSAGE_DEF_INTERNAL_H_ */ - -#ifndef UPB_REFLECTION_SERVICE_DEF_INTERNAL_H_ -#define UPB_REFLECTION_SERVICE_DEF_INTERNAL_H_ - - -// Must be last. - -#ifdef __cplusplus -extern "C" { -#endif - -upb_ServiceDef* _upb_ServiceDef_At(const upb_ServiceDef* s, int i); - -// Allocate and initialize an array of |n| service defs. 
-upb_ServiceDef* _upb_ServiceDefs_New( - upb_DefBuilder* ctx, int n, - const UPB_DESC(ServiceDescriptorProto) * const* protos); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - - -#endif /* UPB_REFLECTION_SERVICE_DEF_INTERNAL_H_ */ - -#ifndef UPB_REFLECTION_DESC_STATE_INTERNAL_H_ -#define UPB_REFLECTION_DESC_STATE_INTERNAL_H_ - - -#ifndef UPB_MINI_DESCRIPTOR_INTERNAL_ENCODE_H_ -#define UPB_MINI_DESCRIPTOR_INTERNAL_ENCODE_H_ - - -// Must be last. +#endif /* UPB_REFLECTION_MESSAGE_DEF_INTERNAL_H_ */ -// If the input buffer has at least this many bytes available, the encoder call -// is guaranteed to succeed (as long as field number order is maintained). -#define kUpb_MtDataEncoder_MinSize 16 +#ifndef UPB_REFLECTION_SERVICE_DEF_INTERNAL_H_ +#define UPB_REFLECTION_SERVICE_DEF_INTERNAL_H_ -typedef struct { - char* end; // Limit of the buffer passed as a parameter. - // Aliased to internal-only members in .cc. - char internal[32]; -} upb_MtDataEncoder; + +// Must be last. #ifdef __cplusplus extern "C" { #endif -// Encodes field/oneof information for a given message. The sequence of calls -// should look like: -// -// upb_MtDataEncoder e; -// char buf[256]; -// char* ptr = buf; -// e.end = ptr + sizeof(buf); -// unit64_t msg_mod = ...; // bitwise & of kUpb_MessageModifiers or zero -// ptr = upb_MtDataEncoder_StartMessage(&e, ptr, msg_mod); -// // Fields *must* be in field number order. -// ptr = upb_MtDataEncoder_PutField(&e, ptr, ...); -// ptr = upb_MtDataEncoder_PutField(&e, ptr, ...); -// ptr = upb_MtDataEncoder_PutField(&e, ptr, ...); -// -// // If oneofs are present. Oneofs must be encoded after regular fields. 
-// ptr = upb_MiniTable_StartOneof(&e, ptr) -// ptr = upb_MiniTable_PutOneofField(&e, ptr, ...); -// ptr = upb_MiniTable_PutOneofField(&e, ptr, ...); -// -// ptr = upb_MiniTable_StartOneof(&e, ptr); -// ptr = upb_MiniTable_PutOneofField(&e, ptr, ...); -// ptr = upb_MiniTable_PutOneofField(&e, ptr, ...); -// -// Oneofs must be encoded after all regular fields. -char* upb_MtDataEncoder_StartMessage(upb_MtDataEncoder* e, char* ptr, - uint64_t msg_mod); -char* upb_MtDataEncoder_PutField(upb_MtDataEncoder* e, char* ptr, - upb_FieldType type, uint32_t field_num, - uint64_t field_mod); -char* upb_MtDataEncoder_StartOneof(upb_MtDataEncoder* e, char* ptr); -char* upb_MtDataEncoder_PutOneofField(upb_MtDataEncoder* e, char* ptr, - uint32_t field_num); - -// Encodes the set of values for a given enum. The values must be given in -// order (after casting to uint32_t), and repeats are not allowed. -char* upb_MtDataEncoder_StartEnum(upb_MtDataEncoder* e, char* ptr); -char* upb_MtDataEncoder_PutEnumValue(upb_MtDataEncoder* e, char* ptr, - uint32_t val); -char* upb_MtDataEncoder_EndEnum(upb_MtDataEncoder* e, char* ptr); - -// Encodes an entire mini descriptor for an extension. -char* upb_MtDataEncoder_EncodeExtension(upb_MtDataEncoder* e, char* ptr, - upb_FieldType type, uint32_t field_num, - uint64_t field_mod); - -// Encodes an entire mini descriptor for a map. -char* upb_MtDataEncoder_EncodeMap(upb_MtDataEncoder* e, char* ptr, - upb_FieldType key_type, - upb_FieldType value_type, uint64_t key_mod, - uint64_t value_mod); +upb_ServiceDef* _upb_ServiceDef_At(const upb_ServiceDef* s, int i); -// Encodes an entire mini descriptor for a message set. -char* upb_MtDataEncoder_EncodeMessageSet(upb_MtDataEncoder* e, char* ptr); +// Allocate and initialize an array of |n| service defs. 
+upb_ServiceDef* _upb_ServiceDefs_New( + upb_DefBuilder* ctx, int n, + const UPB_DESC(ServiceDescriptorProto) * const* protos); #ifdef __cplusplus } /* extern "C" */ #endif -#endif /* UPB_MINI_DESCRIPTOR_INTERNAL_ENCODE_H_ */ +#endif /* UPB_REFLECTION_SERVICE_DEF_INTERNAL_H_ */ + +#ifndef UPB_REFLECTION_DESC_STATE_INTERNAL_H_ +#define UPB_REFLECTION_DESC_STATE_INTERNAL_H_ + // Must be last. @@ -12281,28 +12460,6 @@ upb_ExtensionRange* _upb_ExtensionRanges_New( #endif /* UPB_REFLECTION_EXTENSION_RANGE_INTERNAL_H_ */ -#ifndef UPB_MINI_DESCRIPTOR_INTERNAL_MODIFIERS_H_ -#define UPB_MINI_DESCRIPTOR_INTERNAL_MODIFIERS_H_ - -// Must be last. - -typedef enum { - kUpb_FieldModifier_IsRepeated = 1 << 0, - kUpb_FieldModifier_IsPacked = 1 << 1, - kUpb_FieldModifier_IsClosedEnum = 1 << 2, - kUpb_FieldModifier_IsProto3Singular = 1 << 3, - kUpb_FieldModifier_IsRequired = 1 << 4, -} kUpb_FieldModifier; - -typedef enum { - kUpb_MessageModifier_ValidateUtf8 = 1 << 0, - kUpb_MessageModifier_DefaultIsPacked = 1 << 1, - kUpb_MessageModifier_IsExtendable = 1 << 2, -} kUpb_MessageModifier; - - -#endif // UPB_MINI_DESCRIPTOR_INTERNAL_MODIFIERS_H_ - #ifndef UPB_REFLECTION_ONEOF_DEF_INTERNAL_H_ #define UPB_REFLECTION_ONEOF_DEF_INTERNAL_H_ @@ -12556,163 +12713,6 @@ UPB_INLINE uint32_t _upb_FastDecoder_LoadTag(const char* ptr) { #endif /* UPB_WIRE_INTERNAL_DECODE_H_ */ -#ifndef UPB_MINI_DESCRIPTOR_INTERNAL_BASE92_H_ -#define UPB_MINI_DESCRIPTOR_INTERNAL_BASE92_H_ - - -// Must be last. 
- -#ifdef __cplusplus -extern "C" { -#endif - -UPB_INLINE char _upb_ToBase92(int8_t ch) { - extern const char _kUpb_ToBase92[]; - UPB_ASSERT(0 <= ch && ch < 92); - return _kUpb_ToBase92[ch]; -} - -UPB_INLINE char _upb_FromBase92(uint8_t ch) { - extern const int8_t _kUpb_FromBase92[]; - if (' ' > ch || ch > '~') return -1; - return _kUpb_FromBase92[ch - ' ']; -} - -UPB_INLINE const char* _upb_Base92_DecodeVarint(const char* ptr, - const char* end, char first_ch, - uint8_t min, uint8_t max, - uint32_t* out_val) { - uint32_t val = 0; - uint32_t shift = 0; - const int bits_per_char = - upb_Log2Ceiling(_upb_FromBase92(max) - _upb_FromBase92(min)); - char ch = first_ch; - while (1) { - uint32_t bits = _upb_FromBase92(ch) - _upb_FromBase92(min); - val |= bits << shift; - if (ptr == end || *ptr < min || max < *ptr) { - *out_val = val; - UPB_ASSUME(ptr != NULL); - return ptr; - } - ch = *ptr++; - shift += bits_per_char; - if (shift >= 32) return NULL; - } -} - -#ifdef __cplusplus -} /* extern "C" */ -#endif - - -#endif // UPB_MINI_DESCRIPTOR_INTERNAL_BASE92_H_ - -#ifndef UPB_MINI_DESCRIPTOR_INTERNAL_WIRE_CONSTANTS_H_ -#define UPB_MINI_DESCRIPTOR_INTERNAL_WIRE_CONSTANTS_H_ - - -// Must be last. 
- -typedef enum { - kUpb_EncodedType_Double = 0, - kUpb_EncodedType_Float = 1, - kUpb_EncodedType_Fixed32 = 2, - kUpb_EncodedType_Fixed64 = 3, - kUpb_EncodedType_SFixed32 = 4, - kUpb_EncodedType_SFixed64 = 5, - kUpb_EncodedType_Int32 = 6, - kUpb_EncodedType_UInt32 = 7, - kUpb_EncodedType_SInt32 = 8, - kUpb_EncodedType_Int64 = 9, - kUpb_EncodedType_UInt64 = 10, - kUpb_EncodedType_SInt64 = 11, - kUpb_EncodedType_OpenEnum = 12, - kUpb_EncodedType_Bool = 13, - kUpb_EncodedType_Bytes = 14, - kUpb_EncodedType_String = 15, - kUpb_EncodedType_Group = 16, - kUpb_EncodedType_Message = 17, - kUpb_EncodedType_ClosedEnum = 18, - - kUpb_EncodedType_RepeatedBase = 20, -} upb_EncodedType; - -typedef enum { - kUpb_EncodedFieldModifier_FlipPacked = 1 << 0, - kUpb_EncodedFieldModifier_IsRequired = 1 << 1, - kUpb_EncodedFieldModifier_IsProto3Singular = 1 << 2, -} upb_EncodedFieldModifier; - -enum { - kUpb_EncodedValue_MinField = ' ', - kUpb_EncodedValue_MaxField = 'I', - kUpb_EncodedValue_MinModifier = 'L', - kUpb_EncodedValue_MaxModifier = '[', - kUpb_EncodedValue_End = '^', - kUpb_EncodedValue_MinSkip = '_', - kUpb_EncodedValue_MaxSkip = '~', - kUpb_EncodedValue_OneofSeparator = '~', - kUpb_EncodedValue_FieldSeparator = '|', - kUpb_EncodedValue_MinOneofField = ' ', - kUpb_EncodedValue_MaxOneofField = 'b', - kUpb_EncodedValue_MaxEnumMask = 'A', -}; - -enum { - kUpb_EncodedVersion_EnumV1 = '!', - kUpb_EncodedVersion_ExtensionV1 = '#', - kUpb_EncodedVersion_MapV1 = '%', - kUpb_EncodedVersion_MessageV1 = '$', - kUpb_EncodedVersion_MessageSetV1 = '&', -}; - - -#endif // UPB_MINI_DESCRIPTOR_INTERNAL_WIRE_CONSTANTS_H_ - -#ifndef UPB_MINI_DESCRIPTOR_INTERNAL_DECODER_H_ -#define UPB_MINI_DESCRIPTOR_INTERNAL_DECODER_H_ - - -// Must be last. - -// upb_MdDecoder: used internally for decoding MiniDescriptors for messages, -// extensions, and enums. 
-typedef struct { - const char* end; - upb_Status* status; - jmp_buf err; -} upb_MdDecoder; - -UPB_PRINTF(2, 3) -UPB_NORETURN UPB_INLINE void upb_MdDecoder_ErrorJmp(upb_MdDecoder* d, - const char* fmt, ...) { - if (d->status) { - va_list argp; - upb_Status_SetErrorMessage(d->status, "Error building mini table: "); - va_start(argp, fmt); - upb_Status_VAppendErrorFormat(d->status, fmt, argp); - va_end(argp); - } - UPB_LONGJMP(d->err, 1); -} - -UPB_INLINE void upb_MdDecoder_CheckOutOfMemory(upb_MdDecoder* d, - const void* ptr) { - if (!ptr) upb_MdDecoder_ErrorJmp(d, "Out of memory"); -} - -UPB_INLINE const char* upb_MdDecoder_DecodeBase92Varint( - upb_MdDecoder* d, const char* ptr, char first_ch, uint8_t min, uint8_t max, - uint32_t* out_val) { - ptr = _upb_Base92_DecodeVarint(ptr, d->end, first_ch, min, max, out_val); - if (!ptr) upb_MdDecoder_ErrorJmp(d, "Overlong varint"); - return ptr; -} - - -#endif // UPB_MINI_DESCRIPTOR_INTERNAL_DECODER_H_ - // This should #undef all macros #defined in def.inc #undef UPB_SIZE