diff --git a/.github/workflows/test_runner.yml b/.github/workflows/test_runner.yml index 27cb54e5b7..3b328a89e3 100644 --- a/.github/workflows/test_runner.yml +++ b/.github/workflows/test_runner.yml @@ -176,6 +176,14 @@ jobs: safe-checkout: ${{ needs.check-tag.outputs.checkout-sha }} secrets: inherit + upb: + name: μpb + needs: [check-tag] + uses: ./.github/workflows/test_upb.yml + with: + safe-checkout: ${{ needs.check-tag.outputs.checkout-sha }} + secrets: inherit + staleness: name: Staleness needs: [check-tag] diff --git a/.github/workflows/test_upb.yml b/.github/workflows/test_upb.yml new file mode 100644 index 0000000000..d942e5add3 --- /dev/null +++ b/.github/workflows/test_upb.yml @@ -0,0 +1,41 @@ +name: μpb Tests + +on: + workflow_call: + inputs: + safe-checkout: + required: true + description: "The SHA key for the commit we want to run over" + type: string + +permissions: + contents: read + +jobs: + linux: + strategy: + fail-fast: false # Don't cancel all jobs if one fails. + matrix: + config: + - { name: "Fastbuild" } + - { name: "Optimized", flags: "-c opt" } + - { name: "FastTable", flags: "--@upb//:fasttable_enabled=true" } + include: + # Set defaults + - image: us-docker.pkg.dev/protobuf-build/containers/test/linux/sanitize@sha256:04cd765285bc52cbbf51d66c8c66d8603579cf0f19cc42df26b09d2c270541fb + - targets: "@upb//..." + name: ${{ matrix.config.name }} + runs-on: ubuntu-latest + + steps: + - name: Checkout pending changes + uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c # v3.3.0 + with: + ref: ${{ inputs.safe-checkout }} + - name: Run tests + uses: protocolbuffers/protobuf-ci/bazel-docker@v2 + with: + image: ${{ matrix.image }} + credentials: ${{ secrets.GAR_SERVICE_ACCOUNT }} + bazel-cache: upb-bazel + bazel: test --cxxopt=-std=c++17 --host_cxxopt=-std=c++17 ${{ matrix.targets }} ${{ matrix.config.flags }} diff --git a/build_defs/upb.patch b/build_defs/upb.patch deleted file mode 100644 index c3ed912698..0000000000 --- a/build_defs/upb.patch +++ /dev/null @@ -1,11 +0,0 @@ ---- upbc/bootstrap_compiler.bzl -+++ upbc/bootstrap_compiler.bzl -@@ -20,7 +20,7 @@ _upbc_base = "//upbc:protoc-gen-upb" - - # begin:github_only - _is_google3 = False --_extra_proto_path = "-Iexternal/com_google_protobuf/src " -+_extra_proto_path = "-Isrc " - # end:github_only - - def _upbc(stage): diff --git a/php/ext/google/protobuf/php-upb.c b/php/ext/google/protobuf/php-upb.c index 9e5a82e1e6..8e4ede8362 100644 --- a/php/ext/google/protobuf/php-upb.c +++ b/php/ext/google/protobuf/php-upb.c @@ -373,8 +373,8 @@ void upb_Status_VAppendErrorFormat(upb_Status* status, const char* fmt, status->msg[_kUpb_Status_MaxMessage - 1] = '\0'; } -#include +#include // Must be last. @@ -609,6 +609,7 @@ upb_Map* _upb_Map_New(upb_Arena* a, size_t key_size, size_t value_size) { } + // Must be last. static void _upb_mapsorter_getkeys(const void* _a, const void* _b, void* a_key, @@ -6332,6164 +6333,5381 @@ size_t upb_Message_ExtensionCount(const upb_Message* msg) { return count; } -#include // Must be last. -/* The upb core does not generally have a concept of default instances. However - * for descriptor options we make an exception since the max size is known and - * modest (<200 bytes). All types can share a default instance since it is - * initialized to zeroes. - * - * We have to allocate an extra pointer for upb's internal metadata. */ -static const char opt_default_buf[_UPB_MAXOPT_SIZE + sizeof(void*)] = {0}; -const char* kUpbDefOptDefault = &opt_default_buf[sizeof(void*)]; - -const char* _upb_DefBuilder_FullToShort(const char* fullname) { - const char* p; - - if (fullname == NULL) { - return NULL; - } else if ((p = strrchr(fullname, '.')) == NULL) { - /* No '.' in the name, return the full string. */ - return fullname; - } else { - /* Return one past the last '.'. */ - return p + 1; - } -} - -void _upb_DefBuilder_FailJmp(upb_DefBuilder* ctx) { UPB_LONGJMP(ctx->err, 1); } - -void _upb_DefBuilder_Errf(upb_DefBuilder* ctx, const char* fmt, ...) { - va_list argp; - va_start(argp, fmt); - upb_Status_VSetErrorFormat(ctx->status, fmt, argp); - va_end(argp); - _upb_DefBuilder_FailJmp(ctx); -} +typedef struct { + upb_MdDecoder base; + upb_Arena* arena; + upb_MiniTableEnum* enum_table; + uint32_t enum_value_count; + uint32_t enum_data_count; + uint32_t enum_data_capacity; +} upb_MdEnumDecoder; -void _upb_DefBuilder_OomErr(upb_DefBuilder* ctx) { - upb_Status_SetErrorMessage(ctx->status, "out of memory"); - _upb_DefBuilder_FailJmp(ctx); +static size_t upb_MiniTableEnum_Size(size_t count) { + return sizeof(upb_MiniTableEnum) + count * sizeof(uint32_t); } -// Verify a relative identifier string. The loop is branchless for speed. -static void _upb_DefBuilder_CheckIdentNotFull(upb_DefBuilder* ctx, - upb_StringView name) { - bool good = name.size > 0; - - for (size_t i = 0; i < name.size; i++) { - const char c = name.data[i]; - const char d = c | 0x20; // force lowercase - const bool is_alpha = (('a' <= d) & (d <= 'z')) | (c == '_'); - const bool is_numer = ('0' <= c) & (c <= '9') & (i != 0); - - good &= is_alpha | is_numer; +static upb_MiniTableEnum* _upb_MiniTable_AddEnumDataMember(upb_MdEnumDecoder* d, + uint32_t val) { + if (d->enum_data_count == d->enum_data_capacity) { + size_t old_sz = upb_MiniTableEnum_Size(d->enum_data_capacity); + d->enum_data_capacity = UPB_MAX(2, d->enum_data_capacity * 2); + size_t new_sz = upb_MiniTableEnum_Size(d->enum_data_capacity); + d->enum_table = upb_Arena_Realloc(d->arena, d->enum_table, old_sz, new_sz); + upb_MdDecoder_CheckOutOfMemory(&d->base, d->enum_table); } - - if (!good) _upb_DefBuilder_CheckIdentSlow(ctx, name, false); + d->enum_table->data[d->enum_data_count++] = val; + return d->enum_table; } -const char* _upb_DefBuilder_MakeFullName(upb_DefBuilder* ctx, - const char* prefix, - upb_StringView name) { - _upb_DefBuilder_CheckIdentNotFull(ctx, name); - if (prefix) { - // ret = prefix + '.' + name; - size_t n = strlen(prefix); - char* ret = _upb_DefBuilder_Alloc(ctx, n + name.size + 2); - strcpy(ret, prefix); - ret[n] = '.'; - memcpy(&ret[n + 1], name.data, name.size); - ret[n + 1 + name.size] = '\0'; - return ret; +static void upb_MiniTableEnum_BuildValue(upb_MdEnumDecoder* d, uint32_t val) { + upb_MiniTableEnum* table = d->enum_table; + d->enum_value_count++; + if (table->value_count || (val > 512 && d->enum_value_count < val / 32)) { + if (table->value_count == 0) { + assert(d->enum_data_count == table->mask_limit / 32); + } + table = _upb_MiniTable_AddEnumDataMember(d, val); + table->value_count++; } else { - char* ret = upb_strdup2(name.data, name.size, ctx->arena); - if (!ret) _upb_DefBuilder_OomErr(ctx); - return ret; + uint32_t new_mask_limit = ((val / 32) + 1) * 32; + while (table->mask_limit < new_mask_limit) { + table = _upb_MiniTable_AddEnumDataMember(d, 0); + table->mask_limit += 32; + } + table->data[val / 32] |= 1ULL << (val % 32); } } -static bool remove_component(char* base, size_t* len) { - if (*len == 0) return false; - - for (size_t i = *len - 1; i > 0; i--) { - if (base[i] == '.') { - *len = i; - return true; +static upb_MiniTableEnum* upb_MtDecoder_DoBuildMiniTableEnum( + upb_MdEnumDecoder* d, const char* data, size_t len) { + // If the string is non-empty then it must begin with a version tag. + if (len) { + if (*data != kUpb_EncodedVersion_EnumV1) { + upb_MdDecoder_ErrorJmp(&d->base, "Invalid enum version: %c", *data); } + data++; + len--; } - *len = 0; - return true; -} + upb_MdDecoder_CheckOutOfMemory(&d->base, d->enum_table); -const void* _upb_DefBuilder_ResolveAny(upb_DefBuilder* ctx, - const char* from_name_dbg, - const char* base, upb_StringView sym, - upb_deftype_t* type) { - if (sym.size == 0) goto notfound; - upb_value v; - if (sym.data[0] == '.') { - /* Symbols starting with '.' are absolute, so we do a single lookup. - * Slice to omit the leading '.' */ - if (!_upb_DefPool_LookupSym(ctx->symtab, sym.data + 1, sym.size - 1, &v)) { - goto notfound; - } - } else { - /* Remove components from base until we find an entry or run out. */ - size_t baselen = base ? strlen(base) : 0; - char* tmp = malloc(sym.size + baselen + 1); - while (1) { - char* p = tmp; - if (baselen) { - memcpy(p, base, baselen); - p[baselen] = '.'; - p += baselen + 1; - } - memcpy(p, sym.data, sym.size); - p += sym.size; - if (_upb_DefPool_LookupSym(ctx->symtab, tmp, p - tmp, &v)) { - break; - } - if (!remove_component(tmp, &baselen)) { - free(tmp); - goto notfound; + // Guarantee at least 64 bits of mask without checking mask size. + d->enum_table->mask_limit = 64; + d->enum_table = _upb_MiniTable_AddEnumDataMember(d, 0); + d->enum_table = _upb_MiniTable_AddEnumDataMember(d, 0); + + d->enum_table->value_count = 0; + + const char* ptr = data; + uint32_t base = 0; + + while (ptr < d->base.end) { + char ch = *ptr++; + if (ch <= kUpb_EncodedValue_MaxEnumMask) { + uint32_t mask = _upb_FromBase92(ch); + for (int i = 0; i < 5; i++, base++, mask >>= 1) { + if (mask & 1) upb_MiniTableEnum_BuildValue(d, base); } + } else if (kUpb_EncodedValue_MinSkip <= ch && + ch <= kUpb_EncodedValue_MaxSkip) { + uint32_t skip; + ptr = upb_MdDecoder_DecodeBase92Varint(&d->base, ptr, ch, + kUpb_EncodedValue_MinSkip, + kUpb_EncodedValue_MaxSkip, &skip); + base += skip; + } else { + upb_MdDecoder_ErrorJmp(&d->base, "Unexpected character: %c", ch); } - free(tmp); } - *type = _upb_DefType_Type(v); - return _upb_DefType_Unpack(v, *type); - -notfound: - _upb_DefBuilder_Errf(ctx, "couldn't resolve name '" UPB_STRINGVIEW_FORMAT "'", - UPB_STRINGVIEW_ARGS(sym)); + return d->enum_table; } -const void* _upb_DefBuilder_Resolve(upb_DefBuilder* ctx, - const char* from_name_dbg, const char* base, - upb_StringView sym, upb_deftype_t type) { - upb_deftype_t found_type; - const void* ret = - _upb_DefBuilder_ResolveAny(ctx, from_name_dbg, base, sym, &found_type); - if (ret && found_type != type) { - _upb_DefBuilder_Errf(ctx, - "type mismatch when resolving %s: couldn't find " - "name " UPB_STRINGVIEW_FORMAT " with type=%d", - from_name_dbg, UPB_STRINGVIEW_ARGS(sym), (int)type); - } - return ret; +static upb_MiniTableEnum* upb_MtDecoder_BuildMiniTableEnum( + upb_MdEnumDecoder* const decoder, const char* const data, size_t const len) { + if (UPB_SETJMP(decoder->base.err) != 0) return NULL; + return upb_MtDecoder_DoBuildMiniTableEnum(decoder, data, len); } -// Per ASCII this will lower-case a letter. If the result is a letter, the -// input was definitely a letter. If the output is not a letter, this may -// have transformed the character unpredictably. -static char upb_ascii_lower(char ch) { return ch | 0x20; } +upb_MiniTableEnum* upb_MiniDescriptor_BuildEnum(const char* data, size_t len, + upb_Arena* arena, + upb_Status* status) { + upb_MdEnumDecoder decoder = { + .base = + { + .end = UPB_PTRADD(data, len), + .status = status, + }, + .arena = arena, + .enum_table = upb_Arena_Malloc(arena, upb_MiniTableEnum_Size(2)), + .enum_value_count = 0, + .enum_data_count = 0, + .enum_data_capacity = 1, + }; -// isalpha() etc. from are locale-dependent, which we don't want. -static bool upb_isbetween(uint8_t c, uint8_t low, uint8_t high) { - return low <= c && c <= high; + return upb_MtDecoder_BuildMiniTableEnum(&decoder, data, len); } -static bool upb_isletter(char c) { - char lower = upb_ascii_lower(c); - return upb_isbetween(lower, 'a', 'z') || c == '_'; -} -static bool upb_isalphanum(char c) { - return upb_isletter(c) || upb_isbetween(c, '0', '9'); -} +#include +#include -static bool TryGetChar(const char** src, const char* end, char* ch) { - if (*src == end) return false; - *ch = **src; - *src += 1; - return true; -} -static int TryGetHexDigit(const char** src, const char* end) { - char ch; - if (!TryGetChar(src, end, &ch)) return -1; - if ('0' <= ch && ch <= '9') { - return ch - '0'; - } - ch = upb_ascii_lower(ch); - if ('a' <= ch && ch <= 'f') { - return ch - 'a' + 0xa; - } - *src -= 1; // Char wasn't actually a hex digit. - return -1; -} +// Must be last. -static char upb_DefBuilder_ParseHexEscape(upb_DefBuilder* ctx, - const upb_FieldDef* f, - const char** src, const char* end) { - int hex_digit = TryGetHexDigit(src, end); - if (hex_digit < 0) { - _upb_DefBuilder_Errf( - ctx, "\\x must be followed by at least one hex digit (field='%s')", - upb_FieldDef_FullName(f)); - return 0; - } - unsigned int ret = hex_digit; - while ((hex_digit = TryGetHexDigit(src, end)) >= 0) { - ret = (ret << 4) | hex_digit; - } - if (ret > 0xff) { - _upb_DefBuilder_Errf(ctx, "Value of hex escape in field %s exceeds 8 bits", - upb_FieldDef_FullName(f)); - return 0; - } - return ret; -} - -static char TryGetOctalDigit(const char** src, const char* end) { - char ch; - if (!TryGetChar(src, end, &ch)) return -1; - if ('0' <= ch && ch <= '7') { - return ch - '0'; - } - *src -= 1; // Char wasn't actually an octal digit. - return -1; -} - -static char upb_DefBuilder_ParseOctalEscape(upb_DefBuilder* ctx, - const upb_FieldDef* f, - const char** src, const char* end) { - char ch = 0; - for (int i = 0; i < 3; i++) { - char digit; - if ((digit = TryGetOctalDigit(src, end)) >= 0) { - ch = (ch << 3) | digit; - } - } - return ch; -} - -char _upb_DefBuilder_ParseEscape(upb_DefBuilder* ctx, const upb_FieldDef* f, - const char** src, const char* end) { - char ch; - if (!TryGetChar(src, end, &ch)) { - _upb_DefBuilder_Errf(ctx, "unterminated escape sequence in field %s", - upb_FieldDef_FullName(f)); - return 0; - } - switch (ch) { - case 'a': - return '\a'; - case 'b': - return '\b'; - case 'f': - return '\f'; - case 'n': - return '\n'; - case 'r': - return '\r'; - case 't': - return '\t'; - case 'v': - return '\v'; - case '\\': - return '\\'; - case '\'': - return '\''; - case '\"': - return '\"'; - case '?': - return '\?'; - case 'x': - case 'X': - return upb_DefBuilder_ParseHexEscape(ctx, f, src, end); - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - *src -= 1; - return upb_DefBuilder_ParseOctalEscape(ctx, f, src, end); - } - _upb_DefBuilder_Errf(ctx, "Unknown escape sequence: \\%c", ch); -} +// Note: we sort by this number when calculating layout order. +typedef enum { + kUpb_LayoutItemType_OneofCase, // Oneof case. + kUpb_LayoutItemType_OneofField, // Oneof field data. + kUpb_LayoutItemType_Field, // Non-oneof field data. -void _upb_DefBuilder_CheckIdentSlow(upb_DefBuilder* ctx, upb_StringView name, - bool full) { - const char* str = name.data; - const size_t len = name.size; - bool start = true; - for (size_t i = 0; i < len; i++) { - const char c = str[i]; - if (c == '.') { - if (start || !full) { - _upb_DefBuilder_Errf( - ctx, "invalid name: unexpected '.' (" UPB_STRINGVIEW_FORMAT ")", - UPB_STRINGVIEW_ARGS(name)); - } - start = true; - } else if (start) { - if (!upb_isletter(c)) { - _upb_DefBuilder_Errf(ctx, - "invalid name: path components must start with a " - "letter (" UPB_STRINGVIEW_FORMAT ")", - UPB_STRINGVIEW_ARGS(name)); - } - start = false; - } else if (!upb_isalphanum(c)) { - _upb_DefBuilder_Errf( - ctx, - "invalid name: non-alphanumeric character (" UPB_STRINGVIEW_FORMAT - ")", - UPB_STRINGVIEW_ARGS(name)); - } - } - if (start) { - _upb_DefBuilder_Errf(ctx, - "invalid name: empty part (" UPB_STRINGVIEW_FORMAT ")", - UPB_STRINGVIEW_ARGS(name)); - } + kUpb_LayoutItemType_Max = kUpb_LayoutItemType_Field, +} upb_LayoutItemType; - // We should never reach this point. - UPB_ASSERT(false); -} +#define kUpb_LayoutItem_IndexSentinel ((uint16_t)-1) +typedef struct { + // Index of the corresponding field. When this is a oneof field, the field's + // offset will be the index of the next field in a linked list. + uint16_t field_index; + uint16_t offset; + upb_FieldRep rep; + upb_LayoutItemType type; +} upb_LayoutItem; -// Must be last. +typedef struct { + upb_LayoutItem* data; + size_t size; + size_t capacity; +} upb_LayoutItemVector; -struct upb_DefPool { - upb_Arena* arena; - upb_strtable syms; // full_name -> packed def ptr - upb_strtable files; // file_name -> (upb_FileDef*) - upb_inttable exts; // (upb_MiniTableExtension*) -> (upb_FieldDef*) - upb_ExtensionRegistry* extreg; +typedef struct { + upb_MdDecoder base; + upb_MiniTable* table; + upb_MiniTableField* fields; upb_MiniTablePlatform platform; - void* scratch_data; - size_t scratch_size; - size_t bytes_loaded; + upb_LayoutItemVector vec; + upb_Arena* arena; +} upb_MtDecoder; + +// In each field's offset, we temporarily store a presence classifier: +enum PresenceClass { + kNoPresence = 0, + kHasbitPresence = 1, + kRequiredPresence = 2, + kOneofBase = 3, + // Negative values refer to a specific oneof with that number. Positive + // values >= kOneofBase indicate that this field is in a oneof, and specify + // the next field in this oneof's linked list. }; -void upb_DefPool_Free(upb_DefPool* s) { - upb_Arena_Free(s->arena); - upb_gfree(s->scratch_data); - upb_gfree(s); +static bool upb_MtDecoder_FieldIsPackable(upb_MiniTableField* field) { + return (field->mode & kUpb_FieldMode_Array) && + upb_FieldType_IsPackable(field->UPB_PRIVATE(descriptortype)); } -upb_DefPool* upb_DefPool_New(void) { - upb_DefPool* s = upb_gmalloc(sizeof(*s)); - if (!s) return NULL; - - s->arena = upb_Arena_New(); - s->bytes_loaded = 0; - - s->scratch_size = 240; - s->scratch_data = upb_gmalloc(s->scratch_size); - if (!s->scratch_data) goto err; - - if (!upb_strtable_init(&s->syms, 32, s->arena)) goto err; - if (!upb_strtable_init(&s->files, 4, s->arena)) goto err; - if (!upb_inttable_init(&s->exts, s->arena)) goto err; - - s->extreg = upb_ExtensionRegistry_New(s->arena); - if (!s->extreg) goto err; +typedef struct { + uint16_t submsg_count; + uint16_t subenum_count; +} upb_SubCounts; - s->platform = kUpb_MiniTablePlatform_Native; +static void upb_MiniTable_SetTypeAndSub(upb_MiniTableField* field, + upb_FieldType type, + upb_SubCounts* sub_counts, + uint64_t msg_modifiers, + bool is_proto3_enum) { + if (is_proto3_enum) { + UPB_ASSERT(type == kUpb_FieldType_Enum); + type = kUpb_FieldType_Int32; + field->mode |= kUpb_LabelFlags_IsAlternate; + } else if (type == kUpb_FieldType_String && + !(msg_modifiers & kUpb_MessageModifier_ValidateUtf8)) { + type = kUpb_FieldType_Bytes; + field->mode |= kUpb_LabelFlags_IsAlternate; + } - return s; + field->UPB_PRIVATE(descriptortype) = type; -err: - upb_DefPool_Free(s); - return NULL; -} + if (upb_MtDecoder_FieldIsPackable(field) && + (msg_modifiers & kUpb_MessageModifier_DefaultIsPacked)) { + field->mode |= kUpb_LabelFlags_IsPacked; + } -bool _upb_DefPool_InsertExt(upb_DefPool* s, const upb_MiniTableExtension* ext, - const upb_FieldDef* f) { - return upb_inttable_insert(&s->exts, (uintptr_t)ext, upb_value_constptr(f), - s->arena); + if (type == kUpb_FieldType_Message || type == kUpb_FieldType_Group) { + field->UPB_PRIVATE(submsg_index) = sub_counts->submsg_count++; + } else if (type == kUpb_FieldType_Enum) { + // We will need to update this later once we know the total number of + // submsg fields. + field->UPB_PRIVATE(submsg_index) = sub_counts->subenum_count++; + } else { + field->UPB_PRIVATE(submsg_index) = kUpb_NoSub; + } } -bool _upb_DefPool_InsertSym(upb_DefPool* s, upb_StringView sym, upb_value v, - upb_Status* status) { - // TODO: table should support an operation "tryinsert" to avoid the double - // lookup. - if (upb_strtable_lookup2(&s->syms, sym.data, sym.size, NULL)) { - upb_Status_SetErrorFormat(status, "duplicate symbol '%s'", sym.data); - return false; - } - if (!upb_strtable_insert(&s->syms, sym.data, sym.size, v, s->arena)) { - upb_Status_SetErrorMessage(status, "out of memory"); - return false; - } - return true; -} +static const char kUpb_EncodedToType[] = { + [kUpb_EncodedType_Double] = kUpb_FieldType_Double, + [kUpb_EncodedType_Float] = kUpb_FieldType_Float, + [kUpb_EncodedType_Int64] = kUpb_FieldType_Int64, + [kUpb_EncodedType_UInt64] = kUpb_FieldType_UInt64, + [kUpb_EncodedType_Int32] = kUpb_FieldType_Int32, + [kUpb_EncodedType_Fixed64] = kUpb_FieldType_Fixed64, + [kUpb_EncodedType_Fixed32] = kUpb_FieldType_Fixed32, + [kUpb_EncodedType_Bool] = kUpb_FieldType_Bool, + [kUpb_EncodedType_String] = kUpb_FieldType_String, + [kUpb_EncodedType_Group] = kUpb_FieldType_Group, + [kUpb_EncodedType_Message] = kUpb_FieldType_Message, + [kUpb_EncodedType_Bytes] = kUpb_FieldType_Bytes, + [kUpb_EncodedType_UInt32] = kUpb_FieldType_UInt32, + [kUpb_EncodedType_OpenEnum] = kUpb_FieldType_Enum, + [kUpb_EncodedType_SFixed32] = kUpb_FieldType_SFixed32, + [kUpb_EncodedType_SFixed64] = kUpb_FieldType_SFixed64, + [kUpb_EncodedType_SInt32] = kUpb_FieldType_SInt32, + [kUpb_EncodedType_SInt64] = kUpb_FieldType_SInt64, + [kUpb_EncodedType_ClosedEnum] = kUpb_FieldType_Enum, +}; -static const void* _upb_DefPool_Unpack(const upb_DefPool* s, const char* sym, - size_t size, upb_deftype_t type) { - upb_value v; - return upb_strtable_lookup2(&s->syms, sym, size, &v) - ? _upb_DefType_Unpack(v, type) - : NULL; -} +static void upb_MiniTable_SetField(upb_MtDecoder* d, uint8_t ch, + upb_MiniTableField* field, + uint64_t msg_modifiers, + upb_SubCounts* sub_counts) { + static const char kUpb_EncodedToFieldRep[] = { + [kUpb_EncodedType_Double] = kUpb_FieldRep_8Byte, + [kUpb_EncodedType_Float] = kUpb_FieldRep_4Byte, + [kUpb_EncodedType_Int64] = kUpb_FieldRep_8Byte, + [kUpb_EncodedType_UInt64] = kUpb_FieldRep_8Byte, + [kUpb_EncodedType_Int32] = kUpb_FieldRep_4Byte, + [kUpb_EncodedType_Fixed64] = kUpb_FieldRep_8Byte, + [kUpb_EncodedType_Fixed32] = kUpb_FieldRep_4Byte, + [kUpb_EncodedType_Bool] = kUpb_FieldRep_1Byte, + [kUpb_EncodedType_String] = kUpb_FieldRep_StringView, + [kUpb_EncodedType_Bytes] = kUpb_FieldRep_StringView, + [kUpb_EncodedType_UInt32] = kUpb_FieldRep_4Byte, + [kUpb_EncodedType_OpenEnum] = kUpb_FieldRep_4Byte, + [kUpb_EncodedType_SFixed32] = kUpb_FieldRep_4Byte, + [kUpb_EncodedType_SFixed64] = kUpb_FieldRep_8Byte, + [kUpb_EncodedType_SInt32] = kUpb_FieldRep_4Byte, + [kUpb_EncodedType_SInt64] = kUpb_FieldRep_8Byte, + [kUpb_EncodedType_ClosedEnum] = kUpb_FieldRep_4Byte, + }; -bool _upb_DefPool_LookupSym(const upb_DefPool* s, const char* sym, size_t size, - upb_value* v) { - return upb_strtable_lookup2(&s->syms, sym, size, v); -} + char pointer_rep = d->platform == kUpb_MiniTablePlatform_32Bit + ? kUpb_FieldRep_4Byte + : kUpb_FieldRep_8Byte; -upb_ExtensionRegistry* _upb_DefPool_ExtReg(const upb_DefPool* s) { - return s->extreg; + int8_t type = _upb_FromBase92(ch); + if (ch >= _upb_ToBase92(kUpb_EncodedType_RepeatedBase)) { + type -= kUpb_EncodedType_RepeatedBase; + field->mode = kUpb_FieldMode_Array; + field->mode |= pointer_rep << kUpb_FieldRep_Shift; + field->offset = kNoPresence; + } else { + field->mode = kUpb_FieldMode_Scalar; + field->offset = kHasbitPresence; + if (type == kUpb_EncodedType_Group || type == kUpb_EncodedType_Message) { + field->mode |= pointer_rep << kUpb_FieldRep_Shift; + } else if ((unsigned long)type >= sizeof(kUpb_EncodedToFieldRep)) { + upb_MdDecoder_ErrorJmp(&d->base, "Invalid field type: %d", (int)type); + } else { + field->mode |= kUpb_EncodedToFieldRep[type] << kUpb_FieldRep_Shift; + } + } + if ((unsigned long)type >= sizeof(kUpb_EncodedToType)) { + upb_MdDecoder_ErrorJmp(&d->base, "Invalid field type: %d", (int)type); + } + upb_MiniTable_SetTypeAndSub(field, kUpb_EncodedToType[type], sub_counts, + msg_modifiers, type == kUpb_EncodedType_OpenEnum); } -void** _upb_DefPool_ScratchData(const upb_DefPool* s) { - return (void**)&s->scratch_data; -} +static void upb_MtDecoder_ModifyField(upb_MtDecoder* d, + uint32_t message_modifiers, + uint32_t field_modifiers, + upb_MiniTableField* field) { + if (field_modifiers & kUpb_EncodedFieldModifier_FlipPacked) { + if (!upb_MtDecoder_FieldIsPackable(field)) { + upb_MdDecoder_ErrorJmp(&d->base, + "Cannot flip packed on unpackable field %" PRIu32, + field->number); + } + field->mode ^= kUpb_LabelFlags_IsPacked; + } -size_t* _upb_DefPool_ScratchSize(const upb_DefPool* s) { - return (size_t*)&s->scratch_size; -} + bool singular = field_modifiers & kUpb_EncodedFieldModifier_IsProto3Singular; + bool required = field_modifiers & kUpb_EncodedFieldModifier_IsRequired; -void _upb_DefPool_SetPlatform(upb_DefPool* s, upb_MiniTablePlatform platform) { - assert(upb_strtable_count(&s->files) == 0); - s->platform = platform; -} + // Validate. + if ((singular || required) && field->offset != kHasbitPresence) { + upb_MdDecoder_ErrorJmp(&d->base, + "Invalid modifier(s) for repeated field %" PRIu32, + field->number); + } + if (singular && required) { + upb_MdDecoder_ErrorJmp( + &d->base, "Field %" PRIu32 " cannot be both singular and required", + field->number); + } -const upb_MessageDef* upb_DefPool_FindMessageByName(const upb_DefPool* s, - const char* sym) { - return _upb_DefPool_Unpack(s, sym, strlen(sym), UPB_DEFTYPE_MSG); + if (singular) field->offset = kNoPresence; + if (required) { + field->offset = kRequiredPresence; + } } -const upb_MessageDef* upb_DefPool_FindMessageByNameWithSize( - const upb_DefPool* s, const char* sym, size_t len) { - return _upb_DefPool_Unpack(s, sym, len, UPB_DEFTYPE_MSG); +static void upb_MtDecoder_PushItem(upb_MtDecoder* d, upb_LayoutItem item) { + if (d->vec.size == d->vec.capacity) { + size_t new_cap = UPB_MAX(8, d->vec.size * 2); + d->vec.data = realloc(d->vec.data, new_cap * sizeof(*d->vec.data)); + upb_MdDecoder_CheckOutOfMemory(&d->base, d->vec.data); + d->vec.capacity = new_cap; + } + d->vec.data[d->vec.size++] = item; } -const upb_EnumDef* upb_DefPool_FindEnumByName(const upb_DefPool* s, - const char* sym) { - return _upb_DefPool_Unpack(s, sym, strlen(sym), UPB_DEFTYPE_ENUM); -} +static void upb_MtDecoder_PushOneof(upb_MtDecoder* d, upb_LayoutItem item) { + if (item.field_index == kUpb_LayoutItem_IndexSentinel) { + upb_MdDecoder_ErrorJmp(&d->base, "Empty oneof"); + } + item.field_index -= kOneofBase; -const upb_EnumValueDef* upb_DefPool_FindEnumByNameval(const upb_DefPool* s, - const char* sym) { - return _upb_DefPool_Unpack(s, sym, strlen(sym), UPB_DEFTYPE_ENUMVAL); + // Push oneof data. + item.type = kUpb_LayoutItemType_OneofField; + upb_MtDecoder_PushItem(d, item); + + // Push oneof case. + item.rep = kUpb_FieldRep_4Byte; // Field Number. + item.type = kUpb_LayoutItemType_OneofCase; + upb_MtDecoder_PushItem(d, item); } -const upb_FileDef* upb_DefPool_FindFileByName(const upb_DefPool* s, - const char* name) { - upb_value v; - return upb_strtable_lookup(&s->files, name, &v) ? upb_value_getconstptr(v) - : NULL; +size_t upb_MtDecoder_SizeOfRep(upb_FieldRep rep, + upb_MiniTablePlatform platform) { + static const uint8_t kRepToSize32[] = { + [kUpb_FieldRep_1Byte] = 1, + [kUpb_FieldRep_4Byte] = 4, + [kUpb_FieldRep_StringView] = 8, + [kUpb_FieldRep_8Byte] = 8, + }; + static const uint8_t kRepToSize64[] = { + [kUpb_FieldRep_1Byte] = 1, + [kUpb_FieldRep_4Byte] = 4, + [kUpb_FieldRep_StringView] = 16, + [kUpb_FieldRep_8Byte] = 8, + }; + UPB_ASSERT(sizeof(upb_StringView) == + UPB_SIZE(kRepToSize32, kRepToSize64)[kUpb_FieldRep_StringView]); + return platform == kUpb_MiniTablePlatform_32Bit ? kRepToSize32[rep] + : kRepToSize64[rep]; } -const upb_FileDef* upb_DefPool_FindFileByNameWithSize(const upb_DefPool* s, - const char* name, - size_t len) { - upb_value v; - return upb_strtable_lookup2(&s->files, name, len, &v) - ? upb_value_getconstptr(v) - : NULL; +size_t upb_MtDecoder_AlignOfRep(upb_FieldRep rep, + upb_MiniTablePlatform platform) { + static const uint8_t kRepToAlign32[] = { + [kUpb_FieldRep_1Byte] = 1, + [kUpb_FieldRep_4Byte] = 4, + [kUpb_FieldRep_StringView] = 4, + [kUpb_FieldRep_8Byte] = 8, + }; + static const uint8_t kRepToAlign64[] = { + [kUpb_FieldRep_1Byte] = 1, + [kUpb_FieldRep_4Byte] = 4, + [kUpb_FieldRep_StringView] = 8, + [kUpb_FieldRep_8Byte] = 8, + }; + UPB_ASSERT(UPB_ALIGN_OF(upb_StringView) == + UPB_SIZE(kRepToAlign32, kRepToAlign64)[kUpb_FieldRep_StringView]); + return platform == kUpb_MiniTablePlatform_32Bit ? kRepToAlign32[rep] + : kRepToAlign64[rep]; } -const upb_FieldDef* upb_DefPool_FindExtensionByNameWithSize( - const upb_DefPool* s, const char* name, size_t size) { - upb_value v; - if (!upb_strtable_lookup2(&s->syms, name, size, &v)) return NULL; +static const char* upb_MtDecoder_DecodeOneofField(upb_MtDecoder* d, + const char* ptr, + char first_ch, + upb_LayoutItem* item) { + uint32_t field_num; + ptr = upb_MdDecoder_DecodeBase92Varint( + &d->base, ptr, first_ch, kUpb_EncodedValue_MinOneofField, + kUpb_EncodedValue_MaxOneofField, &field_num); + upb_MiniTableField* f = + (void*)upb_MiniTable_FindFieldByNumber(d->table, field_num); - switch (_upb_DefType_Type(v)) { - case UPB_DEFTYPE_FIELD: - return _upb_DefType_Unpack(v, UPB_DEFTYPE_FIELD); - case UPB_DEFTYPE_MSG: { - const upb_MessageDef* m = _upb_DefType_Unpack(v, UPB_DEFTYPE_MSG); - return _upb_MessageDef_InMessageSet(m) - ? upb_MessageDef_NestedExtension(m, 0) - : NULL; - } - default: - break; + if (!f) { + upb_MdDecoder_ErrorJmp(&d->base, + "Couldn't add field number %" PRIu32 + " to oneof, no such field number.", + field_num); + } + if (f->offset != kHasbitPresence) { + upb_MdDecoder_ErrorJmp( + &d->base, + "Cannot add repeated, required, or singular field %" PRIu32 + " to oneof.", + field_num); } - return NULL; + // Oneof storage must be large enough to accommodate the largest member. + int rep = f->mode >> kUpb_FieldRep_Shift; + if (upb_MtDecoder_SizeOfRep(rep, d->platform) > + upb_MtDecoder_SizeOfRep(item->rep, d->platform)) { + item->rep = rep; + } + // Prepend this field to the linked list. + f->offset = item->field_index; + item->field_index = (f - d->fields) + kOneofBase; + return ptr; } -const upb_FieldDef* upb_DefPool_FindExtensionByName(const upb_DefPool* s, - const char* sym) { - return upb_DefPool_FindExtensionByNameWithSize(s, sym, strlen(sym)); -} +static const char* upb_MtDecoder_DecodeOneofs(upb_MtDecoder* d, + const char* ptr) { + upb_LayoutItem item = {.rep = 0, + .field_index = kUpb_LayoutItem_IndexSentinel}; + while (ptr < d->base.end) { + char ch = *ptr++; + if (ch == kUpb_EncodedValue_FieldSeparator) { + // Field separator, no action needed. + } else if (ch == kUpb_EncodedValue_OneofSeparator) { + // End of oneof. + upb_MtDecoder_PushOneof(d, item); + item.field_index = kUpb_LayoutItem_IndexSentinel; // Move to next oneof. + } else { + ptr = upb_MtDecoder_DecodeOneofField(d, ptr, ch, &item); + } + } -const upb_ServiceDef* upb_DefPool_FindServiceByName(const upb_DefPool* s, - const char* name) { - return _upb_DefPool_Unpack(s, name, strlen(name), UPB_DEFTYPE_SERVICE); + // Push final oneof. + upb_MtDecoder_PushOneof(d, item); + return ptr; } -const upb_ServiceDef* upb_DefPool_FindServiceByNameWithSize( - const upb_DefPool* s, const char* name, size_t size) { - return _upb_DefPool_Unpack(s, name, size, UPB_DEFTYPE_SERVICE); +static const char* upb_MtDecoder_ParseModifier(upb_MtDecoder* d, + const char* ptr, char first_ch, + upb_MiniTableField* last_field, + uint64_t* msg_modifiers) { + uint32_t mod; + ptr = upb_MdDecoder_DecodeBase92Varint(&d->base, ptr, first_ch, + kUpb_EncodedValue_MinModifier, + kUpb_EncodedValue_MaxModifier, &mod); + if (last_field) { + upb_MtDecoder_ModifyField(d, *msg_modifiers, mod, last_field); + } else { + if (!d->table) { + upb_MdDecoder_ErrorJmp(&d->base, + "Extensions cannot have message modifiers"); + } + *msg_modifiers = mod; + } + + return ptr; } -const upb_FileDef* upb_DefPool_FindFileContainingSymbol(const upb_DefPool* s, - const char* name) { - upb_value v; - // TODO(haberman): non-extension fields and oneofs. - if (upb_strtable_lookup(&s->syms, name, &v)) { - switch (_upb_DefType_Type(v)) { - case UPB_DEFTYPE_EXT: { - const upb_FieldDef* f = _upb_DefType_Unpack(v, UPB_DEFTYPE_EXT); - return upb_FieldDef_File(f); +static void upb_MtDecoder_AllocateSubs(upb_MtDecoder* d, + upb_SubCounts sub_counts) { + uint32_t total_count = sub_counts.submsg_count + sub_counts.subenum_count; + size_t subs_bytes = sizeof(*d->table->subs) * total_count; + upb_MiniTableSub* subs = upb_Arena_Malloc(d->arena, subs_bytes); + upb_MdDecoder_CheckOutOfMemory(&d->base, subs); + uint32_t i = 0; + for (; i < sub_counts.submsg_count; i++) { + subs[i].submsg = &_kUpb_MiniTable_Empty; + } + if (sub_counts.subenum_count) { + upb_MiniTableField* f = d->fields; + upb_MiniTableField* end_f = f + d->table->field_count; + for (; f < end_f; f++) { + if (f->UPB_PRIVATE(descriptortype) == kUpb_FieldType_Enum) { + f->UPB_PRIVATE(submsg_index) += sub_counts.submsg_count; } - case UPB_DEFTYPE_MSG: { - const upb_MessageDef* m = _upb_DefType_Unpack(v, UPB_DEFTYPE_MSG); - return upb_MessageDef_File(m); + } + for (; i < sub_counts.submsg_count + sub_counts.subenum_count; i++) { + subs[i].subenum = NULL; + } + } + d->table->subs = subs; +} + +static const char* upb_MtDecoder_Parse(upb_MtDecoder* d, const char* ptr, + size_t len, void* fields, + size_t field_size, uint16_t* field_count, + upb_SubCounts* sub_counts) { + uint64_t msg_modifiers = 0; + uint32_t last_field_number = 0; + upb_MiniTableField* last_field = NULL; + bool need_dense_below = d->table != NULL; + + d->base.end = UPB_PTRADD(ptr, len); + + while (ptr < d->base.end) { + char ch = *ptr++; + if (ch <= kUpb_EncodedValue_MaxField) { + if (!d->table && last_field) { + // For extensions, consume only a single field and then return. + return --ptr; } - case UPB_DEFTYPE_ENUM: { - const upb_EnumDef* e = _upb_DefType_Unpack(v, UPB_DEFTYPE_ENUM); - return upb_EnumDef_File(e); + upb_MiniTableField* field = fields; + *field_count += 1; + fields = (char*)fields + field_size; + field->number = ++last_field_number; + last_field = field; + upb_MiniTable_SetField(d, ch, field, msg_modifiers, sub_counts); + } else if (kUpb_EncodedValue_MinModifier <= ch && + ch <= kUpb_EncodedValue_MaxModifier) { + ptr = upb_MtDecoder_ParseModifier(d, ptr, ch, last_field, &msg_modifiers); + if (msg_modifiers & kUpb_MessageModifier_IsExtendable) { + d->table->ext |= kUpb_ExtMode_Extendable; } - case UPB_DEFTYPE_ENUMVAL: { - const upb_EnumValueDef* ev = - _upb_DefType_Unpack(v, UPB_DEFTYPE_ENUMVAL); - return upb_EnumDef_File(upb_EnumValueDef_Enum(ev)); + } else if (ch == kUpb_EncodedValue_End) { + if (!d->table) { + upb_MdDecoder_ErrorJmp(&d->base, "Extensions cannot have oneofs."); } - case UPB_DEFTYPE_SERVICE: { - const upb_ServiceDef* service = - _upb_DefType_Unpack(v, UPB_DEFTYPE_SERVICE); - return upb_ServiceDef_File(service); + ptr = upb_MtDecoder_DecodeOneofs(d, ptr); + } else if (kUpb_EncodedValue_MinSkip <= ch && + ch <= kUpb_EncodedValue_MaxSkip) { + if (need_dense_below) { + d->table->dense_below = d->table->field_count; + need_dense_below = false; } - default: - UPB_UNREACHABLE(); + uint32_t skip; + ptr = upb_MdDecoder_DecodeBase92Varint(&d->base, ptr, ch, + kUpb_EncodedValue_MinSkip, + kUpb_EncodedValue_MaxSkip, &skip); + last_field_number += skip; + last_field_number--; // Next field seen will increment. + } else { + upb_MdDecoder_ErrorJmp(&d->base, "Invalid char: %c", ch); } } - const char* last_dot = strrchr(name, '.'); - if (last_dot) { - const upb_MessageDef* parent = - upb_DefPool_FindMessageByNameWithSize(s, name, last_dot - name); - if (parent) { - const char* shortname = last_dot + 1; - if (upb_MessageDef_FindByNameWithSize(parent, shortname, - strlen(shortname), NULL, NULL)) { - return upb_MessageDef_File(parent); - } - } + if (need_dense_below) { + d->table->dense_below = d->table->field_count; } - return NULL; + return ptr; } -static void remove_filedef(upb_DefPool* s, upb_FileDef* file) { - intptr_t iter = UPB_INTTABLE_BEGIN; - upb_StringView key; - upb_value val; - while (upb_strtable_next2(&s->syms, &key, &val, &iter)) { - const upb_FileDef* f; - switch (_upb_DefType_Type(val)) { - case UPB_DEFTYPE_EXT: - f = upb_FieldDef_File(_upb_DefType_Unpack(val, UPB_DEFTYPE_EXT)); - break; - case UPB_DEFTYPE_MSG: - f = upb_MessageDef_File(_upb_DefType_Unpack(val, UPB_DEFTYPE_MSG)); - break; - case UPB_DEFTYPE_ENUM: - f = upb_EnumDef_File(_upb_DefType_Unpack(val, UPB_DEFTYPE_ENUM)); - break; - case UPB_DEFTYPE_ENUMVAL: - f = upb_EnumDef_File(upb_EnumValueDef_Enum( - _upb_DefType_Unpack(val, UPB_DEFTYPE_ENUMVAL))); - break; - case UPB_DEFTYPE_SERVICE: - f = upb_ServiceDef_File(_upb_DefType_Unpack(val, UPB_DEFTYPE_SERVICE)); - break; - default: - UPB_UNREACHABLE(); - } +static void upb_MtDecoder_ParseMessage(upb_MtDecoder* d, const char* data, + size_t len) { + // Buffer length is an upper bound on the number of fields. We will return + // what we don't use. + d->fields = upb_Arena_Malloc(d->arena, sizeof(*d->fields) * len); + upb_MdDecoder_CheckOutOfMemory(&d->base, d->fields); - if (f == file) upb_strtable_removeiter(&s->syms, &iter); - } + upb_SubCounts sub_counts = {0, 0}; + d->table->field_count = 0; + d->table->fields = d->fields; + upb_MtDecoder_Parse(d, data, len, d->fields, sizeof(*d->fields), + &d->table->field_count, &sub_counts); + + upb_Arena_ShrinkLast(d->arena, d->fields, sizeof(*d->fields) * len, + sizeof(*d->fields) * d->table->field_count); + d->table->fields = d->fields; + upb_MtDecoder_AllocateSubs(d, sub_counts); } -static const upb_FileDef* upb_DefBuilder_AddFileToPool( - upb_DefBuilder* const builder, upb_DefPool* const s, - const UPB_DESC(FileDescriptorProto) * const file_proto, - const upb_StringView name, upb_Status* const status) { - if (UPB_SETJMP(builder->err) != 0) { - UPB_ASSERT(!upb_Status_IsOk(status)); - if (builder->file) { - remove_filedef(s, builder->file); - builder->file = NULL; - } - } else if (!builder->arena || !builder->tmp_arena) { - _upb_DefBuilder_OomErr(builder); - } else { - _upb_FileDef_Create(builder, file_proto); - upb_strtable_insert(&s->files, name.data, name.size, - upb_value_constptr(builder->file), builder->arena); - UPB_ASSERT(upb_Status_IsOk(status)); - upb_Arena_Fuse(s->arena, builder->arena); +int upb_MtDecoder_CompareFields(const void* _a, const void* _b) { + const upb_LayoutItem* a = _a; + const upb_LayoutItem* b = _b; + // Currently we just sort by: + // 1. rep (smallest fields first) + // 2. type (oneof cases first) + // 2. field_index (smallest numbers first) + // The main goal of this is to reduce space lost to padding. + // Later we may have more subtle reasons to prefer a different ordering. + const int rep_bits = upb_Log2Ceiling(kUpb_FieldRep_Max); + const int type_bits = upb_Log2Ceiling(kUpb_LayoutItemType_Max); + const int idx_bits = (sizeof(a->field_index) * 8); + UPB_ASSERT(idx_bits + rep_bits + type_bits < 32); +#define UPB_COMBINE(rep, ty, idx) (((rep << type_bits) | ty) << idx_bits) | idx + uint32_t a_packed = UPB_COMBINE(a->rep, a->type, a->field_index); + uint32_t b_packed = UPB_COMBINE(b->rep, b->type, b->field_index); + assert(a_packed != b_packed); +#undef UPB_COMBINE + return a_packed < b_packed ? -1 : 1; +} + +static bool upb_MtDecoder_SortLayoutItems(upb_MtDecoder* d) { + // Add items for all non-oneof fields (oneofs were already added). + int n = d->table->field_count; + for (int i = 0; i < n; i++) { + upb_MiniTableField* f = &d->fields[i]; + if (f->offset >= kOneofBase) continue; + upb_LayoutItem item = {.field_index = i, + .rep = f->mode >> kUpb_FieldRep_Shift, + .type = kUpb_LayoutItemType_Field}; + upb_MtDecoder_PushItem(d, item); } - if (builder->arena) upb_Arena_Free(builder->arena); - if (builder->tmp_arena) upb_Arena_Free(builder->tmp_arena); - return builder->file; + if (d->vec.size) { + qsort(d->vec.data, d->vec.size, sizeof(*d->vec.data), + upb_MtDecoder_CompareFields); + } + + return true; } -static const upb_FileDef* _upb_DefPool_AddFile( - upb_DefPool* s, const UPB_DESC(FileDescriptorProto) * file_proto, - const upb_MiniTableFile* layout, upb_Status* status) { - const upb_StringView name = UPB_DESC(FileDescriptorProto_name)(file_proto); +static size_t upb_MiniTable_DivideRoundUp(size_t n, size_t d) { + return (n + d - 1) / d; +} - // Determine whether we already know about this file. - { - upb_value v; - if (upb_strtable_lookup2(&s->files, name.data, name.size, &v)) { - upb_Status_SetErrorFormat(status, - "duplicate file name " UPB_STRINGVIEW_FORMAT, - UPB_STRINGVIEW_ARGS(name)); - return NULL; +static void upb_MtDecoder_AssignHasbits(upb_MtDecoder* d) { + upb_MiniTable* ret = d->table; + int n = ret->field_count; + int last_hasbit = 0; // 0 cannot be used. + + // First assign required fields, which must have the lowest hasbits. + for (int i = 0; i < n; i++) { + upb_MiniTableField* field = (upb_MiniTableField*)&ret->fields[i]; + if (field->offset == kRequiredPresence) { + field->presence = ++last_hasbit; + } else if (field->offset == kNoPresence) { + field->presence = 0; } } + ret->required_count = last_hasbit; - upb_DefBuilder ctx = { - .symtab = s, - .layout = layout, - .platform = s->platform, - .msg_count = 0, - .enum_count = 0, - .ext_count = 0, - .status = status, - .file = NULL, - .arena = upb_Arena_New(), - .tmp_arena = upb_Arena_New(), - }; + if (ret->required_count > 63) { + upb_MdDecoder_ErrorJmp(&d->base, "Too many required fields"); + } - return upb_DefBuilder_AddFileToPool(&ctx, s, file_proto, name, status); -} + // Next assign non-required hasbit fields. + for (int i = 0; i < n; i++) { + upb_MiniTableField* field = (upb_MiniTableField*)&ret->fields[i]; + if (field->offset == kHasbitPresence) { + field->presence = ++last_hasbit; + } + } -const upb_FileDef* upb_DefPool_AddFile(upb_DefPool* s, - const UPB_DESC(FileDescriptorProto) * - file_proto, - upb_Status* status) { - return _upb_DefPool_AddFile(s, file_proto, NULL, status); + ret->size = last_hasbit ? upb_MiniTable_DivideRoundUp(last_hasbit + 1, 8) : 0; } -bool _upb_DefPool_LoadDefInitEx(upb_DefPool* s, const _upb_DefPool_Init* init, - bool rebuild_minitable) { - /* Since this function should never fail (it would indicate a bug in upb) we - * print errors to stderr instead of returning error status to the user. */ - _upb_DefPool_Init** deps = init->deps; - UPB_DESC(FileDescriptorProto) * file; - upb_Arena* arena; - upb_Status status; - - upb_Status_Clear(&status); - - if (upb_DefPool_FindFileByName(s, init->filename)) { - return true; +size_t upb_MtDecoder_Place(upb_MtDecoder* d, upb_FieldRep rep) { + size_t size = upb_MtDecoder_SizeOfRep(rep, d->platform); + size_t align = upb_MtDecoder_AlignOfRep(rep, d->platform); + size_t ret = UPB_ALIGN_UP(d->table->size, align); + static const size_t max = UINT16_MAX; + size_t new_size = ret + size; + if (new_size > max) { + upb_MdDecoder_ErrorJmp( + &d->base, "Message size exceeded maximum size of %zu bytes", max); } + d->table->size = new_size; + return ret; +} - arena = upb_Arena_New(); +static void upb_MtDecoder_AssignOffsets(upb_MtDecoder* d) { + upb_LayoutItem* end = UPB_PTRADD(d->vec.data, d->vec.size); - for (; *deps; deps++) { - if (!_upb_DefPool_LoadDefInitEx(s, *deps, rebuild_minitable)) goto err; + // Compute offsets. + for (upb_LayoutItem* item = d->vec.data; item < end; item++) { + item->offset = upb_MtDecoder_Place(d, item->rep); } - file = UPB_DESC(FileDescriptorProto_parse_ex)( - init->descriptor.data, init->descriptor.size, NULL, - kUpb_DecodeOption_AliasString, arena); - s->bytes_loaded += init->descriptor.size; - - if (!file) { - upb_Status_SetErrorFormat( - &status, - "Failed to parse compiled-in descriptor for file '%s'. This should " - "never happen.", - init->filename); - goto err; + // Assign oneof case offsets. We must do these first, since assigning + // actual offsets will overwrite the links of the linked list. + for (upb_LayoutItem* item = d->vec.data; item < end; item++) { + if (item->type != kUpb_LayoutItemType_OneofCase) continue; + upb_MiniTableField* f = &d->fields[item->field_index]; + while (true) { + f->presence = ~item->offset; + if (f->offset == kUpb_LayoutItem_IndexSentinel) break; + UPB_ASSERT(f->offset - kOneofBase < d->table->field_count); + f = &d->fields[f->offset - kOneofBase]; + } } - const upb_MiniTableFile* mt = rebuild_minitable ? NULL : init->layout; - if (!_upb_DefPool_AddFile(s, file, mt, &status)) { - goto err; + // Assign offsets. + for (upb_LayoutItem* item = d->vec.data; item < end; item++) { + upb_MiniTableField* f = &d->fields[item->field_index]; + switch (item->type) { + case kUpb_LayoutItemType_OneofField: + while (true) { + uint16_t next_offset = f->offset; + f->offset = item->offset; + if (next_offset == kUpb_LayoutItem_IndexSentinel) break; + f = &d->fields[next_offset - kOneofBase]; + } + break; + case kUpb_LayoutItemType_Field: + f->offset = item->offset; + break; + default: + break; + } } - upb_Arena_Free(arena); - return true; - -err: - fprintf(stderr, - "Error loading compiled-in descriptor for file '%s' (this should " - "never happen): %s\n", - init->filename, upb_Status_ErrorMessage(&status)); - upb_Arena_Free(arena); - return false; + // The fasttable parser (supported on 64-bit only) depends on this being a + // multiple of 8 in order to satisfy UPB_MALLOC_ALIGN, which is also 8. + // + // On 32-bit we could potentially make this smaller, but there is no + // compelling reason to optimize this right now. + d->table->size = UPB_ALIGN_UP(d->table->size, 8); } -size_t _upb_DefPool_BytesLoaded(const upb_DefPool* s) { - return s->bytes_loaded; -} +static void upb_MtDecoder_ValidateEntryField(upb_MtDecoder* d, + const upb_MiniTableField* f, + uint32_t expected_num) { + const char* name = expected_num == 1 ? "key" : "val"; + if (f->number != expected_num) { + upb_MdDecoder_ErrorJmp(&d->base, + "map %s did not have expected number (%d vs %d)", + name, expected_num, (int)f->number); + } -upb_Arena* _upb_DefPool_Arena(const upb_DefPool* s) { return s->arena; } + if (upb_IsRepeatedOrMap(f)) { + upb_MdDecoder_ErrorJmp( + &d->base, "map %s cannot be repeated or map, or be in oneof", name); + } -const upb_FieldDef* upb_DefPool_FindExtensionByMiniTable( - const upb_DefPool* s, const upb_MiniTableExtension* ext) { - upb_value v; - bool ok = upb_inttable_lookup(&s->exts, (uintptr_t)ext, &v); - UPB_ASSERT(ok); - return upb_value_getconstptr(v); -} + uint32_t not_ok_types; + if (expected_num == 1) { + not_ok_types = (1 << kUpb_FieldType_Float) | (1 << kUpb_FieldType_Double) | + (1 << kUpb_FieldType_Message) | (1 << kUpb_FieldType_Group) | + (1 << kUpb_FieldType_Bytes) | (1 << kUpb_FieldType_Enum); + } else { + not_ok_types = 1 << kUpb_FieldType_Group; + } -const upb_FieldDef* upb_DefPool_FindExtensionByNumber(const upb_DefPool* s, - const upb_MessageDef* m, - int32_t fieldnum) { - const upb_MiniTable* t = upb_MessageDef_MiniTable(m); - const upb_MiniTableExtension* ext = - upb_ExtensionRegistry_Lookup(s->extreg, t, fieldnum); - return ext ? upb_DefPool_FindExtensionByMiniTable(s, ext) : NULL; + if ((1 << upb_MiniTableField_Type(f)) & not_ok_types) { + upb_MdDecoder_ErrorJmp(&d->base, "map %s cannot have type %d", name, + (int)f->UPB_PRIVATE(descriptortype)); + } } -const upb_ExtensionRegistry* upb_DefPool_ExtensionRegistry( - const upb_DefPool* s) { - return s->extreg; -} +static void upb_MtDecoder_ParseMap(upb_MtDecoder* d, const char* data, + size_t len) { + upb_MtDecoder_ParseMessage(d, data, len); + upb_MtDecoder_AssignHasbits(d); -const upb_FieldDef** upb_DefPool_GetAllExtensions(const upb_DefPool* s, - const upb_MessageDef* m, - size_t* count) { - size_t n = 0; - intptr_t iter = UPB_INTTABLE_BEGIN; - uintptr_t key; - upb_value val; - // This is O(all exts) instead of O(exts for m). If we need this to be - // efficient we may need to make extreg into a two-level table, or have a - // second per-message index. - while (upb_inttable_next(&s->exts, &key, &val, &iter)) { - const upb_FieldDef* f = upb_value_getconstptr(val); - if (upb_FieldDef_ContainingType(f) == m) n++; - } - const upb_FieldDef** exts = malloc(n * sizeof(*exts)); - iter = UPB_INTTABLE_BEGIN; - size_t i = 0; - while (upb_inttable_next(&s->exts, &key, &val, &iter)) { - const upb_FieldDef* f = upb_value_getconstptr(val); - if (upb_FieldDef_ContainingType(f) == m) exts[i++] = f; + if (UPB_UNLIKELY(d->table->field_count != 2)) { + upb_MdDecoder_ErrorJmp(&d->base, "%hu fields in map", + d->table->field_count); + UPB_UNREACHABLE(); } - *count = n; - return exts; -} -bool _upb_DefPool_LoadDefInit(upb_DefPool* s, const _upb_DefPool_Init* init) { - return _upb_DefPool_LoadDefInitEx(s, init, false); -} + upb_LayoutItem* end = UPB_PTRADD(d->vec.data, d->vec.size); + for (upb_LayoutItem* item = d->vec.data; item < end; item++) { + if (item->type == kUpb_LayoutItemType_OneofCase) { + upb_MdDecoder_ErrorJmp(&d->base, "Map entry cannot have oneof"); + } + } + upb_MtDecoder_ValidateEntryField(d, &d->table->fields[0], 1); + upb_MtDecoder_ValidateEntryField(d, &d->table->fields[1], 2); -// Must be last. + // Map entries have a pre-determined layout, regardless of types. + // NOTE: sync with mini_table/message_internal.h. + const size_t kv_size = d->platform == kUpb_MiniTablePlatform_32Bit ? 8 : 16; + const size_t hasbit_size = 8; + d->fields[0].offset = hasbit_size; + d->fields[1].offset = hasbit_size + kv_size; + d->table->size = UPB_ALIGN_UP(hasbit_size + kv_size + kv_size, 8); -upb_deftype_t _upb_DefType_Type(upb_value v) { - const uintptr_t num = (uintptr_t)upb_value_getconstptr(v); - return num & UPB_DEFTYPE_MASK; + // Map entries have a special bit set to signal it's a map entry, used in + // upb_MiniTable_SetSubMessage() below. + d->table->ext |= kUpb_ExtMode_IsMapEntry; } -upb_value _upb_DefType_Pack(const void* ptr, upb_deftype_t type) { - uintptr_t num = (uintptr_t)ptr; - UPB_ASSERT((num & UPB_DEFTYPE_MASK) == 0); - num |= type; - return upb_value_constptr((const void*)num); -} +static void upb_MtDecoder_ParseMessageSet(upb_MtDecoder* d, const char* data, + size_t len) { + if (len > 0) { + upb_MdDecoder_ErrorJmp(&d->base, "Invalid message set encode length: %zu", + len); + } -const void* _upb_DefType_Unpack(upb_value v, upb_deftype_t type) { - uintptr_t num = (uintptr_t)upb_value_getconstptr(v); - return (num & UPB_DEFTYPE_MASK) == type - ? (const void*)(num & ~UPB_DEFTYPE_MASK) - : NULL; + upb_MiniTable* ret = d->table; + ret->size = 0; + ret->field_count = 0; + ret->ext = kUpb_ExtMode_IsMessageSet; + ret->dense_below = 0; + ret->table_mask = -1; + ret->required_count = 0; } +static upb_MiniTable* upb_MtDecoder_DoBuildMiniTableWithBuf( + upb_MtDecoder* decoder, const char* data, size_t len, void** buf, + size_t* buf_size) { + upb_MdDecoder_CheckOutOfMemory(&decoder->base, decoder->table); -// Must be last. + decoder->table->size = 0; + decoder->table->field_count = 0; + decoder->table->ext = kUpb_ExtMode_NonExtendable; + decoder->table->dense_below = 0; + decoder->table->table_mask = -1; + decoder->table->required_count = 0; -bool _upb_DescState_Grow(upb_DescState* d, upb_Arena* a) { - const size_t oldbufsize = d->bufsize; - const int used = d->ptr - d->buf; + // Strip off and verify the version tag. + if (!len--) goto done; + const char vers = *data++; - if (!d->buf) { - d->buf = upb_Arena_Malloc(a, d->bufsize); - if (!d->buf) return false; - d->ptr = d->buf; - d->e.end = d->buf + d->bufsize; - } + switch (vers) { + case kUpb_EncodedVersion_MapV1: + upb_MtDecoder_ParseMap(decoder, data, len); + break; - if (oldbufsize - used < kUpb_MtDataEncoder_MinSize) { - d->bufsize *= 2; - d->buf = upb_Arena_Realloc(a, d->buf, oldbufsize, d->bufsize); - if (!d->buf) return false; - d->ptr = d->buf + used; - d->e.end = d->buf + d->bufsize; - } + case kUpb_EncodedVersion_MessageV1: + upb_MtDecoder_ParseMessage(decoder, data, len); + upb_MtDecoder_AssignHasbits(decoder); + upb_MtDecoder_SortLayoutItems(decoder); + upb_MtDecoder_AssignOffsets(decoder); + break; - return true; -} + case kUpb_EncodedVersion_MessageSetV1: + upb_MtDecoder_ParseMessageSet(decoder, data, len); + break; + default: + upb_MdDecoder_ErrorJmp(&decoder->base, "Invalid message version: %c", + vers); + } -// Must be last. +done: + *buf = decoder->vec.data; + *buf_size = decoder->vec.capacity * sizeof(*decoder->vec.data); + return decoder->table; +} -struct upb_EnumDef { - const UPB_DESC(EnumOptions) * opts; - const upb_MiniTableEnum* layout; // Only for proto2. - const upb_FileDef* file; - const upb_MessageDef* containing_type; // Could be merged with "file". - const char* full_name; - upb_strtable ntoi; - upb_inttable iton; - const upb_EnumValueDef* values; - const upb_EnumReservedRange* res_ranges; - const upb_StringView* res_names; - int value_count; - int res_range_count; - int res_name_count; - int32_t defaultval; - bool is_closed; - bool is_sorted; // Whether all of the values are defined in ascending order. -}; +static upb_MiniTable* upb_MtDecoder_BuildMiniTableWithBuf( + upb_MtDecoder* const decoder, const char* const data, const size_t len, + void** const buf, size_t* const buf_size) { + if (UPB_SETJMP(decoder->base.err) != 0) { + *buf = decoder->vec.data; + *buf_size = decoder->vec.capacity * sizeof(*decoder->vec.data); + return NULL; + } -upb_EnumDef* _upb_EnumDef_At(const upb_EnumDef* e, int i) { - return (upb_EnumDef*)&e[i]; + return upb_MtDecoder_DoBuildMiniTableWithBuf(decoder, data, len, buf, + buf_size); } -const upb_MiniTableEnum* _upb_EnumDef_MiniTable(const upb_EnumDef* e) { - return e->layout; -} +upb_MiniTable* upb_MiniTable_BuildWithBuf(const char* data, size_t len, + upb_MiniTablePlatform platform, + upb_Arena* arena, void** buf, + size_t* buf_size, + upb_Status* status) { + upb_MtDecoder decoder = { + .base = {.status = status}, + .platform = platform, + .vec = + { + .data = *buf, + .capacity = *buf_size / sizeof(*decoder.vec.data), + .size = 0, + }, + .arena = arena, + .table = upb_Arena_Malloc(arena, sizeof(*decoder.table)), + }; -bool _upb_EnumDef_Insert(upb_EnumDef* e, upb_EnumValueDef* v, upb_Arena* a) { - const char* name = upb_EnumValueDef_Name(v); - const upb_value val = upb_value_constptr(v); - bool ok = upb_strtable_insert(&e->ntoi, name, strlen(name), val, a); - if (!ok) return false; + return upb_MtDecoder_BuildMiniTableWithBuf(&decoder, data, len, buf, + buf_size); +} - // Multiple enumerators can have the same number, first one wins. - const int number = upb_EnumValueDef_Number(v); - if (!upb_inttable_lookup(&e->iton, number, NULL)) { - return upb_inttable_insert(&e->iton, number, val, a); +static const char* upb_MtDecoder_DoBuildMiniTableExtension( + upb_MtDecoder* decoder, const char* data, size_t len, + upb_MiniTableExtension* ext, const upb_MiniTable* extendee, + upb_MiniTableSub sub) { + // If the string is non-empty then it must begin with a version tag. + if (len) { + if (*data != kUpb_EncodedVersion_ExtensionV1) { + upb_MdDecoder_ErrorJmp(&decoder->base, "Invalid ext version: %c", *data); + } + data++; + len--; } - return true; -} -const UPB_DESC(EnumOptions) * upb_EnumDef_Options(const upb_EnumDef* e) { - return e->opts; -} + uint16_t count = 0; + upb_SubCounts sub_counts = {0, 0}; + const char* ret = upb_MtDecoder_Parse(decoder, data, len, ext, sizeof(*ext), + &count, &sub_counts); + if (!ret || count != 1) return NULL; -bool upb_EnumDef_HasOptions(const upb_EnumDef* e) { - return e->opts != (void*)kUpbDefOptDefault; -} + upb_MiniTableField* f = &ext->field; -const char* upb_EnumDef_FullName(const upb_EnumDef* e) { return e->full_name; } + f->mode |= kUpb_LabelFlags_IsExtension; + f->offset = 0; + f->presence = 0; -const char* upb_EnumDef_Name(const upb_EnumDef* e) { - return _upb_DefBuilder_FullToShort(e->full_name); -} + if (extendee->ext & kUpb_ExtMode_IsMessageSet) { + // Extensions of MessageSet must be messages. + if (!upb_IsSubMessage(f)) return NULL; -const upb_FileDef* upb_EnumDef_File(const upb_EnumDef* e) { return e->file; } + // Extensions of MessageSet must be non-repeating. + if ((f->mode & kUpb_FieldMode_Mask) == kUpb_FieldMode_Array) return NULL; + } -const upb_MessageDef* upb_EnumDef_ContainingType(const upb_EnumDef* e) { - return e->containing_type; -} + ext->extendee = extendee; + ext->sub = sub; -int32_t upb_EnumDef_Default(const upb_EnumDef* e) { - UPB_ASSERT(upb_EnumDef_FindValueByNumber(e, e->defaultval)); - return e->defaultval; + return ret; } -int upb_EnumDef_ReservedRangeCount(const upb_EnumDef* e) { - return e->res_range_count; +static const char* upb_MtDecoder_BuildMiniTableExtension( + upb_MtDecoder* const decoder, const char* const data, const size_t len, + upb_MiniTableExtension* const ext, const upb_MiniTable* const extendee, + const upb_MiniTableSub sub) { + if (UPB_SETJMP(decoder->base.err) != 0) return NULL; + return upb_MtDecoder_DoBuildMiniTableExtension(decoder, data, len, ext, + extendee, sub); } -const upb_EnumReservedRange* upb_EnumDef_ReservedRange(const upb_EnumDef* e, - int i) { - UPB_ASSERT(0 <= i && i < e->res_range_count); - return _upb_EnumReservedRange_At(e->res_ranges, i); -} +const char* _upb_MiniTableExtension_Init(const char* data, size_t len, + upb_MiniTableExtension* ext, + const upb_MiniTable* extendee, + upb_MiniTableSub sub, + upb_MiniTablePlatform platform, + upb_Status* status) { + upb_MtDecoder decoder = { + .base = {.status = status}, + .arena = NULL, + .table = NULL, + .platform = platform, + }; -int upb_EnumDef_ReservedNameCount(const upb_EnumDef* e) { - return e->res_name_count; + return upb_MtDecoder_BuildMiniTableExtension(&decoder, data, len, ext, + extendee, sub); } -upb_StringView upb_EnumDef_ReservedName(const upb_EnumDef* e, int i) { - UPB_ASSERT(0 <= i && i < e->res_name_count); - return e->res_names[i]; -} +upb_MiniTableExtension* _upb_MiniTableExtension_Build( + const char* data, size_t len, const upb_MiniTable* extendee, + upb_MiniTableSub sub, upb_MiniTablePlatform platform, upb_Arena* arena, + upb_Status* status) { + upb_MiniTableExtension* ext = + upb_Arena_Malloc(arena, sizeof(upb_MiniTableExtension)); + if (UPB_UNLIKELY(!ext)) return NULL; -int upb_EnumDef_ValueCount(const upb_EnumDef* e) { return e->value_count; } + const char* ptr = _upb_MiniTableExtension_Init(data, len, ext, extendee, sub, + platform, status); + if (UPB_UNLIKELY(!ptr)) return NULL; -const upb_EnumValueDef* upb_EnumDef_FindValueByName(const upb_EnumDef* e, - const char* name) { - return upb_EnumDef_FindValueByNameWithSize(e, name, strlen(name)); + return ext; } -const upb_EnumValueDef* upb_EnumDef_FindValueByNameWithSize( - const upb_EnumDef* e, const char* name, size_t size) { - upb_value v; - return upb_strtable_lookup2(&e->ntoi, name, size, &v) - ? upb_value_getconstptr(v) - : NULL; +upb_MiniTable* _upb_MiniTable_Build(const char* data, size_t len, + upb_MiniTablePlatform platform, + upb_Arena* arena, upb_Status* status) { + void* buf = NULL; + size_t size = 0; + upb_MiniTable* ret = upb_MiniTable_BuildWithBuf(data, len, platform, arena, + &buf, &size, status); + free(buf); + return ret; } -const upb_EnumValueDef* upb_EnumDef_FindValueByNumber(const upb_EnumDef* e, - int32_t num) { - upb_value v; - return upb_inttable_lookup(&e->iton, num, &v) ? upb_value_getconstptr(v) - : NULL; -} - -bool upb_EnumDef_CheckNumber(const upb_EnumDef* e, int32_t num) { - // We could use upb_EnumDef_FindValueByNumber(e, num) != NULL, but we expect - // this to be faster (especially for small numbers). - return upb_MiniTableEnum_CheckValue(e->layout, num); -} - -const upb_EnumValueDef* upb_EnumDef_Value(const upb_EnumDef* e, int i) { - UPB_ASSERT(0 <= i && i < e->value_count); - return _upb_EnumValueDef_At(e->values, i); -} -bool upb_EnumDef_IsClosed(const upb_EnumDef* e) { return e->is_closed; } +// Must be last. -bool upb_EnumDef_MiniDescriptorEncode(const upb_EnumDef* e, upb_Arena* a, - upb_StringView* out) { - upb_DescState s; - _upb_DescState_Init(&s); +bool upb_MiniTable_SetSubMessage(upb_MiniTable* table, + upb_MiniTableField* field, + const upb_MiniTable* sub) { + UPB_ASSERT((uintptr_t)table->fields <= (uintptr_t)field && + (uintptr_t)field < + (uintptr_t)(table->fields + table->field_count)); + UPB_ASSERT(sub); - const upb_EnumValueDef** sorted = NULL; - if (!e->is_sorted) { - sorted = _upb_EnumValueDefs_Sorted(e->values, e->value_count, a); - if (!sorted) return false; - } + const bool sub_is_map = sub->ext & kUpb_ExtMode_IsMapEntry; - if (!_upb_DescState_Grow(&s, a)) return false; - s.ptr = upb_MtDataEncoder_StartEnum(&s.e, s.ptr); + switch (field->UPB_PRIVATE(descriptortype)) { + case kUpb_FieldType_Message: + if (sub_is_map) { + const bool table_is_map = table->ext & kUpb_ExtMode_IsMapEntry; + if (UPB_UNLIKELY(table_is_map)) return false; - // Duplicate values are allowed but we only encode each value once. - uint32_t previous = 0; + field->mode = (field->mode & ~kUpb_FieldMode_Mask) | kUpb_FieldMode_Map; + } + break; - for (size_t i = 0; i < e->value_count; i++) { - const uint32_t current = - upb_EnumValueDef_Number(sorted ? sorted[i] : upb_EnumDef_Value(e, i)); - if (i != 0 && previous == current) continue; + case kUpb_FieldType_Group: + if (UPB_UNLIKELY(sub_is_map)) return false; + break; - if (!_upb_DescState_Grow(&s, a)) return false; - s.ptr = upb_MtDataEncoder_PutEnumValue(&s.e, s.ptr, current); - previous = current; + default: + return false; } - if (!_upb_DescState_Grow(&s, a)) return false; - s.ptr = upb_MtDataEncoder_EndEnum(&s.e, s.ptr); + upb_MiniTableSub* table_sub = + (void*)&table->subs[field->UPB_PRIVATE(submsg_index)]; + // TODO(haberman): Add this assert back once YouTube is updated to not call + // this function repeatedly. + // UPB_ASSERT(table_sub->submsg == &_kUpb_MiniTable_Empty); + table_sub->submsg = sub; + return true; +} - // There will always be room for this '\0' in the encoder buffer because - // kUpb_MtDataEncoder_MinSize is overkill for upb_MtDataEncoder_EndEnum(). - UPB_ASSERT(s.ptr < s.buf + s.bufsize); - *s.ptr = '\0'; +bool upb_MiniTable_SetSubEnum(upb_MiniTable* table, upb_MiniTableField* field, + const upb_MiniTableEnum* sub) { + UPB_ASSERT((uintptr_t)table->fields <= (uintptr_t)field && + (uintptr_t)field < + (uintptr_t)(table->fields + table->field_count)); + UPB_ASSERT(sub); - out->data = s.buf; - out->size = s.ptr - s.buf; + upb_MiniTableSub* table_sub = + (void*)&table->subs[field->UPB_PRIVATE(submsg_index)]; + table_sub->subenum = sub; return true; } -static upb_MiniTableEnum* create_enumlayout(upb_DefBuilder* ctx, - const upb_EnumDef* e) { - upb_StringView sv; - bool ok = upb_EnumDef_MiniDescriptorEncode(e, ctx->tmp_arena, &sv); - if (!ok) _upb_DefBuilder_Errf(ctx, "OOM while building enum MiniDescriptor"); +uint32_t upb_MiniTable_GetSubList(const upb_MiniTable* mt, + const upb_MiniTableField** subs) { + uint32_t msg_count = 0; + uint32_t enum_count = 0; - upb_Status status; - upb_MiniTableEnum* layout = - upb_MiniTableEnum_Build(sv.data, sv.size, ctx->arena, &status); - if (!layout) - _upb_DefBuilder_Errf(ctx, "Error building enum MiniTable: %s", status.msg); - return layout; -} + for (int i = 0; i < mt->field_count; i++) { + const upb_MiniTableField* f = &mt->fields[i]; + if (upb_MiniTableField_CType(f) == kUpb_CType_Message) { + *subs = f; + ++subs; + msg_count++; + } + } -static upb_StringView* _upb_EnumReservedNames_New( - upb_DefBuilder* ctx, int n, const upb_StringView* protos) { - upb_StringView* sv = _upb_DefBuilder_Alloc(ctx, sizeof(upb_StringView) * n); - for (size_t i = 0; i < n; i++) { - sv[i].data = - upb_strdup2(protos[i].data, protos[i].size, _upb_DefBuilder_Arena(ctx)); - sv[i].size = protos[i].size; + for (int i = 0; i < mt->field_count; i++) { + const upb_MiniTableField* f = &mt->fields[i]; + if (upb_MiniTableField_CType(f) == kUpb_CType_Enum) { + *subs = f; + ++subs; + enum_count++; + } } - return sv; -} -static void create_enumdef(upb_DefBuilder* ctx, const char* prefix, - const UPB_DESC(EnumDescriptorProto) * enum_proto, - upb_EnumDef* e) { - const UPB_DESC(EnumValueDescriptorProto)* const* values; - const UPB_DESC(EnumDescriptorProto_EnumReservedRange)* const* res_ranges; - const upb_StringView* res_names; - upb_StringView name; - size_t n_value, n_res_range, n_res_name; + return (msg_count << 16) | enum_count; +} - // Must happen before _upb_DefBuilder_Add() - e->file = _upb_DefBuilder_File(ctx); +// The list of sub_tables and sub_enums must exactly match the number and order +// of sub-message fields and sub-enum fields given by upb_MiniTable_GetSubList() +// above. +bool upb_MiniTable_Link(upb_MiniTable* mt, const upb_MiniTable** sub_tables, + size_t sub_table_count, + const upb_MiniTableEnum** sub_enums, + size_t sub_enum_count) { + uint32_t msg_count = 0; + uint32_t enum_count = 0; - name = UPB_DESC(EnumDescriptorProto_name)(enum_proto); + for (int i = 0; i < mt->field_count; i++) { + upb_MiniTableField* f = (upb_MiniTableField*)&mt->fields[i]; + if (upb_MiniTableField_CType(f) == kUpb_CType_Message) { + const upb_MiniTable* sub = sub_tables[msg_count++]; + if (msg_count > sub_table_count) return false; + if (sub != NULL) { + if (!upb_MiniTable_SetSubMessage(mt, f, sub)) return false; + } + } + } - e->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); - _upb_DefBuilder_Add(ctx, e->full_name, - _upb_DefType_Pack(e, UPB_DEFTYPE_ENUM)); + for (int i = 0; i < mt->field_count; i++) { + upb_MiniTableField* f = (upb_MiniTableField*)&mt->fields[i]; + if (upb_MiniTableField_IsClosedEnum(f)) { + const upb_MiniTableEnum* sub = sub_enums[enum_count++]; + if (enum_count > sub_enum_count) return false; + if (sub != NULL) { + if (!upb_MiniTable_SetSubEnum(mt, f, sub)) return false; + } + } + } - e->is_closed = (!UPB_TREAT_PROTO2_ENUMS_LIKE_PROTO3) && - (upb_FileDef_Syntax(e->file) == kUpb_Syntax_Proto2); + return true; +} - values = UPB_DESC(EnumDescriptorProto_value)(enum_proto, &n_value); - bool ok = upb_strtable_init(&e->ntoi, n_value, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); - ok = upb_inttable_init(&e->iton, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); +// Must be last. - e->defaultval = 0; - e->value_count = n_value; - e->values = - _upb_EnumValueDefs_New(ctx, prefix, n_value, values, e, &e->is_sorted); +typedef struct { + uint64_t present_values_mask; + uint32_t last_written_value; +} upb_MtDataEncoderInternal_EnumState; - if (n_value == 0) { - _upb_DefBuilder_Errf(ctx, "enums must contain at least one value (%s)", - e->full_name); - } +typedef struct { + uint64_t msg_modifiers; + uint32_t last_field_num; + enum { + kUpb_OneofState_NotStarted, + kUpb_OneofState_StartedOneof, + kUpb_OneofState_EmittedOneofField, + } oneof_state; +} upb_MtDataEncoderInternal_MsgState; - res_ranges = - UPB_DESC(EnumDescriptorProto_reserved_range)(enum_proto, &n_res_range); - e->res_range_count = n_res_range; - e->res_ranges = _upb_EnumReservedRanges_New(ctx, n_res_range, res_ranges, e); +typedef struct { + char* buf_start; // Only for checking kUpb_MtDataEncoder_MinSize. + union { + upb_MtDataEncoderInternal_EnumState enum_state; + upb_MtDataEncoderInternal_MsgState msg_state; + } state; +} upb_MtDataEncoderInternal; - res_names = - UPB_DESC(EnumDescriptorProto_reserved_name)(enum_proto, &n_res_name); - e->res_name_count = n_res_name; - e->res_names = _upb_EnumReservedNames_New(ctx, n_res_name, res_names); +static upb_MtDataEncoderInternal* upb_MtDataEncoder_GetInternal( + upb_MtDataEncoder* e, char* buf_start) { + UPB_ASSERT(sizeof(upb_MtDataEncoderInternal) <= sizeof(e->internal)); + upb_MtDataEncoderInternal* ret = (upb_MtDataEncoderInternal*)e->internal; + ret->buf_start = buf_start; + return ret; +} - UPB_DEF_SET_OPTIONS(e->opts, EnumDescriptorProto, EnumOptions, enum_proto); +static char* upb_MtDataEncoder_PutRaw(upb_MtDataEncoder* e, char* ptr, + char ch) { + upb_MtDataEncoderInternal* in = (upb_MtDataEncoderInternal*)e->internal; + UPB_ASSERT(ptr - in->buf_start < kUpb_MtDataEncoder_MinSize); + if (ptr == e->end) return NULL; + *ptr++ = ch; + return ptr; +} - upb_inttable_compact(&e->iton, ctx->arena); +static char* upb_MtDataEncoder_Put(upb_MtDataEncoder* e, char* ptr, char ch) { + return upb_MtDataEncoder_PutRaw(e, ptr, _upb_ToBase92(ch)); +} - if (e->is_closed) { - if (ctx->layout) { - UPB_ASSERT(ctx->enum_count < ctx->layout->enum_count); - e->layout = ctx->layout->enums[ctx->enum_count++]; - } else { - e->layout = create_enumlayout(ctx, e); - } - } else { - e->layout = NULL; - } +static char* upb_MtDataEncoder_PutBase92Varint(upb_MtDataEncoder* e, char* ptr, + uint32_t val, int min, int max) { + int shift = upb_Log2Ceiling(_upb_FromBase92(max) - _upb_FromBase92(min) + 1); + UPB_ASSERT(shift <= 6); + uint32_t mask = (1 << shift) - 1; + do { + uint32_t bits = val & mask; + ptr = upb_MtDataEncoder_Put(e, ptr, bits + _upb_FromBase92(min)); + if (!ptr) return NULL; + val >>= shift; + } while (val); + return ptr; } -upb_EnumDef* _upb_EnumDefs_New( - upb_DefBuilder* ctx, int n, - const UPB_DESC(EnumDescriptorProto) * const* protos, - const upb_MessageDef* containing_type) { - _upb_DefType_CheckPadding(sizeof(upb_EnumDef)); +char* upb_MtDataEncoder_PutModifier(upb_MtDataEncoder* e, char* ptr, + uint64_t mod) { + if (mod) { + ptr = upb_MtDataEncoder_PutBase92Varint(e, ptr, mod, + kUpb_EncodedValue_MinModifier, + kUpb_EncodedValue_MaxModifier); + } + return ptr; +} - // If a containing type is defined then get the full name from that. - // Otherwise use the package name from the file def. - const char* name = containing_type ? upb_MessageDef_FullName(containing_type) - : _upb_FileDef_RawPackage(ctx->file); +char* upb_MtDataEncoder_EncodeExtension(upb_MtDataEncoder* e, char* ptr, + upb_FieldType type, uint32_t field_num, + uint64_t field_mod) { + upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); + in->state.msg_state.msg_modifiers = 0; + in->state.msg_state.last_field_num = 0; + in->state.msg_state.oneof_state = kUpb_OneofState_NotStarted; - upb_EnumDef* e = _upb_DefBuilder_Alloc(ctx, sizeof(upb_EnumDef) * n); - for (size_t i = 0; i < n; i++) { - create_enumdef(ctx, name, protos[i], &e[i]); - e[i].containing_type = containing_type; - } - return e; + ptr = upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_ExtensionV1); + if (!ptr) return NULL; + + return upb_MtDataEncoder_PutField(e, ptr, type, field_num, field_mod); } -// #include "upb/reflection/extension_range_internal.h" -// #include "upb/reflection/message_def.h" +char* upb_MtDataEncoder_EncodeMap(upb_MtDataEncoder* e, char* ptr, + upb_FieldType key_type, + upb_FieldType value_type, uint64_t key_mod, + uint64_t value_mod) { + upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); + in->state.msg_state.msg_modifiers = 0; + in->state.msg_state.last_field_num = 0; + in->state.msg_state.oneof_state = kUpb_OneofState_NotStarted; -// Must be last. + ptr = upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_MapV1); + if (!ptr) return NULL; -struct upb_EnumReservedRange { - int32_t start; - int32_t end; -}; + ptr = upb_MtDataEncoder_PutField(e, ptr, key_type, 1, key_mod); + if (!ptr) return NULL; -upb_EnumReservedRange* _upb_EnumReservedRange_At(const upb_EnumReservedRange* r, - int i) { - return (upb_EnumReservedRange*)&r[i]; + return upb_MtDataEncoder_PutField(e, ptr, value_type, 2, value_mod); } -int32_t upb_EnumReservedRange_Start(const upb_EnumReservedRange* r) { - return r->start; -} -int32_t upb_EnumReservedRange_End(const upb_EnumReservedRange* r) { - return r->end; +char* upb_MtDataEncoder_EncodeMessageSet(upb_MtDataEncoder* e, char* ptr) { + (void)upb_MtDataEncoder_GetInternal(e, ptr); + return upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_MessageSetV1); } -upb_EnumReservedRange* _upb_EnumReservedRanges_New( - upb_DefBuilder* ctx, int n, - const UPB_DESC(EnumDescriptorProto_EnumReservedRange) * const* protos, - const upb_EnumDef* e) { - upb_EnumReservedRange* r = - _upb_DefBuilder_Alloc(ctx, sizeof(upb_EnumReservedRange) * n); - - for (int i = 0; i < n; i++) { - const int32_t start = - UPB_DESC(EnumDescriptorProto_EnumReservedRange_start)(protos[i]); - const int32_t end = - UPB_DESC(EnumDescriptorProto_EnumReservedRange_end)(protos[i]); +char* upb_MtDataEncoder_StartMessage(upb_MtDataEncoder* e, char* ptr, + uint64_t msg_mod) { + upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); + in->state.msg_state.msg_modifiers = msg_mod; + in->state.msg_state.last_field_num = 0; + in->state.msg_state.oneof_state = kUpb_OneofState_NotStarted; - // A full validation would also check that each range is disjoint, and that - // none of the fields overlap with the extension ranges, but we are just - // sanity checking here. + ptr = upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_MessageV1); + if (!ptr) return NULL; - // Note: Not a typo! Unlike extension ranges and message reserved ranges, - // the end value of an enum reserved range is *inclusive*! - if (end < start) { - _upb_DefBuilder_Errf(ctx, "Reserved range (%d, %d) is invalid, enum=%s\n", - (int)start, (int)end, upb_EnumDef_FullName(e)); - } + return upb_MtDataEncoder_PutModifier(e, ptr, msg_mod); +} - r[i].start = start; - r[i].end = end; +static char* _upb_MtDataEncoder_MaybePutFieldSkip(upb_MtDataEncoder* e, + char* ptr, + uint32_t field_num) { + upb_MtDataEncoderInternal* in = (upb_MtDataEncoderInternal*)e->internal; + if (field_num <= in->state.msg_state.last_field_num) return NULL; + if (in->state.msg_state.last_field_num + 1 != field_num) { + // Put skip. + UPB_ASSERT(field_num > in->state.msg_state.last_field_num); + uint32_t skip = field_num - in->state.msg_state.last_field_num; + ptr = upb_MtDataEncoder_PutBase92Varint( + e, ptr, skip, kUpb_EncodedValue_MinSkip, kUpb_EncodedValue_MaxSkip); + if (!ptr) return NULL; } - - return r; + in->state.msg_state.last_field_num = field_num; + return ptr; } +static char* _upb_MtDataEncoder_PutFieldType(upb_MtDataEncoder* e, char* ptr, + upb_FieldType type, + uint64_t field_mod) { + static const char kUpb_TypeToEncoded[] = { + [kUpb_FieldType_Double] = kUpb_EncodedType_Double, + [kUpb_FieldType_Float] = kUpb_EncodedType_Float, + [kUpb_FieldType_Int64] = kUpb_EncodedType_Int64, + [kUpb_FieldType_UInt64] = kUpb_EncodedType_UInt64, + [kUpb_FieldType_Int32] = kUpb_EncodedType_Int32, + [kUpb_FieldType_Fixed64] = kUpb_EncodedType_Fixed64, + [kUpb_FieldType_Fixed32] = kUpb_EncodedType_Fixed32, + [kUpb_FieldType_Bool] = kUpb_EncodedType_Bool, + [kUpb_FieldType_String] = kUpb_EncodedType_String, + [kUpb_FieldType_Group] = kUpb_EncodedType_Group, + [kUpb_FieldType_Message] = kUpb_EncodedType_Message, + [kUpb_FieldType_Bytes] = kUpb_EncodedType_Bytes, + [kUpb_FieldType_UInt32] = kUpb_EncodedType_UInt32, + [kUpb_FieldType_Enum] = kUpb_EncodedType_OpenEnum, + [kUpb_FieldType_SFixed32] = kUpb_EncodedType_SFixed32, + [kUpb_FieldType_SFixed64] = kUpb_EncodedType_SFixed64, + [kUpb_FieldType_SInt32] = kUpb_EncodedType_SInt32, + [kUpb_FieldType_SInt64] = kUpb_EncodedType_SInt64, + }; + + int encoded_type = kUpb_TypeToEncoded[type]; -// Must be last. + if (field_mod & kUpb_FieldModifier_IsClosedEnum) { + UPB_ASSERT(type == kUpb_FieldType_Enum); + encoded_type = kUpb_EncodedType_ClosedEnum; + } -struct upb_EnumValueDef { - const UPB_DESC(EnumValueOptions) * opts; - const upb_EnumDef* parent; - const char* full_name; - int32_t number; -}; + if (field_mod & kUpb_FieldModifier_IsRepeated) { + // Repeated fields shift the type number up (unlike other modifiers which + // are bit flags). + encoded_type += kUpb_EncodedType_RepeatedBase; + } -upb_EnumValueDef* _upb_EnumValueDef_At(const upb_EnumValueDef* v, int i) { - return (upb_EnumValueDef*)&v[i]; + return upb_MtDataEncoder_Put(e, ptr, encoded_type); } -static int _upb_EnumValueDef_Compare(const void* p1, const void* p2) { - const uint32_t v1 = (*(const upb_EnumValueDef**)p1)->number; - const uint32_t v2 = (*(const upb_EnumValueDef**)p2)->number; - return (v1 < v2) ? -1 : (v1 > v2); -} +static char* _upb_MtDataEncoder_MaybePutModifiers(upb_MtDataEncoder* e, + char* ptr, upb_FieldType type, + uint64_t field_mod) { + upb_MtDataEncoderInternal* in = (upb_MtDataEncoderInternal*)e->internal; + uint32_t encoded_modifiers = 0; + if ((field_mod & kUpb_FieldModifier_IsRepeated) && + upb_FieldType_IsPackable(type)) { + bool field_is_packed = field_mod & kUpb_FieldModifier_IsPacked; + bool default_is_packed = in->state.msg_state.msg_modifiers & + kUpb_MessageModifier_DefaultIsPacked; + if (field_is_packed != default_is_packed) { + encoded_modifiers |= kUpb_EncodedFieldModifier_FlipPacked; + } + } -const upb_EnumValueDef** _upb_EnumValueDefs_Sorted(const upb_EnumValueDef* v, - int n, upb_Arena* a) { - // TODO: Try to replace this arena alloc with a persistent scratch buffer. - upb_EnumValueDef** out = - (upb_EnumValueDef**)upb_Arena_Malloc(a, n * sizeof(void*)); - if (!out) return NULL; + if (field_mod & kUpb_FieldModifier_IsProto3Singular) { + encoded_modifiers |= kUpb_EncodedFieldModifier_IsProto3Singular; + } - for (int i = 0; i < n; i++) { - out[i] = (upb_EnumValueDef*)&v[i]; + if (field_mod & kUpb_FieldModifier_IsRequired) { + encoded_modifiers |= kUpb_EncodedFieldModifier_IsRequired; } - qsort(out, n, sizeof(void*), _upb_EnumValueDef_Compare); - return (const upb_EnumValueDef**)out; + return upb_MtDataEncoder_PutModifier(e, ptr, encoded_modifiers); } -const UPB_DESC(EnumValueOptions) * - upb_EnumValueDef_Options(const upb_EnumValueDef* v) { - return v->opts; -} +char* upb_MtDataEncoder_PutField(upb_MtDataEncoder* e, char* ptr, + upb_FieldType type, uint32_t field_num, + uint64_t field_mod) { + upb_MtDataEncoder_GetInternal(e, ptr); -bool upb_EnumValueDef_HasOptions(const upb_EnumValueDef* v) { - return v->opts != (void*)kUpbDefOptDefault; -} + ptr = _upb_MtDataEncoder_MaybePutFieldSkip(e, ptr, field_num); + if (!ptr) return NULL; -const upb_EnumDef* upb_EnumValueDef_Enum(const upb_EnumValueDef* v) { - return v->parent; + ptr = _upb_MtDataEncoder_PutFieldType(e, ptr, type, field_mod); + if (!ptr) return NULL; + + return _upb_MtDataEncoder_MaybePutModifiers(e, ptr, type, field_mod); } -const char* upb_EnumValueDef_FullName(const upb_EnumValueDef* v) { - return v->full_name; +char* upb_MtDataEncoder_StartOneof(upb_MtDataEncoder* e, char* ptr) { + upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); + if (in->state.msg_state.oneof_state == kUpb_OneofState_NotStarted) { + ptr = upb_MtDataEncoder_Put(e, ptr, _upb_FromBase92(kUpb_EncodedValue_End)); + } else { + ptr = upb_MtDataEncoder_Put( + e, ptr, _upb_FromBase92(kUpb_EncodedValue_OneofSeparator)); + } + in->state.msg_state.oneof_state = kUpb_OneofState_StartedOneof; + return ptr; } -const char* upb_EnumValueDef_Name(const upb_EnumValueDef* v) { - return _upb_DefBuilder_FullToShort(v->full_name); +char* upb_MtDataEncoder_PutOneofField(upb_MtDataEncoder* e, char* ptr, + uint32_t field_num) { + upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); + if (in->state.msg_state.oneof_state == kUpb_OneofState_EmittedOneofField) { + ptr = upb_MtDataEncoder_Put( + e, ptr, _upb_FromBase92(kUpb_EncodedValue_FieldSeparator)); + if (!ptr) return NULL; + } + ptr = upb_MtDataEncoder_PutBase92Varint(e, ptr, field_num, _upb_ToBase92(0), + _upb_ToBase92(63)); + in->state.msg_state.oneof_state = kUpb_OneofState_EmittedOneofField; + return ptr; } -int32_t upb_EnumValueDef_Number(const upb_EnumValueDef* v) { return v->number; } +char* upb_MtDataEncoder_StartEnum(upb_MtDataEncoder* e, char* ptr) { + upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); + in->state.enum_state.present_values_mask = 0; + in->state.enum_state.last_written_value = 0; -uint32_t upb_EnumValueDef_Index(const upb_EnumValueDef* v) { - // Compute index in our parent's array. - return v - upb_EnumDef_Value(v->parent, 0); + return upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_EnumV1); } -static void create_enumvaldef(upb_DefBuilder* ctx, const char* prefix, - const UPB_DESC(EnumValueDescriptorProto) * - val_proto, - upb_EnumDef* e, upb_EnumValueDef* v) { - upb_StringView name = UPB_DESC(EnumValueDescriptorProto_name)(val_proto); +static char* upb_MtDataEncoder_FlushDenseEnumMask(upb_MtDataEncoder* e, + char* ptr) { + upb_MtDataEncoderInternal* in = (upb_MtDataEncoderInternal*)e->internal; + ptr = upb_MtDataEncoder_Put(e, ptr, in->state.enum_state.present_values_mask); + in->state.enum_state.present_values_mask = 0; + in->state.enum_state.last_written_value += 5; + return ptr; +} - v->parent = e; // Must happen prior to _upb_DefBuilder_Add() - v->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); - v->number = UPB_DESC(EnumValueDescriptorProto_number)(val_proto); - _upb_DefBuilder_Add(ctx, v->full_name, - _upb_DefType_Pack(v, UPB_DEFTYPE_ENUMVAL)); +char* upb_MtDataEncoder_PutEnumValue(upb_MtDataEncoder* e, char* ptr, + uint32_t val) { + // TODO(b/229641772): optimize this encoding. + upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); + UPB_ASSERT(val >= in->state.enum_state.last_written_value); + uint32_t delta = val - in->state.enum_state.last_written_value; + if (delta >= 5 && in->state.enum_state.present_values_mask) { + ptr = upb_MtDataEncoder_FlushDenseEnumMask(e, ptr); + if (!ptr) { + return NULL; + } + delta -= 5; + } - UPB_DEF_SET_OPTIONS(v->opts, EnumValueDescriptorProto, EnumValueOptions, - val_proto); + if (delta >= 5) { + ptr = upb_MtDataEncoder_PutBase92Varint( + e, ptr, delta, kUpb_EncodedValue_MinSkip, kUpb_EncodedValue_MaxSkip); + in->state.enum_state.last_written_value += delta; + delta = 0; + } - bool ok = _upb_EnumDef_Insert(e, v, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); + UPB_ASSERT((in->state.enum_state.present_values_mask >> delta) == 0); + in->state.enum_state.present_values_mask |= 1ULL << delta; + return ptr; } -// Allocate and initialize an array of |n| enum value defs owned by |e|. -upb_EnumValueDef* _upb_EnumValueDefs_New( - upb_DefBuilder* ctx, const char* prefix, int n, - const UPB_DESC(EnumValueDescriptorProto) * const* protos, upb_EnumDef* e, - bool* is_sorted) { - _upb_DefType_CheckPadding(sizeof(upb_EnumValueDef)); - - upb_EnumValueDef* v = - _upb_DefBuilder_Alloc(ctx, sizeof(upb_EnumValueDef) * n); +char* upb_MtDataEncoder_EndEnum(upb_MtDataEncoder* e, char* ptr) { + upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); + if (!in->state.enum_state.present_values_mask) return ptr; + return upb_MtDataEncoder_FlushDenseEnumMask(e, ptr); +} - *is_sorted = true; - uint32_t previous = 0; - for (size_t i = 0; i < n; i++) { - create_enumvaldef(ctx, prefix, protos[i], e, &v[i]); - const uint32_t current = v[i].number; - if (previous > current) *is_sorted = false; - previous = current; - } +const char _kUpb_ToBase92[] = { + ' ', '!', '#', '$', '%', '&', '(', ')', '*', '+', ',', '-', '.', '/', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', + '>', '?', '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', + 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', + 'Z', '[', ']', '^', '_', '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', + 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', + 'w', 'x', 'y', 'z', '{', '|', '}', '~', +}; - if (upb_FileDef_Syntax(ctx->file) == kUpb_Syntax_Proto3 && n > 0 && - v[0].number != 0) { - _upb_DefBuilder_Errf(ctx, - "for proto3, the first enum value must be zero (%s)", - upb_EnumDef_FullName(e)); - } +const int8_t _kUpb_FromBase92[] = { + 0, 1, -1, 2, 3, 4, 5, -1, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, + 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, + 55, 56, 57, -1, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, + 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, +}; - return v; -} // Must be last. -struct upb_ExtensionRange { - const UPB_DESC(ExtensionRangeOptions) * opts; - int32_t start; - int32_t end; +#define EXTREG_KEY_SIZE (sizeof(upb_MiniTable*) + sizeof(uint32_t)) + +struct upb_ExtensionRegistry { + upb_Arena* arena; + upb_strtable exts; // Key is upb_MiniTable* concatenated with fieldnum. }; -upb_ExtensionRange* _upb_ExtensionRange_At(const upb_ExtensionRange* r, int i) { - return (upb_ExtensionRange*)&r[i]; +static void extreg_key(char* buf, const upb_MiniTable* l, uint32_t fieldnum) { + memcpy(buf, &l, sizeof(l)); + memcpy(buf + sizeof(l), &fieldnum, sizeof(fieldnum)); } -const UPB_DESC(ExtensionRangeOptions) * - upb_ExtensionRange_Options(const upb_ExtensionRange* r) { - return r->opts; +upb_ExtensionRegistry* upb_ExtensionRegistry_New(upb_Arena* arena) { + upb_ExtensionRegistry* r = upb_Arena_Malloc(arena, sizeof(*r)); + if (!r) return NULL; + r->arena = arena; + if (!upb_strtable_init(&r->exts, 8, arena)) return NULL; + return r; } -bool upb_ExtensionRange_HasOptions(const upb_ExtensionRange* r) { - return r->opts != (void*)kUpbDefOptDefault; +UPB_API bool upb_ExtensionRegistry_Add(upb_ExtensionRegistry* r, + const upb_MiniTableExtension* e) { + char buf[EXTREG_KEY_SIZE]; + extreg_key(buf, e->extendee, e->field.number); + if (upb_strtable_lookup2(&r->exts, buf, EXTREG_KEY_SIZE, NULL)) return false; + return upb_strtable_insert(&r->exts, buf, EXTREG_KEY_SIZE, + upb_value_constptr(e), r->arena); } -int32_t upb_ExtensionRange_Start(const upb_ExtensionRange* r) { - return r->start; +bool upb_ExtensionRegistry_AddArray(upb_ExtensionRegistry* r, + const upb_MiniTableExtension** e, + size_t count) { + const upb_MiniTableExtension** start = e; + const upb_MiniTableExtension** end = UPB_PTRADD(e, count); + for (; e < end; e++) { + if (!upb_ExtensionRegistry_Add(r, *e)) goto failure; + } + return true; + +failure: + // Back out the entries previously added. + for (end = e, e = start; e < end; e++) { + const upb_MiniTableExtension* ext = *e; + char buf[EXTREG_KEY_SIZE]; + extreg_key(buf, ext->extendee, ext->field.number); + upb_strtable_remove2(&r->exts, buf, EXTREG_KEY_SIZE, NULL); + } + return false; } -int32_t upb_ExtensionRange_End(const upb_ExtensionRange* r) { return r->end; } +const upb_MiniTableExtension* upb_ExtensionRegistry_Lookup( + const upb_ExtensionRegistry* r, const upb_MiniTable* t, uint32_t num) { + char buf[EXTREG_KEY_SIZE]; + upb_value v; + extreg_key(buf, t, num); + if (upb_strtable_lookup2(&r->exts, buf, EXTREG_KEY_SIZE, &v)) { + return upb_value_getconstptr(v); + } else { + return NULL; + } +} -upb_ExtensionRange* _upb_ExtensionRanges_New( - upb_DefBuilder* ctx, int n, - const UPB_DESC(DescriptorProto_ExtensionRange) * const* protos, - const upb_MessageDef* m) { - upb_ExtensionRange* r = - _upb_DefBuilder_Alloc(ctx, sizeof(upb_ExtensionRange) * n); - for (int i = 0; i < n; i++) { - const int32_t start = - UPB_DESC(DescriptorProto_ExtensionRange_start)(protos[i]); - const int32_t end = UPB_DESC(DescriptorProto_ExtensionRange_end)(protos[i]); - const int32_t max = UPB_DESC(MessageOptions_message_set_wire_format)( - upb_MessageDef_Options(m)) - ? INT32_MAX - : kUpb_MaxFieldNumber + 1; +#include - // A full validation would also check that each range is disjoint, and that - // none of the fields overlap with the extension ranges, but we are just - // sanity checking here. - if (start < 1 || end <= start || end > max) { - _upb_DefBuilder_Errf(ctx, - "Extension range (%d, %d) is invalid, message=%s\n", - (int)start, (int)end, upb_MessageDef_FullName(m)); + +// Must be last. + +const upb_MiniTableField* upb_MiniTable_FindFieldByNumber( + const upb_MiniTable* t, uint32_t number) { + const size_t i = ((size_t)number) - 1; // 0 wraps to SIZE_MAX + + // Ideal case: index into dense fields + if (i < t->dense_below) { + UPB_ASSERT(t->fields[i].number == number); + return &t->fields[i]; + } + + // Slow case: binary search + int lo = t->dense_below; + int hi = t->field_count - 1; + while (lo <= hi) { + int mid = (lo + hi) / 2; + uint32_t num = t->fields[mid].number; + if (num < number) { + lo = mid + 1; + continue; + } + if (num > number) { + hi = mid - 1; + continue; } + return &t->fields[mid]; + } + return NULL; +} - r[i].start = start; - r[i].end = end; - UPB_DEF_SET_OPTIONS(r[i].opts, DescriptorProto_ExtensionRange, - ExtensionRangeOptions, protos[i]); +static bool upb_MiniTable_Is_Oneof(const upb_MiniTableField* f) { + return f->presence < 0; +} + +const upb_MiniTableField* upb_MiniTable_GetOneof(const upb_MiniTable* m, + const upb_MiniTableField* f) { + if (UPB_UNLIKELY(!upb_MiniTable_Is_Oneof(f))) { + return NULL; + } + const upb_MiniTableField* ptr = &m->fields[0]; + const upb_MiniTableField* end = &m->fields[m->field_count]; + while (++ptr < end) { + if (ptr->presence == (*f).presence) { + return ptr; + } } + return NULL; +} - return r; +bool upb_MiniTable_NextOneofField(const upb_MiniTable* m, + const upb_MiniTableField** f) { + const upb_MiniTableField* ptr = *f; + const upb_MiniTableField* end = &m->fields[m->field_count]; + while (++ptr < end) { + if (ptr->presence == (*f)->presence) { + *f = ptr; + return true; + } + } + return false; } -#include -#include +const struct upb_MiniTable _kUpb_MiniTable_Empty = { + .subs = NULL, + .fields = NULL, + .size = 0, + .field_count = 0, + .ext = kUpb_ExtMode_NonExtendable, + .dense_below = 0, + .table_mask = -1, + .required_count = 0, +}; -// Must be last. -#define UPB_FIELD_TYPE_UNSPECIFIED 0 +#include -typedef struct { - size_t len; - char str[1]; // Null-terminated string data follows. -} str_t; -struct upb_FieldDef { - const UPB_DESC(FieldOptions) * opts; - const upb_FileDef* file; - const upb_MessageDef* msgdef; - const char* full_name; - const char* json_name; - union { - int64_t sint; - uint64_t uint; - double dbl; - float flt; - bool boolean; - str_t* str; - void* msg; // Always NULL. - } defaultval; - union { - const upb_OneofDef* oneof; - const upb_MessageDef* extension_scope; - } scope; - union { - const upb_MessageDef* msgdef; - const upb_EnumDef* enumdef; - const UPB_DESC(FieldDescriptorProto) * unresolved; - } sub; - uint32_t number_; - uint16_t index_; - uint16_t layout_index; // Index into msgdef->layout->fields or file->exts - bool has_default; - bool has_json_name; - bool has_presence; - bool is_extension; - bool is_packed; - bool is_proto3_optional; - upb_FieldType type_; - upb_Label label_; -#if UINTPTR_MAX == 0xffffffff - uint32_t padding; // Increase size to a multiple of 8. -#endif -}; +// Must be last. -upb_FieldDef* _upb_FieldDef_At(const upb_FieldDef* f, int i) { - return (upb_FieldDef*)&f[i]; -} +/* The upb core does not generally have a concept of default instances. However + * for descriptor options we make an exception since the max size is known and + * modest (<200 bytes). All types can share a default instance since it is + * initialized to zeroes. + * + * We have to allocate an extra pointer for upb's internal metadata. */ +static const char opt_default_buf[_UPB_MAXOPT_SIZE + sizeof(void*)] = {0}; +const char* kUpbDefOptDefault = &opt_default_buf[sizeof(void*)]; -const UPB_DESC(FieldOptions) * upb_FieldDef_Options(const upb_FieldDef* f) { - return f->opts; -} +const char* _upb_DefBuilder_FullToShort(const char* fullname) { + const char* p; -bool upb_FieldDef_HasOptions(const upb_FieldDef* f) { - return f->opts != (void*)kUpbDefOptDefault; + if (fullname == NULL) { + return NULL; + } else if ((p = strrchr(fullname, '.')) == NULL) { + /* No '.' in the name, return the full string. */ + return fullname; + } else { + /* Return one past the last '.'. */ + return p + 1; + } } -const char* upb_FieldDef_FullName(const upb_FieldDef* f) { - return f->full_name; +void _upb_DefBuilder_FailJmp(upb_DefBuilder* ctx) { UPB_LONGJMP(ctx->err, 1); } + +void _upb_DefBuilder_Errf(upb_DefBuilder* ctx, const char* fmt, ...) { + va_list argp; + va_start(argp, fmt); + upb_Status_VSetErrorFormat(ctx->status, fmt, argp); + va_end(argp); + _upb_DefBuilder_FailJmp(ctx); } -upb_CType upb_FieldDef_CType(const upb_FieldDef* f) { - switch (f->type_) { - case kUpb_FieldType_Double: - return kUpb_CType_Double; - case kUpb_FieldType_Float: - return kUpb_CType_Float; - case kUpb_FieldType_Int64: - case kUpb_FieldType_SInt64: - case kUpb_FieldType_SFixed64: - return kUpb_CType_Int64; - case kUpb_FieldType_Int32: - case kUpb_FieldType_SFixed32: - case kUpb_FieldType_SInt32: - return kUpb_CType_Int32; - case kUpb_FieldType_UInt64: - case kUpb_FieldType_Fixed64: - return kUpb_CType_UInt64; - case kUpb_FieldType_UInt32: - case kUpb_FieldType_Fixed32: - return kUpb_CType_UInt32; - case kUpb_FieldType_Enum: - return kUpb_CType_Enum; - case kUpb_FieldType_Bool: - return kUpb_CType_Bool; - case kUpb_FieldType_String: - return kUpb_CType_String; - case kUpb_FieldType_Bytes: - return kUpb_CType_Bytes; - case kUpb_FieldType_Group: - case kUpb_FieldType_Message: - return kUpb_CType_Message; - } - UPB_UNREACHABLE(); +void _upb_DefBuilder_OomErr(upb_DefBuilder* ctx) { + upb_Status_SetErrorMessage(ctx->status, "out of memory"); + _upb_DefBuilder_FailJmp(ctx); } -upb_FieldType upb_FieldDef_Type(const upb_FieldDef* f) { return f->type_; } +// Verify a relative identifier string. The loop is branchless for speed. +static void _upb_DefBuilder_CheckIdentNotFull(upb_DefBuilder* ctx, + upb_StringView name) { + bool good = name.size > 0; -uint32_t upb_FieldDef_Index(const upb_FieldDef* f) { return f->index_; } + for (size_t i = 0; i < name.size; i++) { + const char c = name.data[i]; + const char d = c | 0x20; // force lowercase + const bool is_alpha = (('a' <= d) & (d <= 'z')) | (c == '_'); + const bool is_numer = ('0' <= c) & (c <= '9') & (i != 0); -upb_Label upb_FieldDef_Label(const upb_FieldDef* f) { return f->label_; } + good &= is_alpha | is_numer; + } -uint32_t upb_FieldDef_Number(const upb_FieldDef* f) { return f->number_; } + if (!good) _upb_DefBuilder_CheckIdentSlow(ctx, name, false); +} -bool upb_FieldDef_IsExtension(const upb_FieldDef* f) { return f->is_extension; } +const char* _upb_DefBuilder_MakeFullName(upb_DefBuilder* ctx, + const char* prefix, + upb_StringView name) { + _upb_DefBuilder_CheckIdentNotFull(ctx, name); + if (prefix) { + // ret = prefix + '.' + name; + size_t n = strlen(prefix); + char* ret = _upb_DefBuilder_Alloc(ctx, n + name.size + 2); + strcpy(ret, prefix); + ret[n] = '.'; + memcpy(&ret[n + 1], name.data, name.size); + ret[n + 1 + name.size] = '\0'; + return ret; + } else { + char* ret = upb_strdup2(name.data, name.size, ctx->arena); + if (!ret) _upb_DefBuilder_OomErr(ctx); + return ret; + } +} -bool upb_FieldDef_IsPacked(const upb_FieldDef* f) { return f->is_packed; } +static bool remove_component(char* base, size_t* len) { + if (*len == 0) return false; -const char* upb_FieldDef_Name(const upb_FieldDef* f) { - return _upb_DefBuilder_FullToShort(f->full_name); + for (size_t i = *len - 1; i > 0; i--) { + if (base[i] == '.') { + *len = i; + return true; + } + } + + *len = 0; + return true; } -const char* upb_FieldDef_JsonName(const upb_FieldDef* f) { - return f->json_name; +const void* _upb_DefBuilder_ResolveAny(upb_DefBuilder* ctx, + const char* from_name_dbg, + const char* base, upb_StringView sym, + upb_deftype_t* type) { + if (sym.size == 0) goto notfound; + upb_value v; + if (sym.data[0] == '.') { + /* Symbols starting with '.' are absolute, so we do a single lookup. + * Slice to omit the leading '.' */ + if (!_upb_DefPool_LookupSym(ctx->symtab, sym.data + 1, sym.size - 1, &v)) { + goto notfound; + } + } else { + /* Remove components from base until we find an entry or run out. */ + size_t baselen = base ? strlen(base) : 0; + char* tmp = malloc(sym.size + baselen + 1); + while (1) { + char* p = tmp; + if (baselen) { + memcpy(p, base, baselen); + p[baselen] = '.'; + p += baselen + 1; + } + memcpy(p, sym.data, sym.size); + p += sym.size; + if (_upb_DefPool_LookupSym(ctx->symtab, tmp, p - tmp, &v)) { + break; + } + if (!remove_component(tmp, &baselen)) { + free(tmp); + goto notfound; + } + } + free(tmp); + } + + *type = _upb_DefType_Type(v); + return _upb_DefType_Unpack(v, *type); + +notfound: + _upb_DefBuilder_Errf(ctx, "couldn't resolve name '" UPB_STRINGVIEW_FORMAT "'", + UPB_STRINGVIEW_ARGS(sym)); } -bool upb_FieldDef_HasJsonName(const upb_FieldDef* f) { - return f->has_json_name; +const void* _upb_DefBuilder_Resolve(upb_DefBuilder* ctx, + const char* from_name_dbg, const char* base, + upb_StringView sym, upb_deftype_t type) { + upb_deftype_t found_type; + const void* ret = + _upb_DefBuilder_ResolveAny(ctx, from_name_dbg, base, sym, &found_type); + if (ret && found_type != type) { + _upb_DefBuilder_Errf(ctx, + "type mismatch when resolving %s: couldn't find " + "name " UPB_STRINGVIEW_FORMAT " with type=%d", + from_name_dbg, UPB_STRINGVIEW_ARGS(sym), (int)type); + } + return ret; } -const upb_FileDef* upb_FieldDef_File(const upb_FieldDef* f) { return f->file; } +// Per ASCII this will lower-case a letter. If the result is a letter, the +// input was definitely a letter. If the output is not a letter, this may +// have transformed the character unpredictably. +static char upb_ascii_lower(char ch) { return ch | 0x20; } -const upb_MessageDef* upb_FieldDef_ContainingType(const upb_FieldDef* f) { - return f->msgdef; +// isalpha() etc. from are locale-dependent, which we don't want. +static bool upb_isbetween(uint8_t c, uint8_t low, uint8_t high) { + return low <= c && c <= high; } -const upb_MessageDef* upb_FieldDef_ExtensionScope(const upb_FieldDef* f) { - return f->is_extension ? f->scope.extension_scope : NULL; +static bool upb_isletter(char c) { + char lower = upb_ascii_lower(c); + return upb_isbetween(lower, 'a', 'z') || c == '_'; } -const upb_OneofDef* upb_FieldDef_ContainingOneof(const upb_FieldDef* f) { - return f->is_extension ? NULL : f->scope.oneof; +static bool upb_isalphanum(char c) { + return upb_isletter(c) || upb_isbetween(c, '0', '9'); } -const upb_OneofDef* upb_FieldDef_RealContainingOneof(const upb_FieldDef* f) { - const upb_OneofDef* oneof = upb_FieldDef_ContainingOneof(f); - if (!oneof || upb_OneofDef_IsSynthetic(oneof)) return NULL; - return oneof; +static bool TryGetChar(const char** src, const char* end, char* ch) { + if (*src == end) return false; + *ch = **src; + *src += 1; + return true; } -upb_MessageValue upb_FieldDef_Default(const upb_FieldDef* f) { - upb_MessageValue ret; - - if (upb_FieldDef_IsRepeated(f) || upb_FieldDef_IsSubMessage(f)) { - return (upb_MessageValue){.msg_val = NULL}; +static int TryGetHexDigit(const char** src, const char* end) { + char ch; + if (!TryGetChar(src, end, &ch)) return -1; + if ('0' <= ch && ch <= '9') { + return ch - '0'; } - - switch (upb_FieldDef_CType(f)) { - case kUpb_CType_Bool: - return (upb_MessageValue){.bool_val = f->defaultval.boolean}; - case kUpb_CType_Int64: - return (upb_MessageValue){.int64_val = f->defaultval.sint}; - case kUpb_CType_UInt64: - return (upb_MessageValue){.uint64_val = f->defaultval.uint}; - case kUpb_CType_Enum: - case kUpb_CType_Int32: - return (upb_MessageValue){.int32_val = (int32_t)f->defaultval.sint}; - case kUpb_CType_UInt32: - return (upb_MessageValue){.uint32_val = (uint32_t)f->defaultval.uint}; - case kUpb_CType_Float: - return (upb_MessageValue){.float_val = f->defaultval.flt}; - case kUpb_CType_Double: - return (upb_MessageValue){.double_val = f->defaultval.dbl}; - case kUpb_CType_String: - case kUpb_CType_Bytes: { - str_t* str = f->defaultval.str; - if (str) { - return (upb_MessageValue){ - .str_val = (upb_StringView){.data = str->str, .size = str->len}}; - } else { - return (upb_MessageValue){ - .str_val = (upb_StringView){.data = NULL, .size = 0}}; - } - } - default: - UPB_UNREACHABLE(); + ch = upb_ascii_lower(ch); + if ('a' <= ch && ch <= 'f') { + return ch - 'a' + 0xa; } + *src -= 1; // Char wasn't actually a hex digit. + return -1; +} +static char upb_DefBuilder_ParseHexEscape(upb_DefBuilder* ctx, + const upb_FieldDef* f, + const char** src, const char* end) { + int hex_digit = TryGetHexDigit(src, end); + if (hex_digit < 0) { + _upb_DefBuilder_Errf( + ctx, "\\x must be followed by at least one hex digit (field='%s')", + upb_FieldDef_FullName(f)); + return 0; + } + unsigned int ret = hex_digit; + while ((hex_digit = TryGetHexDigit(src, end)) >= 0) { + ret = (ret << 4) | hex_digit; + } + if (ret > 0xff) { + _upb_DefBuilder_Errf(ctx, "Value of hex escape in field %s exceeds 8 bits", + upb_FieldDef_FullName(f)); + return 0; + } return ret; } -const upb_MessageDef* upb_FieldDef_MessageSubDef(const upb_FieldDef* f) { - return upb_FieldDef_CType(f) == kUpb_CType_Message ? f->sub.msgdef : NULL; +static char TryGetOctalDigit(const char** src, const char* end) { + char ch; + if (!TryGetChar(src, end, &ch)) return -1; + if ('0' <= ch && ch <= '7') { + return ch - '0'; + } + *src -= 1; // Char wasn't actually an octal digit. + return -1; } -const upb_EnumDef* upb_FieldDef_EnumSubDef(const upb_FieldDef* f) { - return upb_FieldDef_CType(f) == kUpb_CType_Enum ? f->sub.enumdef : NULL; +static char upb_DefBuilder_ParseOctalEscape(upb_DefBuilder* ctx, + const upb_FieldDef* f, + const char** src, const char* end) { + char ch = 0; + for (int i = 0; i < 3; i++) { + char digit; + if ((digit = TryGetOctalDigit(src, end)) >= 0) { + ch = (ch << 3) | digit; + } + } + return ch; } -const upb_MiniTableField* upb_FieldDef_MiniTable(const upb_FieldDef* f) { - if (upb_FieldDef_IsExtension(f)) { - const upb_FileDef* file = upb_FieldDef_File(f); - return (upb_MiniTableField*)_upb_FileDef_ExtensionMiniTable( - file, f->layout_index); - } else { - const upb_MiniTable* layout = upb_MessageDef_MiniTable(f->msgdef); - return &layout->fields[f->layout_index]; +char _upb_DefBuilder_ParseEscape(upb_DefBuilder* ctx, const upb_FieldDef* f, + const char** src, const char* end) { + char ch; + if (!TryGetChar(src, end, &ch)) { + _upb_DefBuilder_Errf(ctx, "unterminated escape sequence in field %s", + upb_FieldDef_FullName(f)); + return 0; + } + switch (ch) { + case 'a': + return '\a'; + case 'b': + return '\b'; + case 'f': + return '\f'; + case 'n': + return '\n'; + case 'r': + return '\r'; + case 't': + return '\t'; + case 'v': + return '\v'; + case '\\': + return '\\'; + case '\'': + return '\''; + case '\"': + return '\"'; + case '?': + return '\?'; + case 'x': + case 'X': + return upb_DefBuilder_ParseHexEscape(ctx, f, src, end); + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + *src -= 1; + return upb_DefBuilder_ParseOctalEscape(ctx, f, src, end); } + _upb_DefBuilder_Errf(ctx, "Unknown escape sequence: \\%c", ch); } -const upb_MiniTableExtension* _upb_FieldDef_ExtensionMiniTable( - const upb_FieldDef* f) { - UPB_ASSERT(upb_FieldDef_IsExtension(f)); - const upb_FileDef* file = upb_FieldDef_File(f); - return _upb_FileDef_ExtensionMiniTable(file, f->layout_index); -} +void _upb_DefBuilder_CheckIdentSlow(upb_DefBuilder* ctx, upb_StringView name, + bool full) { + const char* str = name.data; + const size_t len = name.size; + bool start = true; + for (size_t i = 0; i < len; i++) { + const char c = str[i]; + if (c == '.') { + if (start || !full) { + _upb_DefBuilder_Errf( + ctx, "invalid name: unexpected '.' (" UPB_STRINGVIEW_FORMAT ")", + UPB_STRINGVIEW_ARGS(name)); + } + start = true; + } else if (start) { + if (!upb_isletter(c)) { + _upb_DefBuilder_Errf(ctx, + "invalid name: path components must start with a " + "letter (" UPB_STRINGVIEW_FORMAT ")", + UPB_STRINGVIEW_ARGS(name)); + } + start = false; + } else if (!upb_isalphanum(c)) { + _upb_DefBuilder_Errf( + ctx, + "invalid name: non-alphanumeric character (" UPB_STRINGVIEW_FORMAT + ")", + UPB_STRINGVIEW_ARGS(name)); + } + } + if (start) { + _upb_DefBuilder_Errf(ctx, + "invalid name: empty part (" UPB_STRINGVIEW_FORMAT ")", + UPB_STRINGVIEW_ARGS(name)); + } -bool _upb_FieldDef_IsClosedEnum(const upb_FieldDef* f) { - if (f->type_ != kUpb_FieldType_Enum) return false; - return upb_EnumDef_IsClosed(f->sub.enumdef); + // We should never reach this point. + UPB_ASSERT(false); } -bool _upb_FieldDef_IsProto3Optional(const upb_FieldDef* f) { - return f->is_proto3_optional; -} -int _upb_FieldDef_LayoutIndex(const upb_FieldDef* f) { return f->layout_index; } -uint64_t _upb_FieldDef_Modifiers(const upb_FieldDef* f) { - uint64_t out = f->is_packed ? kUpb_FieldModifier_IsPacked : 0; +// Must be last. - switch (f->label_) { - case kUpb_Label_Optional: - if (!upb_FieldDef_HasPresence(f)) { - out |= kUpb_FieldModifier_IsProto3Singular; - } - break; - case kUpb_Label_Repeated: - out |= kUpb_FieldModifier_IsRepeated; - break; - case kUpb_Label_Required: - out |= kUpb_FieldModifier_IsRequired; - break; - } +struct upb_DefPool { + upb_Arena* arena; + upb_strtable syms; // full_name -> packed def ptr + upb_strtable files; // file_name -> (upb_FileDef*) + upb_inttable exts; // (upb_MiniTableExtension*) -> (upb_FieldDef*) + upb_ExtensionRegistry* extreg; + upb_MiniTablePlatform platform; + void* scratch_data; + size_t scratch_size; + size_t bytes_loaded; +}; - if (_upb_FieldDef_IsClosedEnum(f)) { - out |= kUpb_FieldModifier_IsClosedEnum; - } - return out; +void upb_DefPool_Free(upb_DefPool* s) { + upb_Arena_Free(s->arena); + upb_gfree(s->scratch_data); + upb_gfree(s); } -bool upb_FieldDef_HasDefault(const upb_FieldDef* f) { return f->has_default; } -bool upb_FieldDef_HasPresence(const upb_FieldDef* f) { return f->has_presence; } +upb_DefPool* upb_DefPool_New(void) { + upb_DefPool* s = upb_gmalloc(sizeof(*s)); + if (!s) return NULL; -bool upb_FieldDef_HasSubDef(const upb_FieldDef* f) { - return upb_FieldDef_IsSubMessage(f) || - upb_FieldDef_CType(f) == kUpb_CType_Enum; -} + s->arena = upb_Arena_New(); + s->bytes_loaded = 0; -bool upb_FieldDef_IsMap(const upb_FieldDef* f) { - return upb_FieldDef_IsRepeated(f) && upb_FieldDef_IsSubMessage(f) && - upb_MessageDef_IsMapEntry(upb_FieldDef_MessageSubDef(f)); -} + s->scratch_size = 240; + s->scratch_data = upb_gmalloc(s->scratch_size); + if (!s->scratch_data) goto err; -bool upb_FieldDef_IsOptional(const upb_FieldDef* f) { - return upb_FieldDef_Label(f) == kUpb_Label_Optional; -} + if (!upb_strtable_init(&s->syms, 32, s->arena)) goto err; + if (!upb_strtable_init(&s->files, 4, s->arena)) goto err; + if (!upb_inttable_init(&s->exts, s->arena)) goto err; -bool upb_FieldDef_IsPrimitive(const upb_FieldDef* f) { - return !upb_FieldDef_IsString(f) && !upb_FieldDef_IsSubMessage(f); -} + s->extreg = upb_ExtensionRegistry_New(s->arena); + if (!s->extreg) goto err; -bool upb_FieldDef_IsRepeated(const upb_FieldDef* f) { - return upb_FieldDef_Label(f) == kUpb_Label_Repeated; -} + s->platform = kUpb_MiniTablePlatform_Native; -bool upb_FieldDef_IsRequired(const upb_FieldDef* f) { - return upb_FieldDef_Label(f) == kUpb_Label_Required; -} + return s; -bool upb_FieldDef_IsString(const upb_FieldDef* f) { - return upb_FieldDef_CType(f) == kUpb_CType_String || - upb_FieldDef_CType(f) == kUpb_CType_Bytes; +err: + upb_DefPool_Free(s); + return NULL; } -bool upb_FieldDef_IsSubMessage(const upb_FieldDef* f) { - return upb_FieldDef_CType(f) == kUpb_CType_Message; -} +bool _upb_DefPool_InsertExt(upb_DefPool* s, const upb_MiniTableExtension* ext, + const upb_FieldDef* f) { + return upb_inttable_insert(&s->exts, (uintptr_t)ext, upb_value_constptr(f), + s->arena); +} -static bool between(int32_t x, int32_t low, int32_t high) { - return x >= low && x <= high; +bool _upb_DefPool_InsertSym(upb_DefPool* s, upb_StringView sym, upb_value v, + upb_Status* status) { + // TODO: table should support an operation "tryinsert" to avoid the double + // lookup. + if (upb_strtable_lookup2(&s->syms, sym.data, sym.size, NULL)) { + upb_Status_SetErrorFormat(status, "duplicate symbol '%s'", sym.data); + return false; + } + if (!upb_strtable_insert(&s->syms, sym.data, sym.size, v, s->arena)) { + upb_Status_SetErrorMessage(status, "out of memory"); + return false; + } + return true; } -bool upb_FieldDef_checklabel(int32_t label) { return between(label, 1, 3); } -bool upb_FieldDef_checktype(int32_t type) { return between(type, 1, 11); } -bool upb_FieldDef_checkintfmt(int32_t fmt) { return between(fmt, 1, 3); } +static const void* _upb_DefPool_Unpack(const upb_DefPool* s, const char* sym, + size_t size, upb_deftype_t type) { + upb_value v; + return upb_strtable_lookup2(&s->syms, sym, size, &v) + ? _upb_DefType_Unpack(v, type) + : NULL; +} -bool upb_FieldDef_checkdescriptortype(int32_t type) { - return between(type, 1, 18); +bool _upb_DefPool_LookupSym(const upb_DefPool* s, const char* sym, size_t size, + upb_value* v) { + return upb_strtable_lookup2(&s->syms, sym, size, v); } -static bool streql2(const char* a, size_t n, const char* b) { - return n == strlen(b) && memcmp(a, b, n) == 0; +upb_ExtensionRegistry* _upb_DefPool_ExtReg(const upb_DefPool* s) { + return s->extreg; } -// Implement the transformation as described in the spec: -// 1. upper case all letters after an underscore. -// 2. remove all underscores. -static char* make_json_name(const char* name, size_t size, upb_Arena* a) { - char* out = upb_Arena_Malloc(a, size + 1); // +1 is to add a trailing '\0' - if (out == NULL) return NULL; +void** _upb_DefPool_ScratchData(const upb_DefPool* s) { + return (void**)&s->scratch_data; +} - bool ucase_next = false; - char* des = out; - for (size_t i = 0; i < size; i++) { - if (name[i] == '_') { - ucase_next = true; - } else { - *des++ = ucase_next ? toupper(name[i]) : name[i]; - ucase_next = false; - } - } - *des++ = '\0'; - return out; +size_t* _upb_DefPool_ScratchSize(const upb_DefPool* s) { + return (size_t*)&s->scratch_size; } -static str_t* newstr(upb_DefBuilder* ctx, const char* data, size_t len) { - str_t* ret = _upb_DefBuilder_Alloc(ctx, sizeof(*ret) + len); - if (!ret) _upb_DefBuilder_OomErr(ctx); - ret->len = len; - if (len) memcpy(ret->str, data, len); - ret->str[len] = '\0'; - return ret; +void _upb_DefPool_SetPlatform(upb_DefPool* s, upb_MiniTablePlatform platform) { + assert(upb_strtable_count(&s->files) == 0); + s->platform = platform; } -static str_t* unescape(upb_DefBuilder* ctx, const upb_FieldDef* f, - const char* data, size_t len) { - // Size here is an upper bound; escape sequences could ultimately shrink it. - str_t* ret = _upb_DefBuilder_Alloc(ctx, sizeof(*ret) + len); - char* dst = &ret->str[0]; - const char* src = data; - const char* end = data + len; +const upb_MessageDef* upb_DefPool_FindMessageByName(const upb_DefPool* s, + const char* sym) { + return _upb_DefPool_Unpack(s, sym, strlen(sym), UPB_DEFTYPE_MSG); +} - while (src < end) { - if (*src == '\\') { - src++; - *dst++ = _upb_DefBuilder_ParseEscape(ctx, f, &src, end); - } else { - *dst++ = *src++; - } - } +const upb_MessageDef* upb_DefPool_FindMessageByNameWithSize( + const upb_DefPool* s, const char* sym, size_t len) { + return _upb_DefPool_Unpack(s, sym, len, UPB_DEFTYPE_MSG); +} - ret->len = dst - &ret->str[0]; - return ret; +const upb_EnumDef* upb_DefPool_FindEnumByName(const upb_DefPool* s, + const char* sym) { + return _upb_DefPool_Unpack(s, sym, strlen(sym), UPB_DEFTYPE_ENUM); } -static void parse_default(upb_DefBuilder* ctx, const char* str, size_t len, - upb_FieldDef* f) { - char* end; - char nullz[64]; - errno = 0; +const upb_EnumValueDef* upb_DefPool_FindEnumByNameval(const upb_DefPool* s, + const char* sym) { + return _upb_DefPool_Unpack(s, sym, strlen(sym), UPB_DEFTYPE_ENUMVAL); +} - switch (upb_FieldDef_CType(f)) { - case kUpb_CType_Int32: - case kUpb_CType_Int64: - case kUpb_CType_UInt32: - case kUpb_CType_UInt64: - case kUpb_CType_Double: - case kUpb_CType_Float: - // Standard C number parsing functions expect null-terminated strings. - if (len >= sizeof(nullz) - 1) { - _upb_DefBuilder_Errf(ctx, "Default too long: %.*s", (int)len, str); - } - memcpy(nullz, str, len); - nullz[len] = '\0'; - str = nullz; - break; +const upb_FileDef* upb_DefPool_FindFileByName(const upb_DefPool* s, + const char* name) { + upb_value v; + return upb_strtable_lookup(&s->files, name, &v) ? upb_value_getconstptr(v) + : NULL; +} + +const upb_FileDef* upb_DefPool_FindFileByNameWithSize(const upb_DefPool* s, + const char* name, + size_t len) { + upb_value v; + return upb_strtable_lookup2(&s->files, name, len, &v) + ? upb_value_getconstptr(v) + : NULL; +} + +const upb_FieldDef* upb_DefPool_FindExtensionByNameWithSize( + const upb_DefPool* s, const char* name, size_t size) { + upb_value v; + if (!upb_strtable_lookup2(&s->syms, name, size, &v)) return NULL; + + switch (_upb_DefType_Type(v)) { + case UPB_DEFTYPE_FIELD: + return _upb_DefType_Unpack(v, UPB_DEFTYPE_FIELD); + case UPB_DEFTYPE_MSG: { + const upb_MessageDef* m = _upb_DefType_Unpack(v, UPB_DEFTYPE_MSG); + return _upb_MessageDef_InMessageSet(m) + ? upb_MessageDef_NestedExtension(m, 0) + : NULL; + } default: break; } - switch (upb_FieldDef_CType(f)) { - case kUpb_CType_Int32: { - long val = strtol(str, &end, 0); - if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end) { - goto invalid; + return NULL; +} + +const upb_FieldDef* upb_DefPool_FindExtensionByName(const upb_DefPool* s, + const char* sym) { + return upb_DefPool_FindExtensionByNameWithSize(s, sym, strlen(sym)); +} + +const upb_ServiceDef* upb_DefPool_FindServiceByName(const upb_DefPool* s, + const char* name) { + return _upb_DefPool_Unpack(s, name, strlen(name), UPB_DEFTYPE_SERVICE); +} + +const upb_ServiceDef* upb_DefPool_FindServiceByNameWithSize( + const upb_DefPool* s, const char* name, size_t size) { + return _upb_DefPool_Unpack(s, name, size, UPB_DEFTYPE_SERVICE); +} + +const upb_FileDef* upb_DefPool_FindFileContainingSymbol(const upb_DefPool* s, + const char* name) { + upb_value v; + // TODO(haberman): non-extension fields and oneofs. + if (upb_strtable_lookup(&s->syms, name, &v)) { + switch (_upb_DefType_Type(v)) { + case UPB_DEFTYPE_EXT: { + const upb_FieldDef* f = _upb_DefType_Unpack(v, UPB_DEFTYPE_EXT); + return upb_FieldDef_File(f); } - f->defaultval.sint = val; - break; - } - case kUpb_CType_Enum: { - const upb_EnumDef* e = f->sub.enumdef; - const upb_EnumValueDef* ev = - upb_EnumDef_FindValueByNameWithSize(e, str, len); - if (!ev) { - goto invalid; + case UPB_DEFTYPE_MSG: { + const upb_MessageDef* m = _upb_DefType_Unpack(v, UPB_DEFTYPE_MSG); + return upb_MessageDef_File(m); } - f->defaultval.sint = upb_EnumValueDef_Number(ev); - break; - } - case kUpb_CType_Int64: { - long long val = strtoll(str, &end, 0); - if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || *end) { - goto invalid; + case UPB_DEFTYPE_ENUM: { + const upb_EnumDef* e = _upb_DefType_Unpack(v, UPB_DEFTYPE_ENUM); + return upb_EnumDef_File(e); } - f->defaultval.sint = val; - break; - } - case kUpb_CType_UInt32: { - unsigned long val = strtoul(str, &end, 0); - if (val > UINT32_MAX || errno == ERANGE || *end) { - goto invalid; + case UPB_DEFTYPE_ENUMVAL: { + const upb_EnumValueDef* ev = + _upb_DefType_Unpack(v, UPB_DEFTYPE_ENUMVAL); + return upb_EnumDef_File(upb_EnumValueDef_Enum(ev)); } - f->defaultval.uint = val; - break; - } - case kUpb_CType_UInt64: { - unsigned long long val = strtoull(str, &end, 0); - if (val > UINT64_MAX || errno == ERANGE || *end) { - goto invalid; + case UPB_DEFTYPE_SERVICE: { + const upb_ServiceDef* service = + _upb_DefType_Unpack(v, UPB_DEFTYPE_SERVICE); + return upb_ServiceDef_File(service); } - f->defaultval.uint = val; - break; + default: + UPB_UNREACHABLE(); } - case kUpb_CType_Double: { - double val = strtod(str, &end); - if (errno == ERANGE || *end) { - goto invalid; + } + + const char* last_dot = strrchr(name, '.'); + if (last_dot) { + const upb_MessageDef* parent = + upb_DefPool_FindMessageByNameWithSize(s, name, last_dot - name); + if (parent) { + const char* shortname = last_dot + 1; + if (upb_MessageDef_FindByNameWithSize(parent, shortname, + strlen(shortname), NULL, NULL)) { + return upb_MessageDef_File(parent); } - f->defaultval.dbl = val; - break; } - case kUpb_CType_Float: { - float val = strtof(str, &end); - if (errno == ERANGE || *end) { - goto invalid; - } - f->defaultval.flt = val; - break; - } - case kUpb_CType_Bool: { - if (streql2(str, len, "false")) { - f->defaultval.boolean = false; - } else if (streql2(str, len, "true")) { - f->defaultval.boolean = true; - } else { - goto invalid; - } - break; - } - case kUpb_CType_String: - f->defaultval.str = newstr(ctx, str, len); - break; - case kUpb_CType_Bytes: - f->defaultval.str = unescape(ctx, f, str, len); - break; - case kUpb_CType_Message: - /* Should not have a default value. */ - _upb_DefBuilder_Errf(ctx, "Message should not have a default (%s)", - upb_FieldDef_FullName(f)); } - return; - -invalid: - _upb_DefBuilder_Errf(ctx, "Invalid default '%.*s' for field %s of type %d", - (int)len, str, upb_FieldDef_FullName(f), - (int)upb_FieldDef_Type(f)); + return NULL; } -static void set_default_default(upb_DefBuilder* ctx, upb_FieldDef* f) { - switch (upb_FieldDef_CType(f)) { - case kUpb_CType_Int32: - case kUpb_CType_Int64: - f->defaultval.sint = 0; - break; - case kUpb_CType_UInt64: - case kUpb_CType_UInt32: - f->defaultval.uint = 0; - break; - case kUpb_CType_Double: - case kUpb_CType_Float: - f->defaultval.dbl = 0; - break; - case kUpb_CType_String: - case kUpb_CType_Bytes: - f->defaultval.str = newstr(ctx, NULL, 0); - break; - case kUpb_CType_Bool: - f->defaultval.boolean = false; - break; - case kUpb_CType_Enum: { - const upb_EnumValueDef* v = upb_EnumDef_Value(f->sub.enumdef, 0); - f->defaultval.sint = upb_EnumValueDef_Number(v); - break; +static void remove_filedef(upb_DefPool* s, upb_FileDef* file) { + intptr_t iter = UPB_INTTABLE_BEGIN; + upb_StringView key; + upb_value val; + while (upb_strtable_next2(&s->syms, &key, &val, &iter)) { + const upb_FileDef* f; + switch (_upb_DefType_Type(val)) { + case UPB_DEFTYPE_EXT: + f = upb_FieldDef_File(_upb_DefType_Unpack(val, UPB_DEFTYPE_EXT)); + break; + case UPB_DEFTYPE_MSG: + f = upb_MessageDef_File(_upb_DefType_Unpack(val, UPB_DEFTYPE_MSG)); + break; + case UPB_DEFTYPE_ENUM: + f = upb_EnumDef_File(_upb_DefType_Unpack(val, UPB_DEFTYPE_ENUM)); + break; + case UPB_DEFTYPE_ENUMVAL: + f = upb_EnumDef_File(upb_EnumValueDef_Enum( + _upb_DefType_Unpack(val, UPB_DEFTYPE_ENUMVAL))); + break; + case UPB_DEFTYPE_SERVICE: + f = upb_ServiceDef_File(_upb_DefType_Unpack(val, UPB_DEFTYPE_SERVICE)); + break; + default: + UPB_UNREACHABLE(); } - case kUpb_CType_Message: - break; - } -} - -static void _upb_FieldDef_Create(upb_DefBuilder* ctx, const char* prefix, - const UPB_DESC(FieldDescriptorProto) * - field_proto, - upb_MessageDef* m, upb_FieldDef* f) { - // Must happen before _upb_DefBuilder_Add() - f->file = _upb_DefBuilder_File(ctx); - if (!UPB_DESC(FieldDescriptorProto_has_name)(field_proto)) { - _upb_DefBuilder_Errf(ctx, "field has no name"); + if (f == file) upb_strtable_removeiter(&s->syms, &iter); } +} - const upb_StringView name = UPB_DESC(FieldDescriptorProto_name)(field_proto); - - f->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); - f->label_ = (int)UPB_DESC(FieldDescriptorProto_label)(field_proto); - f->number_ = UPB_DESC(FieldDescriptorProto_number)(field_proto); - f->is_proto3_optional = - UPB_DESC(FieldDescriptorProto_proto3_optional)(field_proto); - f->msgdef = m; - f->scope.oneof = NULL; - - f->has_json_name = UPB_DESC(FieldDescriptorProto_has_json_name)(field_proto); - if (f->has_json_name) { - const upb_StringView sv = - UPB_DESC(FieldDescriptorProto_json_name)(field_proto); - f->json_name = upb_strdup2(sv.data, sv.size, ctx->arena); +static const upb_FileDef* upb_DefBuilder_AddFileToPool( + upb_DefBuilder* const builder, upb_DefPool* const s, + const UPB_DESC(FileDescriptorProto) * const file_proto, + const upb_StringView name, upb_Status* const status) { + if (UPB_SETJMP(builder->err) != 0) { + UPB_ASSERT(!upb_Status_IsOk(status)); + if (builder->file) { + remove_filedef(s, builder->file); + builder->file = NULL; + } + } else if (!builder->arena || !builder->tmp_arena) { + _upb_DefBuilder_OomErr(builder); } else { - f->json_name = make_json_name(name.data, name.size, ctx->arena); + _upb_FileDef_Create(builder, file_proto); + upb_strtable_insert(&s->files, name.data, name.size, + upb_value_constptr(builder->file), builder->arena); + UPB_ASSERT(upb_Status_IsOk(status)); + upb_Arena_Fuse(s->arena, builder->arena); } - if (!f->json_name) _upb_DefBuilder_OomErr(ctx); - const bool has_type = UPB_DESC(FieldDescriptorProto_has_type)(field_proto); - const bool has_type_name = - UPB_DESC(FieldDescriptorProto_has_type_name)(field_proto); + if (builder->arena) upb_Arena_Free(builder->arena); + if (builder->tmp_arena) upb_Arena_Free(builder->tmp_arena); + return builder->file; +} - f->type_ = (int)UPB_DESC(FieldDescriptorProto_type)(field_proto); +static const upb_FileDef* _upb_DefPool_AddFile( + upb_DefPool* s, const UPB_DESC(FileDescriptorProto) * file_proto, + const upb_MiniTableFile* layout, upb_Status* status) { + const upb_StringView name = UPB_DESC(FileDescriptorProto_name)(file_proto); - if (has_type) { - switch (f->type_) { - case kUpb_FieldType_Message: - case kUpb_FieldType_Group: - case kUpb_FieldType_Enum: - if (!has_type_name) { - _upb_DefBuilder_Errf(ctx, "field of type %d requires type name (%s)", - (int)f->type_, f->full_name); - } - break; - default: - if (has_type_name) { - _upb_DefBuilder_Errf( - ctx, "invalid type for field with type_name set (%s, %d)", - f->full_name, (int)f->type_); - } + // Determine whether we already know about this file. + { + upb_value v; + if (upb_strtable_lookup2(&s->files, name.data, name.size, &v)) { + upb_Status_SetErrorFormat(status, + "duplicate file name " UPB_STRINGVIEW_FORMAT, + UPB_STRINGVIEW_ARGS(name)); + return NULL; } } - if (!has_type && has_type_name) { - f->type_ = - UPB_FIELD_TYPE_UNSPECIFIED; // We'll assign this in resolve_subdef() - } else { - if (f->type_ < kUpb_FieldType_Double || f->type_ > kUpb_FieldType_SInt64) { - _upb_DefBuilder_Errf(ctx, "invalid type for field %s (%d)", f->full_name, - f->type_); - } - } + upb_DefBuilder ctx = { + .symtab = s, + .layout = layout, + .platform = s->platform, + .msg_count = 0, + .enum_count = 0, + .ext_count = 0, + .status = status, + .file = NULL, + .arena = upb_Arena_New(), + .tmp_arena = upb_Arena_New(), + }; - if (f->label_ < kUpb_Label_Optional || f->label_ > kUpb_Label_Repeated) { - _upb_DefBuilder_Errf(ctx, "invalid label for field %s (%d)", f->full_name, - f->label_); - } + return upb_DefBuilder_AddFileToPool(&ctx, s, file_proto, name, status); +} - /* We can't resolve the subdef or (in the case of extensions) the containing - * message yet, because it may not have been defined yet. We stash a pointer - * to the field_proto until later when we can properly resolve it. */ - f->sub.unresolved = field_proto; +const upb_FileDef* upb_DefPool_AddFile(upb_DefPool* s, + const UPB_DESC(FileDescriptorProto) * + file_proto, + upb_Status* status) { + return _upb_DefPool_AddFile(s, file_proto, NULL, status); +} - if (f->label_ == kUpb_Label_Required && - upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto3) { - _upb_DefBuilder_Errf(ctx, "proto3 fields cannot be required (%s)", - f->full_name); - } +bool _upb_DefPool_LoadDefInitEx(upb_DefPool* s, const _upb_DefPool_Init* init, + bool rebuild_minitable) { + /* Since this function should never fail (it would indicate a bug in upb) we + * print errors to stderr instead of returning error status to the user. */ + _upb_DefPool_Init** deps = init->deps; + UPB_DESC(FileDescriptorProto) * file; + upb_Arena* arena; + upb_Status status; - if (UPB_DESC(FieldDescriptorProto_has_oneof_index)(field_proto)) { - uint32_t oneof_index = - UPB_DESC(FieldDescriptorProto_oneof_index)(field_proto); + upb_Status_Clear(&status); - if (upb_FieldDef_Label(f) != kUpb_Label_Optional) { - _upb_DefBuilder_Errf(ctx, "fields in oneof must have OPTIONAL label (%s)", - f->full_name); - } + if (upb_DefPool_FindFileByName(s, init->filename)) { + return true; + } - if (!m) { - _upb_DefBuilder_Errf(ctx, "oneof field (%s) has no containing msg", - f->full_name); - } + arena = upb_Arena_New(); - if (oneof_index >= upb_MessageDef_OneofCount(m)) { - _upb_DefBuilder_Errf(ctx, "oneof_index out of range (%s)", f->full_name); - } + for (; *deps; deps++) { + if (!_upb_DefPool_LoadDefInitEx(s, *deps, rebuild_minitable)) goto err; + } - upb_OneofDef* oneof = (upb_OneofDef*)upb_MessageDef_Oneof(m, oneof_index); - f->scope.oneof = oneof; + file = UPB_DESC(FileDescriptorProto_parse_ex)( + init->descriptor.data, init->descriptor.size, NULL, + kUpb_DecodeOption_AliasString, arena); + s->bytes_loaded += init->descriptor.size; - _upb_OneofDef_Insert(ctx, oneof, f, name.data, name.size); + if (!file) { + upb_Status_SetErrorFormat( + &status, + "Failed to parse compiled-in descriptor for file '%s'. This should " + "never happen.", + init->filename); + goto err; } - UPB_DEF_SET_OPTIONS(f->opts, FieldDescriptorProto, FieldOptions, field_proto); + const upb_MiniTableFile* mt = rebuild_minitable ? NULL : init->layout; + if (!_upb_DefPool_AddFile(s, file, mt, &status)) { + goto err; + } - if (UPB_DESC(FieldOptions_has_packed)(f->opts)) { - f->is_packed = UPB_DESC(FieldOptions_packed)(f->opts); - } else { - // Repeated fields default to packed for proto3 only. - f->is_packed = has_type && upb_FieldDef_IsPrimitive(f) && - f->label_ == kUpb_Label_Repeated && - upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto3; - } + upb_Arena_Free(arena); + return true; - f->has_presence = - (!upb_FieldDef_IsRepeated(f)) && - (f->type_ == kUpb_FieldType_Message || f->type_ == kUpb_FieldType_Group || - upb_FieldDef_ContainingOneof(f) || - (upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto2)); +err: + fprintf(stderr, + "Error loading compiled-in descriptor for file '%s' (this should " + "never happen): %s\n", + init->filename, upb_Status_ErrorMessage(&status)); + upb_Arena_Free(arena); + return false; } -static void _upb_FieldDef_CreateExt(upb_DefBuilder* ctx, const char* prefix, - const UPB_DESC(FieldDescriptorProto) * - field_proto, - upb_MessageDef* m, upb_FieldDef* f) { - f->is_extension = true; - _upb_FieldDef_Create(ctx, prefix, field_proto, m, f); - - if (UPB_DESC(FieldDescriptorProto_has_oneof_index)(field_proto)) { - _upb_DefBuilder_Errf(ctx, "oneof_index provided for extension field (%s)", - f->full_name); - } - - f->scope.extension_scope = m; - _upb_DefBuilder_Add(ctx, f->full_name, _upb_DefType_Pack(f, UPB_DEFTYPE_EXT)); - f->layout_index = ctx->ext_count++; - - if (ctx->layout) { - UPB_ASSERT(_upb_FieldDef_ExtensionMiniTable(f)->field.number == f->number_); - } +size_t _upb_DefPool_BytesLoaded(const upb_DefPool* s) { + return s->bytes_loaded; } -static void _upb_FieldDef_CreateNotExt(upb_DefBuilder* ctx, const char* prefix, - const UPB_DESC(FieldDescriptorProto) * - field_proto, - upb_MessageDef* m, upb_FieldDef* f) { - f->is_extension = false; - _upb_FieldDef_Create(ctx, prefix, field_proto, m, f); - - if (!UPB_DESC(FieldDescriptorProto_has_oneof_index)(field_proto)) { - if (f->is_proto3_optional) { - _upb_DefBuilder_Errf( - ctx, - "non-extension field (%s) with proto3_optional was not in a oneof", - f->full_name); - } - } +upb_Arena* _upb_DefPool_Arena(const upb_DefPool* s) { return s->arena; } - _upb_MessageDef_InsertField(ctx, m, f); +const upb_FieldDef* upb_DefPool_FindExtensionByMiniTable( + const upb_DefPool* s, const upb_MiniTableExtension* ext) { + upb_value v; + bool ok = upb_inttable_lookup(&s->exts, (uintptr_t)ext, &v); + UPB_ASSERT(ok); + return upb_value_getconstptr(v); } -upb_FieldDef* _upb_Extensions_New( - upb_DefBuilder* ctx, int n, - const UPB_DESC(FieldDescriptorProto) * const* protos, const char* prefix, - upb_MessageDef* m) { - _upb_DefType_CheckPadding(sizeof(upb_FieldDef)); - upb_FieldDef* defs = - (upb_FieldDef*)_upb_DefBuilder_Alloc(ctx, sizeof(upb_FieldDef) * n); +const upb_FieldDef* upb_DefPool_FindExtensionByNumber(const upb_DefPool* s, + const upb_MessageDef* m, + int32_t fieldnum) { + const upb_MiniTable* t = upb_MessageDef_MiniTable(m); + const upb_MiniTableExtension* ext = + upb_ExtensionRegistry_Lookup(s->extreg, t, fieldnum); + return ext ? upb_DefPool_FindExtensionByMiniTable(s, ext) : NULL; +} - for (int i = 0; i < n; i++) { - upb_FieldDef* f = &defs[i]; +const upb_ExtensionRegistry* upb_DefPool_ExtensionRegistry( + const upb_DefPool* s) { + return s->extreg; +} - _upb_FieldDef_CreateExt(ctx, prefix, protos[i], m, f); - f->index_ = i; +const upb_FieldDef** upb_DefPool_GetAllExtensions(const upb_DefPool* s, + const upb_MessageDef* m, + size_t* count) { + size_t n = 0; + intptr_t iter = UPB_INTTABLE_BEGIN; + uintptr_t key; + upb_value val; + // This is O(all exts) instead of O(exts for m). If we need this to be + // efficient we may need to make extreg into a two-level table, or have a + // second per-message index. + while (upb_inttable_next(&s->exts, &key, &val, &iter)) { + const upb_FieldDef* f = upb_value_getconstptr(val); + if (upb_FieldDef_ContainingType(f) == m) n++; } - - return defs; + const upb_FieldDef** exts = malloc(n * sizeof(*exts)); + iter = UPB_INTTABLE_BEGIN; + size_t i = 0; + while (upb_inttable_next(&s->exts, &key, &val, &iter)) { + const upb_FieldDef* f = upb_value_getconstptr(val); + if (upb_FieldDef_ContainingType(f) == m) exts[i++] = f; + } + *count = n; + return exts; } -upb_FieldDef* _upb_FieldDefs_New( - upb_DefBuilder* ctx, int n, - const UPB_DESC(FieldDescriptorProto) * const* protos, const char* prefix, - upb_MessageDef* m, bool* is_sorted) { - _upb_DefType_CheckPadding(sizeof(upb_FieldDef)); - upb_FieldDef* defs = - (upb_FieldDef*)_upb_DefBuilder_Alloc(ctx, sizeof(upb_FieldDef) * n); - - uint32_t previous = 0; - for (int i = 0; i < n; i++) { - upb_FieldDef* f = &defs[i]; +bool _upb_DefPool_LoadDefInit(upb_DefPool* s, const _upb_DefPool_Init* init) { + return _upb_DefPool_LoadDefInitEx(s, init, false); +} - _upb_FieldDef_CreateNotExt(ctx, prefix, protos[i], m, f); - f->index_ = i; - if (!ctx->layout) { - // Speculate that the def fields are sorted. We will always sort the - // MiniTable fields, so if defs are sorted then indices will match. - // - // If this is incorrect, we will overwrite later. - f->layout_index = i; - } - const uint32_t current = f->number_; - if (previous > current) *is_sorted = false; - previous = current; - } +// Must be last. - return defs; +upb_deftype_t _upb_DefType_Type(upb_value v) { + const uintptr_t num = (uintptr_t)upb_value_getconstptr(v); + return num & UPB_DEFTYPE_MASK; } -static void resolve_subdef(upb_DefBuilder* ctx, const char* prefix, - upb_FieldDef* f) { - const UPB_DESC(FieldDescriptorProto)* field_proto = f->sub.unresolved; - upb_StringView name = UPB_DESC(FieldDescriptorProto_type_name)(field_proto); - bool has_name = UPB_DESC(FieldDescriptorProto_has_type_name)(field_proto); - switch ((int)f->type_) { - case UPB_FIELD_TYPE_UNSPECIFIED: { - // Type was not specified and must be inferred. - UPB_ASSERT(has_name); - upb_deftype_t type; - const void* def = - _upb_DefBuilder_ResolveAny(ctx, f->full_name, prefix, name, &type); - switch (type) { - case UPB_DEFTYPE_ENUM: - f->sub.enumdef = def; - f->type_ = kUpb_FieldType_Enum; - if (!UPB_DESC(FieldOptions_has_packed)(f->opts)) { - f->is_packed = f->label_ == kUpb_Label_Repeated && - upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto3; - } - break; - case UPB_DEFTYPE_MSG: - f->sub.msgdef = def; - f->type_ = kUpb_FieldType_Message; // It appears there is no way of - // this being a group. - f->has_presence = !upb_FieldDef_IsRepeated(f); - break; - default: - _upb_DefBuilder_Errf(ctx, "Couldn't resolve type name for field %s", - f->full_name); - } - break; - } - case kUpb_FieldType_Message: - case kUpb_FieldType_Group: - UPB_ASSERT(has_name); - f->sub.msgdef = _upb_DefBuilder_Resolve(ctx, f->full_name, prefix, name, - UPB_DEFTYPE_MSG); - break; - case kUpb_FieldType_Enum: - UPB_ASSERT(has_name); - f->sub.enumdef = _upb_DefBuilder_Resolve(ctx, f->full_name, prefix, name, - UPB_DEFTYPE_ENUM); - break; - default: - // No resolution necessary. - break; - } +upb_value _upb_DefType_Pack(const void* ptr, upb_deftype_t type) { + uintptr_t num = (uintptr_t)ptr; + UPB_ASSERT((num & UPB_DEFTYPE_MASK) == 0); + num |= type; + return upb_value_constptr((const void*)num); } -static int _upb_FieldDef_Compare(const void* p1, const void* p2) { - const uint32_t v1 = (*(upb_FieldDef**)p1)->number_; - const uint32_t v2 = (*(upb_FieldDef**)p2)->number_; - return (v1 < v2) ? -1 : (v1 > v2); +const void* _upb_DefType_Unpack(upb_value v, upb_deftype_t type) { + uintptr_t num = (uintptr_t)upb_value_getconstptr(v); + return (num & UPB_DEFTYPE_MASK) == type + ? (const void*)(num & ~UPB_DEFTYPE_MASK) + : NULL; } -// _upb_FieldDefs_Sorted() is mostly a pure function of its inputs, but has one -// critical side effect that we depend on: it sets layout_index appropriately -// for non-sorted lists of fields. -const upb_FieldDef** _upb_FieldDefs_Sorted(const upb_FieldDef* f, int n, - upb_Arena* a) { - // TODO(salo): Replace this arena alloc with a persistent scratch buffer. - upb_FieldDef** out = (upb_FieldDef**)upb_Arena_Malloc(a, n * sizeof(void*)); - if (!out) return NULL; - for (int i = 0; i < n; i++) { - out[i] = (upb_FieldDef*)&f[i]; - } - qsort(out, n, sizeof(void*), _upb_FieldDef_Compare); +// Must be last. - for (int i = 0; i < n; i++) { - out[i]->layout_index = i; +bool _upb_DescState_Grow(upb_DescState* d, upb_Arena* a) { + const size_t oldbufsize = d->bufsize; + const int used = d->ptr - d->buf; + + if (!d->buf) { + d->buf = upb_Arena_Malloc(a, d->bufsize); + if (!d->buf) return false; + d->ptr = d->buf; + d->e.end = d->buf + d->bufsize; } - return (const upb_FieldDef**)out; -} -bool upb_FieldDef_MiniDescriptorEncode(const upb_FieldDef* f, upb_Arena* a, - upb_StringView* out) { - UPB_ASSERT(f->is_extension); + if (oldbufsize - used < kUpb_MtDataEncoder_MinSize) { + d->bufsize *= 2; + d->buf = upb_Arena_Realloc(a, d->buf, oldbufsize, d->bufsize); + if (!d->buf) return false; + d->ptr = d->buf + used; + d->e.end = d->buf + d->bufsize; + } - upb_DescState s; - _upb_DescState_Init(&s); + return true; +} - const int number = upb_FieldDef_Number(f); - const uint64_t modifiers = _upb_FieldDef_Modifiers(f); - if (!_upb_DescState_Grow(&s, a)) return false; - s.ptr = upb_MtDataEncoder_EncodeExtension(&s.e, s.ptr, f->type_, number, - modifiers); - *s.ptr = '\0'; - out->data = s.buf; - out->size = s.ptr - s.buf; - return true; -} +// Must be last. -static void resolve_extension(upb_DefBuilder* ctx, const char* prefix, - upb_FieldDef* f, - const UPB_DESC(FieldDescriptorProto) * - field_proto) { - if (!UPB_DESC(FieldDescriptorProto_has_extendee)(field_proto)) { - _upb_DefBuilder_Errf(ctx, "extension for field '%s' had no extendee", - f->full_name); - } - - upb_StringView name = UPB_DESC(FieldDescriptorProto_extendee)(field_proto); - const upb_MessageDef* m = - _upb_DefBuilder_Resolve(ctx, f->full_name, prefix, name, UPB_DEFTYPE_MSG); - f->msgdef = m; +struct upb_EnumDef { + const UPB_DESC(EnumOptions) * opts; + const upb_MiniTableEnum* layout; // Only for proto2. + const upb_FileDef* file; + const upb_MessageDef* containing_type; // Could be merged with "file". + const char* full_name; + upb_strtable ntoi; + upb_inttable iton; + const upb_EnumValueDef* values; + const upb_EnumReservedRange* res_ranges; + const upb_StringView* res_names; + int value_count; + int res_range_count; + int res_name_count; + int32_t defaultval; + bool is_closed; + bool is_sorted; // Whether all of the values are defined in ascending order. +}; - if (!_upb_MessageDef_IsValidExtensionNumber(m, f->number_)) { - _upb_DefBuilder_Errf( - ctx, - "field number %u in extension %s has no extension range in message %s", - (unsigned)f->number_, f->full_name, upb_MessageDef_FullName(m)); - } +upb_EnumDef* _upb_EnumDef_At(const upb_EnumDef* e, int i) { + return (upb_EnumDef*)&e[i]; } -void _upb_FieldDef_BuildMiniTableExtension(upb_DefBuilder* ctx, - const upb_FieldDef* f) { - const upb_MiniTableExtension* ext = _upb_FieldDef_ExtensionMiniTable(f); - - if (ctx->layout) { - UPB_ASSERT(upb_FieldDef_Number(f) == ext->field.number); - } else { - upb_StringView desc; - if (!upb_FieldDef_MiniDescriptorEncode(f, ctx->tmp_arena, &desc)) { - _upb_DefBuilder_OomErr(ctx); - } - - upb_MiniTableExtension* mut_ext = (upb_MiniTableExtension*)ext; - upb_MiniTableSub sub = {NULL}; - if (upb_FieldDef_IsSubMessage(f)) { - sub.submsg = upb_MessageDef_MiniTable(f->sub.msgdef); - } else if (_upb_FieldDef_IsClosedEnum(f)) { - sub.subenum = _upb_EnumDef_MiniTable(f->sub.enumdef); - } - bool ok2 = upb_MiniTableExtension_Init(desc.data, desc.size, mut_ext, - upb_MessageDef_MiniTable(f->msgdef), - sub, ctx->status); - if (!ok2) _upb_DefBuilder_Errf(ctx, "Could not build extension mini table"); - } - - bool ok = _upb_DefPool_InsertExt(ctx->symtab, ext, f); - if (!ok) _upb_DefBuilder_OomErr(ctx); +const upb_MiniTableEnum* _upb_EnumDef_MiniTable(const upb_EnumDef* e) { + return e->layout; } -static void resolve_default(upb_DefBuilder* ctx, upb_FieldDef* f, - const UPB_DESC(FieldDescriptorProto) * - field_proto) { - // Have to delay resolving of the default value until now because of the enum - // case, since enum defaults are specified with a label. - if (UPB_DESC(FieldDescriptorProto_has_default_value)(field_proto)) { - upb_StringView defaultval = - UPB_DESC(FieldDescriptorProto_default_value)(field_proto); - - if (upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto3) { - _upb_DefBuilder_Errf(ctx, - "proto3 fields cannot have explicit defaults (%s)", - f->full_name); - } - - if (upb_FieldDef_IsSubMessage(f)) { - _upb_DefBuilder_Errf(ctx, - "message fields cannot have explicit defaults (%s)", - f->full_name); - } +bool _upb_EnumDef_Insert(upb_EnumDef* e, upb_EnumValueDef* v, upb_Arena* a) { + const char* name = upb_EnumValueDef_Name(v); + const upb_value val = upb_value_constptr(v); + bool ok = upb_strtable_insert(&e->ntoi, name, strlen(name), val, a); + if (!ok) return false; - parse_default(ctx, defaultval.data, defaultval.size, f); - f->has_default = true; - } else { - set_default_default(ctx, f); - f->has_default = false; + // Multiple enumerators can have the same number, first one wins. + const int number = upb_EnumValueDef_Number(v); + if (!upb_inttable_lookup(&e->iton, number, NULL)) { + return upb_inttable_insert(&e->iton, number, val, a); } + return true; } -void _upb_FieldDef_Resolve(upb_DefBuilder* ctx, const char* prefix, - upb_FieldDef* f) { - // We have to stash this away since resolve_subdef() may overwrite it. - const UPB_DESC(FieldDescriptorProto)* field_proto = f->sub.unresolved; - - resolve_subdef(ctx, prefix, f); - resolve_default(ctx, f, field_proto); - - if (f->is_extension) { - resolve_extension(ctx, prefix, f, field_proto); - } +const UPB_DESC(EnumOptions) * upb_EnumDef_Options(const upb_EnumDef* e) { + return e->opts; } +bool upb_EnumDef_HasOptions(const upb_EnumDef* e) { + return e->opts != (void*)kUpbDefOptDefault; +} -// Must be last. - -struct upb_FileDef { - const UPB_DESC(FileOptions) * opts; - const char* name; - const char* package; - const char* edition; +const char* upb_EnumDef_FullName(const upb_EnumDef* e) { return e->full_name; } - const upb_FileDef** deps; - const int32_t* public_deps; - const int32_t* weak_deps; - const upb_MessageDef* top_lvl_msgs; - const upb_EnumDef* top_lvl_enums; - const upb_FieldDef* top_lvl_exts; - const upb_ServiceDef* services; - const upb_MiniTableExtension** ext_layouts; - const upb_DefPool* symtab; +const char* upb_EnumDef_Name(const upb_EnumDef* e) { + return _upb_DefBuilder_FullToShort(e->full_name); +} - int dep_count; - int public_dep_count; - int weak_dep_count; - int top_lvl_msg_count; - int top_lvl_enum_count; - int top_lvl_ext_count; - int service_count; - int ext_count; // All exts in the file. - upb_Syntax syntax; -}; +const upb_FileDef* upb_EnumDef_File(const upb_EnumDef* e) { return e->file; } -const UPB_DESC(FileOptions) * upb_FileDef_Options(const upb_FileDef* f) { - return f->opts; +const upb_MessageDef* upb_EnumDef_ContainingType(const upb_EnumDef* e) { + return e->containing_type; } -bool upb_FileDef_HasOptions(const upb_FileDef* f) { - return f->opts != (void*)kUpbDefOptDefault; +int32_t upb_EnumDef_Default(const upb_EnumDef* e) { + UPB_ASSERT(upb_EnumDef_FindValueByNumber(e, e->defaultval)); + return e->defaultval; } -const char* upb_FileDef_Name(const upb_FileDef* f) { return f->name; } - -const char* upb_FileDef_Package(const upb_FileDef* f) { - return f->package ? f->package : ""; +int upb_EnumDef_ReservedRangeCount(const upb_EnumDef* e) { + return e->res_range_count; } -const char* upb_FileDef_Edition(const upb_FileDef* f) { - return f->edition ? f->edition : ""; +const upb_EnumReservedRange* upb_EnumDef_ReservedRange(const upb_EnumDef* e, + int i) { + UPB_ASSERT(0 <= i && i < e->res_range_count); + return _upb_EnumReservedRange_At(e->res_ranges, i); } -const char* _upb_FileDef_RawPackage(const upb_FileDef* f) { return f->package; } - -upb_Syntax upb_FileDef_Syntax(const upb_FileDef* f) { return f->syntax; } +int upb_EnumDef_ReservedNameCount(const upb_EnumDef* e) { + return e->res_name_count; +} -int upb_FileDef_TopLevelMessageCount(const upb_FileDef* f) { - return f->top_lvl_msg_count; +upb_StringView upb_EnumDef_ReservedName(const upb_EnumDef* e, int i) { + UPB_ASSERT(0 <= i && i < e->res_name_count); + return e->res_names[i]; } -int upb_FileDef_DependencyCount(const upb_FileDef* f) { return f->dep_count; } +int upb_EnumDef_ValueCount(const upb_EnumDef* e) { return e->value_count; } -int upb_FileDef_PublicDependencyCount(const upb_FileDef* f) { - return f->public_dep_count; +const upb_EnumValueDef* upb_EnumDef_FindValueByName(const upb_EnumDef* e, + const char* name) { + return upb_EnumDef_FindValueByNameWithSize(e, name, strlen(name)); } -int upb_FileDef_WeakDependencyCount(const upb_FileDef* f) { - return f->weak_dep_count; +const upb_EnumValueDef* upb_EnumDef_FindValueByNameWithSize( + const upb_EnumDef* e, const char* name, size_t size) { + upb_value v; + return upb_strtable_lookup2(&e->ntoi, name, size, &v) + ? upb_value_getconstptr(v) + : NULL; } -const int32_t* _upb_FileDef_PublicDependencyIndexes(const upb_FileDef* f) { - return f->public_deps; +const upb_EnumValueDef* upb_EnumDef_FindValueByNumber(const upb_EnumDef* e, + int32_t num) { + upb_value v; + return upb_inttable_lookup(&e->iton, num, &v) ? upb_value_getconstptr(v) + : NULL; } -const int32_t* _upb_FileDef_WeakDependencyIndexes(const upb_FileDef* f) { - return f->weak_deps; +bool upb_EnumDef_CheckNumber(const upb_EnumDef* e, int32_t num) { + // We could use upb_EnumDef_FindValueByNumber(e, num) != NULL, but we expect + // this to be faster (especially for small numbers). + return upb_MiniTableEnum_CheckValue(e->layout, num); } -int upb_FileDef_TopLevelEnumCount(const upb_FileDef* f) { - return f->top_lvl_enum_count; +const upb_EnumValueDef* upb_EnumDef_Value(const upb_EnumDef* e, int i) { + UPB_ASSERT(0 <= i && i < e->value_count); + return _upb_EnumValueDef_At(e->values, i); } -int upb_FileDef_TopLevelExtensionCount(const upb_FileDef* f) { - return f->top_lvl_ext_count; -} +bool upb_EnumDef_IsClosed(const upb_EnumDef* e) { return e->is_closed; } -int upb_FileDef_ServiceCount(const upb_FileDef* f) { return f->service_count; } +bool upb_EnumDef_MiniDescriptorEncode(const upb_EnumDef* e, upb_Arena* a, + upb_StringView* out) { + upb_DescState s; + _upb_DescState_Init(&s); -const upb_FileDef* upb_FileDef_Dependency(const upb_FileDef* f, int i) { - UPB_ASSERT(0 <= i && i < f->dep_count); - return f->deps[i]; -} + const upb_EnumValueDef** sorted = NULL; + if (!e->is_sorted) { + sorted = _upb_EnumValueDefs_Sorted(e->values, e->value_count, a); + if (!sorted) return false; + } -const upb_FileDef* upb_FileDef_PublicDependency(const upb_FileDef* f, int i) { - UPB_ASSERT(0 <= i && i < f->public_dep_count); - return f->deps[f->public_deps[i]]; -} + if (!_upb_DescState_Grow(&s, a)) return false; + s.ptr = upb_MtDataEncoder_StartEnum(&s.e, s.ptr); -const upb_FileDef* upb_FileDef_WeakDependency(const upb_FileDef* f, int i) { - UPB_ASSERT(0 <= i && i < f->public_dep_count); - return f->deps[f->weak_deps[i]]; -} + // Duplicate values are allowed but we only encode each value once. + uint32_t previous = 0; -const upb_MessageDef* upb_FileDef_TopLevelMessage(const upb_FileDef* f, int i) { - UPB_ASSERT(0 <= i && i < f->top_lvl_msg_count); - return _upb_MessageDef_At(f->top_lvl_msgs, i); -} + for (int i = 0; i < e->value_count; i++) { + const uint32_t current = + upb_EnumValueDef_Number(sorted ? sorted[i] : upb_EnumDef_Value(e, i)); + if (i != 0 && previous == current) continue; -const upb_EnumDef* upb_FileDef_TopLevelEnum(const upb_FileDef* f, int i) { - UPB_ASSERT(0 <= i && i < f->top_lvl_enum_count); - return _upb_EnumDef_At(f->top_lvl_enums, i); -} + if (!_upb_DescState_Grow(&s, a)) return false; + s.ptr = upb_MtDataEncoder_PutEnumValue(&s.e, s.ptr, current); + previous = current; + } -const upb_FieldDef* upb_FileDef_TopLevelExtension(const upb_FileDef* f, int i) { - UPB_ASSERT(0 <= i && i < f->top_lvl_ext_count); - return _upb_FieldDef_At(f->top_lvl_exts, i); -} + if (!_upb_DescState_Grow(&s, a)) return false; + s.ptr = upb_MtDataEncoder_EndEnum(&s.e, s.ptr); -const upb_ServiceDef* upb_FileDef_Service(const upb_FileDef* f, int i) { - UPB_ASSERT(0 <= i && i < f->service_count); - return _upb_ServiceDef_At(f->services, i); + // There will always be room for this '\0' in the encoder buffer because + // kUpb_MtDataEncoder_MinSize is overkill for upb_MtDataEncoder_EndEnum(). + UPB_ASSERT(s.ptr < s.buf + s.bufsize); + *s.ptr = '\0'; + + out->data = s.buf; + out->size = s.ptr - s.buf; + return true; } -const upb_DefPool* upb_FileDef_Pool(const upb_FileDef* f) { return f->symtab; } +static upb_MiniTableEnum* create_enumlayout(upb_DefBuilder* ctx, + const upb_EnumDef* e) { + upb_StringView sv; + bool ok = upb_EnumDef_MiniDescriptorEncode(e, ctx->tmp_arena, &sv); + if (!ok) _upb_DefBuilder_Errf(ctx, "OOM while building enum MiniDescriptor"); -const upb_MiniTableExtension* _upb_FileDef_ExtensionMiniTable( - const upb_FileDef* f, int i) { - return f->ext_layouts[i]; + upb_Status status; + upb_MiniTableEnum* layout = + upb_MiniTableEnum_Build(sv.data, sv.size, ctx->arena, &status); + if (!layout) + _upb_DefBuilder_Errf(ctx, "Error building enum MiniTable: %s", status.msg); + return layout; } -static char* strviewdup(upb_DefBuilder* ctx, upb_StringView view) { - char* ret = upb_strdup2(view.data, view.size, _upb_DefBuilder_Arena(ctx)); - if (!ret) _upb_DefBuilder_OomErr(ctx); - return ret; +static upb_StringView* _upb_EnumReservedNames_New( + upb_DefBuilder* ctx, int n, const upb_StringView* protos) { + upb_StringView* sv = _upb_DefBuilder_Alloc(ctx, sizeof(upb_StringView) * n); + for (int i = 0; i < n; i++) { + sv[i].data = + upb_strdup2(protos[i].data, protos[i].size, _upb_DefBuilder_Arena(ctx)); + sv[i].size = protos[i].size; + } + return sv; } -static bool streql_view(upb_StringView view, const char* b) { - return view.size == strlen(b) && memcmp(view.data, b, view.size) == 0; -} +static void create_enumdef(upb_DefBuilder* ctx, const char* prefix, + const UPB_DESC(EnumDescriptorProto) * enum_proto, + upb_EnumDef* e) { + const UPB_DESC(EnumValueDescriptorProto)* const* values; + const UPB_DESC(EnumDescriptorProto_EnumReservedRange)* const* res_ranges; + const upb_StringView* res_names; + upb_StringView name; + size_t n_value, n_res_range, n_res_name; -static int count_exts_in_msg(const UPB_DESC(DescriptorProto) * msg_proto) { - size_t n; - UPB_DESC(DescriptorProto_extension)(msg_proto, &n); - int ext_count = n; + // Must happen before _upb_DefBuilder_Add() + e->file = _upb_DefBuilder_File(ctx); - const UPB_DESC(DescriptorProto)* const* nested_msgs = - UPB_DESC(DescriptorProto_nested_type)(msg_proto, &n); - for (size_t i = 0; i < n; i++) { - ext_count += count_exts_in_msg(nested_msgs[i]); - } + name = UPB_DESC(EnumDescriptorProto_name)(enum_proto); - return ext_count; -} + e->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); + _upb_DefBuilder_Add(ctx, e->full_name, + _upb_DefType_Pack(e, UPB_DEFTYPE_ENUM)); -// Allocate and initialize one file def, and add it to the context object. -void _upb_FileDef_Create(upb_DefBuilder* ctx, - const UPB_DESC(FileDescriptorProto) * file_proto) { - upb_FileDef* file = _upb_DefBuilder_Alloc(ctx, sizeof(upb_FileDef)); - ctx->file = file; + e->is_closed = (!UPB_TREAT_PROTO2_ENUMS_LIKE_PROTO3) && + (upb_FileDef_Syntax(e->file) == kUpb_Syntax_Proto2); - const UPB_DESC(DescriptorProto)* const* msgs; - const UPB_DESC(EnumDescriptorProto)* const* enums; - const UPB_DESC(FieldDescriptorProto)* const* exts; - const UPB_DESC(ServiceDescriptorProto)* const* services; - const upb_StringView* strs; - const int32_t* public_deps; - const int32_t* weak_deps; - size_t n; + values = UPB_DESC(EnumDescriptorProto_value)(enum_proto, &n_value); - file->symtab = ctx->symtab; + bool ok = upb_strtable_init(&e->ntoi, n_value, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); - // Count all extensions in the file, to build a flat array of layouts. - UPB_DESC(FileDescriptorProto_extension)(file_proto, &n); - int ext_count = n; - msgs = UPB_DESC(FileDescriptorProto_message_type)(file_proto, &n); - for (int i = 0; i < n; i++) { - ext_count += count_exts_in_msg(msgs[i]); - } - file->ext_count = ext_count; + ok = upb_inttable_init(&e->iton, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); - if (ctx->layout) { - // We are using the ext layouts that were passed in. - file->ext_layouts = ctx->layout->exts; - if (ctx->layout->ext_count != file->ext_count) { - _upb_DefBuilder_Errf(ctx, - "Extension count did not match layout (%d vs %d)", - ctx->layout->ext_count, file->ext_count); - } - } else { - // We are building ext layouts from scratch. - file->ext_layouts = _upb_DefBuilder_Alloc( - ctx, sizeof(*file->ext_layouts) * file->ext_count); - upb_MiniTableExtension* ext = - _upb_DefBuilder_Alloc(ctx, sizeof(*ext) * file->ext_count); - for (int i = 0; i < file->ext_count; i++) { - file->ext_layouts[i] = &ext[i]; - } - } + e->defaultval = 0; + e->value_count = n_value; + e->values = + _upb_EnumValueDefs_New(ctx, prefix, n_value, values, e, &e->is_sorted); - upb_StringView name = UPB_DESC(FileDescriptorProto_name)(file_proto); - file->name = strviewdup(ctx, name); - if (strlen(file->name) != name.size) { - _upb_DefBuilder_Errf(ctx, "File name contained embedded NULL"); + if (n_value == 0) { + _upb_DefBuilder_Errf(ctx, "enums must contain at least one value (%s)", + e->full_name); } - upb_StringView package = UPB_DESC(FileDescriptorProto_package)(file_proto); - - if (package.size) { - _upb_DefBuilder_CheckIdentFull(ctx, package); - file->package = strviewdup(ctx, package); - } else { - file->package = NULL; - } + res_ranges = + UPB_DESC(EnumDescriptorProto_reserved_range)(enum_proto, &n_res_range); + e->res_range_count = n_res_range; + e->res_ranges = _upb_EnumReservedRanges_New(ctx, n_res_range, res_ranges, e); - upb_StringView edition = UPB_DESC(FileDescriptorProto_edition)(file_proto); + res_names = + UPB_DESC(EnumDescriptorProto_reserved_name)(enum_proto, &n_res_name); + e->res_name_count = n_res_name; + e->res_names = _upb_EnumReservedNames_New(ctx, n_res_name, res_names); - if (edition.size == 0) { - file->edition = NULL; - } else { - // TODO(b/267770604): How should we validate this? - file->edition = strviewdup(ctx, edition); - if (strlen(file->edition) != edition.size) { - _upb_DefBuilder_Errf(ctx, "Edition name contained embedded NULL"); - } - } + UPB_DEF_SET_OPTIONS(e->opts, EnumDescriptorProto, EnumOptions, enum_proto); - if (UPB_DESC(FileDescriptorProto_has_syntax)(file_proto)) { - upb_StringView syntax = UPB_DESC(FileDescriptorProto_syntax)(file_proto); + upb_inttable_compact(&e->iton, ctx->arena); - if (streql_view(syntax, "proto2")) { - file->syntax = kUpb_Syntax_Proto2; - } else if (streql_view(syntax, "proto3")) { - file->syntax = kUpb_Syntax_Proto3; + if (e->is_closed) { + if (ctx->layout) { + UPB_ASSERT(ctx->enum_count < ctx->layout->enum_count); + e->layout = ctx->layout->enums[ctx->enum_count++]; } else { - _upb_DefBuilder_Errf(ctx, "Invalid syntax '" UPB_STRINGVIEW_FORMAT "'", - UPB_STRINGVIEW_ARGS(syntax)); + e->layout = create_enumlayout(ctx, e); } } else { - file->syntax = kUpb_Syntax_Proto2; + e->layout = NULL; } +} - // Read options. - UPB_DEF_SET_OPTIONS(file->opts, FileDescriptorProto, FileOptions, file_proto); +upb_EnumDef* _upb_EnumDefs_New( + upb_DefBuilder* ctx, int n, + const UPB_DESC(EnumDescriptorProto) * const* protos, + const upb_MessageDef* containing_type) { + _upb_DefType_CheckPadding(sizeof(upb_EnumDef)); - // Verify dependencies. - strs = UPB_DESC(FileDescriptorProto_dependency)(file_proto, &n); - file->dep_count = n; - file->deps = _upb_DefBuilder_Alloc(ctx, sizeof(*file->deps) * n); + // If a containing type is defined then get the full name from that. + // Otherwise use the package name from the file def. + const char* name = containing_type ? upb_MessageDef_FullName(containing_type) + : _upb_FileDef_RawPackage(ctx->file); - for (size_t i = 0; i < n; i++) { - upb_StringView str = strs[i]; - file->deps[i] = - upb_DefPool_FindFileByNameWithSize(ctx->symtab, str.data, str.size); - if (!file->deps[i]) { - _upb_DefBuilder_Errf(ctx, - "Depends on file '" UPB_STRINGVIEW_FORMAT - "', but it has not been loaded", - UPB_STRINGVIEW_ARGS(str)); - } + upb_EnumDef* e = _upb_DefBuilder_Alloc(ctx, sizeof(upb_EnumDef) * n); + for (int i = 0; i < n; i++) { + create_enumdef(ctx, name, protos[i], &e[i]); + e[i].containing_type = containing_type; } + return e; +} - public_deps = UPB_DESC(FileDescriptorProto_public_dependency)(file_proto, &n); - file->public_dep_count = n; - file->public_deps = - _upb_DefBuilder_Alloc(ctx, sizeof(*file->public_deps) * n); - int32_t* mutable_public_deps = (int32_t*)file->public_deps; - for (size_t i = 0; i < n; i++) { - if (public_deps[i] >= file->dep_count) { - _upb_DefBuilder_Errf(ctx, "public_dep %d is out of range", - (int)public_deps[i]); - } - mutable_public_deps[i] = public_deps[i]; - } - weak_deps = UPB_DESC(FileDescriptorProto_weak_dependency)(file_proto, &n); - file->weak_dep_count = n; - file->weak_deps = _upb_DefBuilder_Alloc(ctx, sizeof(*file->weak_deps) * n); - int32_t* mutable_weak_deps = (int32_t*)file->weak_deps; - for (size_t i = 0; i < n; i++) { - if (weak_deps[i] >= file->dep_count) { - _upb_DefBuilder_Errf(ctx, "weak_dep %d is out of range", - (int)weak_deps[i]); - } - mutable_weak_deps[i] = weak_deps[i]; - } - // Create enums. - enums = UPB_DESC(FileDescriptorProto_enum_type)(file_proto, &n); - file->top_lvl_enum_count = n; - file->top_lvl_enums = _upb_EnumDefs_New(ctx, n, enums, NULL); +// Must be last. - // Create extensions. - exts = UPB_DESC(FileDescriptorProto_extension)(file_proto, &n); - file->top_lvl_ext_count = n; - file->top_lvl_exts = _upb_Extensions_New(ctx, n, exts, file->package, NULL); +struct upb_EnumReservedRange { + int32_t start; + int32_t end; +}; - // Create messages. - msgs = UPB_DESC(FileDescriptorProto_message_type)(file_proto, &n); - file->top_lvl_msg_count = n; - file->top_lvl_msgs = _upb_MessageDefs_New(ctx, n, msgs, NULL); +upb_EnumReservedRange* _upb_EnumReservedRange_At(const upb_EnumReservedRange* r, + int i) { + return (upb_EnumReservedRange*)&r[i]; +} - // Create services. - services = UPB_DESC(FileDescriptorProto_service)(file_proto, &n); - file->service_count = n; - file->services = _upb_ServiceDefs_New(ctx, n, services); - - // Now that all names are in the table, build layouts and resolve refs. +int32_t upb_EnumReservedRange_Start(const upb_EnumReservedRange* r) { + return r->start; +} +int32_t upb_EnumReservedRange_End(const upb_EnumReservedRange* r) { + return r->end; +} - for (int i = 0; i < file->top_lvl_msg_count; i++) { - upb_MessageDef* m = (upb_MessageDef*)upb_FileDef_TopLevelMessage(file, i); - _upb_MessageDef_Resolve(ctx, m); - } +upb_EnumReservedRange* _upb_EnumReservedRanges_New( + upb_DefBuilder* ctx, int n, + const UPB_DESC(EnumDescriptorProto_EnumReservedRange) * const* protos, + const upb_EnumDef* e) { + upb_EnumReservedRange* r = + _upb_DefBuilder_Alloc(ctx, sizeof(upb_EnumReservedRange) * n); - for (int i = 0; i < file->top_lvl_ext_count; i++) { - upb_FieldDef* f = (upb_FieldDef*)upb_FileDef_TopLevelExtension(file, i); - _upb_FieldDef_Resolve(ctx, file->package, f); - } + for (int i = 0; i < n; i++) { + const int32_t start = + UPB_DESC(EnumDescriptorProto_EnumReservedRange_start)(protos[i]); + const int32_t end = + UPB_DESC(EnumDescriptorProto_EnumReservedRange_end)(protos[i]); - for (int i = 0; i < file->top_lvl_msg_count; i++) { - upb_MessageDef* m = (upb_MessageDef*)upb_FileDef_TopLevelMessage(file, i); - _upb_MessageDef_CreateMiniTable(ctx, (upb_MessageDef*)m); - } + // A full validation would also check that each range is disjoint, and that + // none of the fields overlap with the extension ranges, but we are just + // sanity checking here. - for (int i = 0; i < file->top_lvl_ext_count; i++) { - upb_FieldDef* f = (upb_FieldDef*)upb_FileDef_TopLevelExtension(file, i); - _upb_FieldDef_BuildMiniTableExtension(ctx, f); - } + // Note: Not a typo! Unlike extension ranges and message reserved ranges, + // the end value of an enum reserved range is *inclusive*! + if (end < start) { + _upb_DefBuilder_Errf(ctx, "Reserved range (%d, %d) is invalid, enum=%s\n", + (int)start, (int)end, upb_EnumDef_FullName(e)); + } - for (int i = 0; i < file->top_lvl_msg_count; i++) { - upb_MessageDef* m = (upb_MessageDef*)upb_FileDef_TopLevelMessage(file, i); - _upb_MessageDef_LinkMiniTable(ctx, m); + r[i].start = start; + r[i].end = end; } - if (file->ext_count) { - bool ok = upb_ExtensionRegistry_AddArray( - _upb_DefPool_ExtReg(ctx->symtab), file->ext_layouts, file->ext_count); - if (!ok) _upb_DefBuilder_OomErr(ctx); - } + return r; } -#include - // Must be last. -bool upb_Message_HasFieldByDef(const upb_Message* msg, const upb_FieldDef* f) { - UPB_ASSERT(upb_FieldDef_HasPresence(f)); - return upb_Message_HasField(msg, upb_FieldDef_MiniTable(f)); -} +struct upb_EnumValueDef { + const UPB_DESC(EnumValueOptions) * opts; + const upb_EnumDef* parent; + const char* full_name; + int32_t number; +}; -const upb_FieldDef* upb_Message_WhichOneof(const upb_Message* msg, - const upb_OneofDef* o) { - const upb_FieldDef* f = upb_OneofDef_Field(o, 0); - if (upb_OneofDef_IsSynthetic(o)) { - UPB_ASSERT(upb_OneofDef_FieldCount(o) == 1); - return upb_Message_HasFieldByDef(msg, f) ? f : NULL; - } else { - const upb_MiniTableField* field = upb_FieldDef_MiniTable(f); - uint32_t oneof_case = upb_Message_WhichOneofFieldNumber(msg, field); - f = oneof_case ? upb_OneofDef_LookupNumber(o, oneof_case) : NULL; - UPB_ASSERT((f != NULL) == (oneof_case != 0)); - return f; - } +upb_EnumValueDef* _upb_EnumValueDef_At(const upb_EnumValueDef* v, int i) { + return (upb_EnumValueDef*)&v[i]; } -upb_MessageValue upb_Message_GetFieldByDef(const upb_Message* msg, - const upb_FieldDef* f) { - upb_MessageValue default_val = upb_FieldDef_Default(f); - upb_MessageValue ret; - _upb_Message_GetField(msg, upb_FieldDef_MiniTable(f), &default_val, &ret); - return ret; +static int _upb_EnumValueDef_Compare(const void* p1, const void* p2) { + const uint32_t v1 = (*(const upb_EnumValueDef**)p1)->number; + const uint32_t v2 = (*(const upb_EnumValueDef**)p2)->number; + return (v1 < v2) ? -1 : (v1 > v2); } -upb_MutableMessageValue upb_Message_Mutable(upb_Message* msg, - const upb_FieldDef* f, - upb_Arena* a) { - UPB_ASSERT(upb_FieldDef_IsSubMessage(f) || upb_FieldDef_IsRepeated(f)); - if (upb_FieldDef_HasPresence(f) && !upb_Message_HasFieldByDef(msg, f)) { - // We need to skip the upb_Message_GetFieldByDef() call in this case. - goto make; - } +const upb_EnumValueDef** _upb_EnumValueDefs_Sorted(const upb_EnumValueDef* v, + int n, upb_Arena* a) { + // TODO: Try to replace this arena alloc with a persistent scratch buffer. + upb_EnumValueDef** out = + (upb_EnumValueDef**)upb_Arena_Malloc(a, n * sizeof(void*)); + if (!out) return NULL; - upb_MessageValue val = upb_Message_GetFieldByDef(msg, f); - if (val.array_val) { - return (upb_MutableMessageValue){.array = (upb_Array*)val.array_val}; + for (int i = 0; i < n; i++) { + out[i] = (upb_EnumValueDef*)&v[i]; } + qsort(out, n, sizeof(void*), _upb_EnumValueDef_Compare); - upb_MutableMessageValue ret; -make: - if (!a) return (upb_MutableMessageValue){.array = NULL}; - if (upb_FieldDef_IsMap(f)) { - const upb_MessageDef* entry = upb_FieldDef_MessageSubDef(f); - const upb_FieldDef* key = - upb_MessageDef_FindFieldByNumber(entry, kUpb_MapEntry_KeyFieldNumber); - const upb_FieldDef* value = - upb_MessageDef_FindFieldByNumber(entry, kUpb_MapEntry_ValueFieldNumber); - ret.map = - upb_Map_New(a, upb_FieldDef_CType(key), upb_FieldDef_CType(value)); - } else if (upb_FieldDef_IsRepeated(f)) { - ret.array = upb_Array_New(a, upb_FieldDef_CType(f)); - } else { - UPB_ASSERT(upb_FieldDef_IsSubMessage(f)); - const upb_MessageDef* m = upb_FieldDef_MessageSubDef(f); - ret.msg = upb_Message_New(upb_MessageDef_MiniTable(m), a); - } + return (const upb_EnumValueDef**)out; +} - val.array_val = ret.array; - upb_Message_SetFieldByDef(msg, f, val, a); +const UPB_DESC(EnumValueOptions) * + upb_EnumValueDef_Options(const upb_EnumValueDef* v) { + return v->opts; +} - return ret; +bool upb_EnumValueDef_HasOptions(const upb_EnumValueDef* v) { + return v->opts != (void*)kUpbDefOptDefault; } -bool upb_Message_SetFieldByDef(upb_Message* msg, const upb_FieldDef* f, - upb_MessageValue val, upb_Arena* a) { - return _upb_Message_SetField(msg, upb_FieldDef_MiniTable(f), &val, a); +const upb_EnumDef* upb_EnumValueDef_Enum(const upb_EnumValueDef* v) { + return v->parent; } -void upb_Message_ClearFieldByDef(upb_Message* msg, const upb_FieldDef* f) { - upb_Message_ClearField(msg, upb_FieldDef_MiniTable(f)); +const char* upb_EnumValueDef_FullName(const upb_EnumValueDef* v) { + return v->full_name; } -void upb_Message_ClearByDef(upb_Message* msg, const upb_MessageDef* m) { - upb_Message_Clear(msg, upb_MessageDef_MiniTable(m)); +const char* upb_EnumValueDef_Name(const upb_EnumValueDef* v) { + return _upb_DefBuilder_FullToShort(v->full_name); } -bool upb_Message_Next(const upb_Message* msg, const upb_MessageDef* m, - const upb_DefPool* ext_pool, const upb_FieldDef** out_f, - upb_MessageValue* out_val, size_t* iter) { - size_t i = *iter; - size_t n = upb_MessageDef_FieldCount(m); - UPB_UNUSED(ext_pool); +int32_t upb_EnumValueDef_Number(const upb_EnumValueDef* v) { return v->number; } - // Iterate over normal fields, returning the first one that is set. - while (++i < n) { - const upb_FieldDef* f = upb_MessageDef_Field(m, i); - const upb_MiniTableField* field = upb_FieldDef_MiniTable(f); - upb_MessageValue val = upb_Message_GetFieldByDef(msg, f); +uint32_t upb_EnumValueDef_Index(const upb_EnumValueDef* v) { + // Compute index in our parent's array. + return v - upb_EnumDef_Value(v->parent, 0); +} - // Skip field if unset or empty. - if (upb_MiniTableField_HasPresence(field)) { - if (!upb_Message_HasFieldByDef(msg, f)) continue; - } else { - switch (upb_FieldMode_Get(field)) { - case kUpb_FieldMode_Map: - if (!val.map_val || upb_Map_Size(val.map_val) == 0) continue; - break; - case kUpb_FieldMode_Array: - if (!val.array_val || upb_Array_Size(val.array_val) == 0) continue; - break; - case kUpb_FieldMode_Scalar: - if (!_upb_MiniTable_ValueIsNonZero(&val, field)) continue; - break; - } - } +static void create_enumvaldef(upb_DefBuilder* ctx, const char* prefix, + const UPB_DESC(EnumValueDescriptorProto) * + val_proto, + upb_EnumDef* e, upb_EnumValueDef* v) { + upb_StringView name = UPB_DESC(EnumValueDescriptorProto_name)(val_proto); - *out_val = val; - *out_f = f; - *iter = i; - return true; + v->parent = e; // Must happen prior to _upb_DefBuilder_Add() + v->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); + v->number = UPB_DESC(EnumValueDescriptorProto_number)(val_proto); + _upb_DefBuilder_Add(ctx, v->full_name, + _upb_DefType_Pack(v, UPB_DEFTYPE_ENUMVAL)); + + UPB_DEF_SET_OPTIONS(v->opts, EnumValueDescriptorProto, EnumValueOptions, + val_proto); + + bool ok = _upb_EnumDef_Insert(e, v, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); +} + +// Allocate and initialize an array of |n| enum value defs owned by |e|. +upb_EnumValueDef* _upb_EnumValueDefs_New( + upb_DefBuilder* ctx, const char* prefix, int n, + const UPB_DESC(EnumValueDescriptorProto) * const* protos, upb_EnumDef* e, + bool* is_sorted) { + _upb_DefType_CheckPadding(sizeof(upb_EnumValueDef)); + + upb_EnumValueDef* v = + _upb_DefBuilder_Alloc(ctx, sizeof(upb_EnumValueDef) * n); + + *is_sorted = true; + uint32_t previous = 0; + for (int i = 0; i < n; i++) { + create_enumvaldef(ctx, prefix, protos[i], e, &v[i]); + + const uint32_t current = v[i].number; + if (previous > current) *is_sorted = false; + previous = current; } - if (ext_pool) { - // Return any extensions that are set. - size_t count; - const upb_Message_Extension* ext = _upb_Message_Getexts(msg, &count); - if (i - n < count) { - ext += count - 1 - (i - n); - memcpy(out_val, &ext->data, sizeof(*out_val)); - *out_f = upb_DefPool_FindExtensionByMiniTable(ext_pool, ext->ext); - *iter = i; - return true; - } + if (upb_FileDef_Syntax(ctx->file) == kUpb_Syntax_Proto3 && n > 0 && + v[0].number != 0) { + _upb_DefBuilder_Errf(ctx, + "for proto3, the first enum value must be zero (%s)", + upb_EnumDef_FullName(e)); } - *iter = i; - return false; + return v; } -bool _upb_Message_DiscardUnknown(upb_Message* msg, const upb_MessageDef* m, - int depth) { - size_t iter = kUpb_Message_Begin; - const upb_FieldDef* f; - upb_MessageValue val; - bool ret = true; - if (--depth == 0) return false; - _upb_Message_DiscardUnknown_shallow(msg); +// Must be last. - while (upb_Message_Next(msg, m, NULL /*ext_pool*/, &f, &val, &iter)) { - const upb_MessageDef* subm = upb_FieldDef_MessageSubDef(f); - if (!subm) continue; - if (upb_FieldDef_IsMap(f)) { - const upb_FieldDef* val_f = upb_MessageDef_FindFieldByNumber(subm, 2); - const upb_MessageDef* val_m = upb_FieldDef_MessageSubDef(val_f); - upb_Map* map = (upb_Map*)val.map_val; - size_t iter = kUpb_Map_Begin; - - if (!val_m) continue; - - upb_MessageValue map_key, map_val; - while (upb_Map_Next(map, &map_key, &map_val, &iter)) { - if (!_upb_Message_DiscardUnknown((upb_Message*)map_val.msg_val, val_m, - depth)) { - ret = false; - } - } - } else if (upb_FieldDef_IsRepeated(f)) { - const upb_Array* arr = val.array_val; - size_t i, n = upb_Array_Size(arr); - for (i = 0; i < n; i++) { - upb_MessageValue elem = upb_Array_Get(arr, i); - if (!_upb_Message_DiscardUnknown((upb_Message*)elem.msg_val, subm, - depth)) { - ret = false; - } - } - } else { - if (!_upb_Message_DiscardUnknown((upb_Message*)val.msg_val, subm, - depth)) { - ret = false; - } - } - } +struct upb_ExtensionRange { + const UPB_DESC(ExtensionRangeOptions) * opts; + int32_t start; + int32_t end; +}; - return ret; +upb_ExtensionRange* _upb_ExtensionRange_At(const upb_ExtensionRange* r, int i) { + return (upb_ExtensionRange*)&r[i]; } -bool upb_Message_DiscardUnknown(upb_Message* msg, const upb_MessageDef* m, - int maxdepth) { - return _upb_Message_DiscardUnknown(msg, m, maxdepth); +const UPB_DESC(ExtensionRangeOptions) * + upb_ExtensionRange_Options(const upb_ExtensionRange* r) { + return r->opts; } +bool upb_ExtensionRange_HasOptions(const upb_ExtensionRange* r) { + return r->opts != (void*)kUpbDefOptDefault; +} -// Must be last. +int32_t upb_ExtensionRange_Start(const upb_ExtensionRange* r) { + return r->start; +} -struct upb_MessageDef { - const UPB_DESC(MessageOptions) * opts; - const upb_MiniTable* layout; - const upb_FileDef* file; - const upb_MessageDef* containing_type; - const char* full_name; +int32_t upb_ExtensionRange_End(const upb_ExtensionRange* r) { return r->end; } - // Tables for looking up fields by number and name. - upb_inttable itof; - upb_strtable ntof; +upb_ExtensionRange* _upb_ExtensionRanges_New( + upb_DefBuilder* ctx, int n, + const UPB_DESC(DescriptorProto_ExtensionRange) * const* protos, + const upb_MessageDef* m) { + upb_ExtensionRange* r = + _upb_DefBuilder_Alloc(ctx, sizeof(upb_ExtensionRange) * n); - /* All nested defs. - * MEM: We could save some space here by putting nested defs in a contiguous - * region and calculating counts from offsets or vice-versa. */ - const upb_FieldDef* fields; - const upb_OneofDef* oneofs; - const upb_ExtensionRange* ext_ranges; - const upb_StringView* res_names; - const upb_MessageDef* nested_msgs; - const upb_MessageReservedRange* res_ranges; - const upb_EnumDef* nested_enums; - const upb_FieldDef* nested_exts; + for (int i = 0; i < n; i++) { + const int32_t start = + UPB_DESC(DescriptorProto_ExtensionRange_start)(protos[i]); + const int32_t end = UPB_DESC(DescriptorProto_ExtensionRange_end)(protos[i]); + const int32_t max = UPB_DESC(MessageOptions_message_set_wire_format)( + upb_MessageDef_Options(m)) + ? INT32_MAX + : kUpb_MaxFieldNumber + 1; - // TODO(salo): These counters don't need anywhere near 32 bits. - int field_count; - int real_oneof_count; - int oneof_count; - int ext_range_count; - int res_range_count; - int res_name_count; - int nested_msg_count; - int nested_enum_count; - int nested_ext_count; - bool in_message_set; - bool is_sorted; - upb_WellKnown well_known_type; -#if UINTPTR_MAX == 0xffffffff - uint32_t padding; // Increase size to a multiple of 8. -#endif -}; + // A full validation would also check that each range is disjoint, and that + // none of the fields overlap with the extension ranges, but we are just + // sanity checking here. + if (start < 1 || end <= start || end > max) { + _upb_DefBuilder_Errf(ctx, + "Extension range (%d, %d) is invalid, message=%s\n", + (int)start, (int)end, upb_MessageDef_FullName(m)); + } -static void assign_msg_wellknowntype(upb_MessageDef* m) { - const char* name = m->full_name; - if (name == NULL) { - m->well_known_type = kUpb_WellKnown_Unspecified; - return; - } - if (!strcmp(name, "google.protobuf.Any")) { - m->well_known_type = kUpb_WellKnown_Any; - } else if (!strcmp(name, "google.protobuf.FieldMask")) { - m->well_known_type = kUpb_WellKnown_FieldMask; - } else if (!strcmp(name, "google.protobuf.Duration")) { - m->well_known_type = kUpb_WellKnown_Duration; - } else if (!strcmp(name, "google.protobuf.Timestamp")) { - m->well_known_type = kUpb_WellKnown_Timestamp; - } else if (!strcmp(name, "google.protobuf.DoubleValue")) { - m->well_known_type = kUpb_WellKnown_DoubleValue; - } else if (!strcmp(name, "google.protobuf.FloatValue")) { - m->well_known_type = kUpb_WellKnown_FloatValue; - } else if (!strcmp(name, "google.protobuf.Int64Value")) { - m->well_known_type = kUpb_WellKnown_Int64Value; - } else if (!strcmp(name, "google.protobuf.UInt64Value")) { - m->well_known_type = kUpb_WellKnown_UInt64Value; - } else if (!strcmp(name, "google.protobuf.Int32Value")) { - m->well_known_type = kUpb_WellKnown_Int32Value; - } else if (!strcmp(name, "google.protobuf.UInt32Value")) { - m->well_known_type = kUpb_WellKnown_UInt32Value; - } else if (!strcmp(name, "google.protobuf.BoolValue")) { - m->well_known_type = kUpb_WellKnown_BoolValue; - } else if (!strcmp(name, "google.protobuf.StringValue")) { - m->well_known_type = kUpb_WellKnown_StringValue; - } else if (!strcmp(name, "google.protobuf.BytesValue")) { - m->well_known_type = kUpb_WellKnown_BytesValue; - } else if (!strcmp(name, "google.protobuf.Value")) { - m->well_known_type = kUpb_WellKnown_Value; - } else if (!strcmp(name, "google.protobuf.ListValue")) { - m->well_known_type = kUpb_WellKnown_ListValue; - } else if (!strcmp(name, "google.protobuf.Struct")) { - m->well_known_type = kUpb_WellKnown_Struct; - } else { - m->well_known_type = kUpb_WellKnown_Unspecified; + r[i].start = start; + r[i].end = end; + UPB_DEF_SET_OPTIONS(r[i].opts, DescriptorProto_ExtensionRange, + ExtensionRangeOptions, protos[i]); } -} -upb_MessageDef* _upb_MessageDef_At(const upb_MessageDef* m, int i) { - return (upb_MessageDef*)&m[i]; + return r; } -bool _upb_MessageDef_IsValidExtensionNumber(const upb_MessageDef* m, int n) { - for (int i = 0; i < m->ext_range_count; i++) { - const upb_ExtensionRange* r = upb_MessageDef_ExtensionRange(m, i); - if (upb_ExtensionRange_Start(r) <= n && n < upb_ExtensionRange_End(r)) { - return true; - } - } - return false; -} -const UPB_DESC(MessageOptions) * - upb_MessageDef_Options(const upb_MessageDef* m) { - return m->opts; -} +#include +#include -bool upb_MessageDef_HasOptions(const upb_MessageDef* m) { - return m->opts != (void*)kUpbDefOptDefault; -} -const char* upb_MessageDef_FullName(const upb_MessageDef* m) { - return m->full_name; -} +// Must be last. -const upb_FileDef* upb_MessageDef_File(const upb_MessageDef* m) { - return m->file; -} +#define UPB_FIELD_TYPE_UNSPECIFIED 0 -const upb_MessageDef* upb_MessageDef_ContainingType(const upb_MessageDef* m) { - return m->containing_type; +typedef struct { + size_t len; + char str[1]; // Null-terminated string data follows. +} str_t; + +struct upb_FieldDef { + const UPB_DESC(FieldOptions) * opts; + const upb_FileDef* file; + const upb_MessageDef* msgdef; + const char* full_name; + const char* json_name; + union { + int64_t sint; + uint64_t uint; + double dbl; + float flt; + bool boolean; + str_t* str; + void* msg; // Always NULL. + } defaultval; + union { + const upb_OneofDef* oneof; + const upb_MessageDef* extension_scope; + } scope; + union { + const upb_MessageDef* msgdef; + const upb_EnumDef* enumdef; + const UPB_DESC(FieldDescriptorProto) * unresolved; + } sub; + uint32_t number_; + uint16_t index_; + uint16_t layout_index; // Index into msgdef->layout->fields or file->exts + bool has_default; + bool has_json_name; + bool has_presence; + bool is_extension; + bool is_packed; + bool is_proto3_optional; + upb_FieldType type_; + upb_Label label_; +#if UINTPTR_MAX == 0xffffffff + uint32_t padding; // Increase size to a multiple of 8. +#endif +}; + +upb_FieldDef* _upb_FieldDef_At(const upb_FieldDef* f, int i) { + return (upb_FieldDef*)&f[i]; } -const char* upb_MessageDef_Name(const upb_MessageDef* m) { - return _upb_DefBuilder_FullToShort(m->full_name); +const UPB_DESC(FieldOptions) * upb_FieldDef_Options(const upb_FieldDef* f) { + return f->opts; } -upb_Syntax upb_MessageDef_Syntax(const upb_MessageDef* m) { - return upb_FileDef_Syntax(m->file); +bool upb_FieldDef_HasOptions(const upb_FieldDef* f) { + return f->opts != (void*)kUpbDefOptDefault; } -const upb_FieldDef* upb_MessageDef_FindFieldByNumber(const upb_MessageDef* m, - uint32_t i) { - upb_value val; - return upb_inttable_lookup(&m->itof, i, &val) ? upb_value_getconstptr(val) - : NULL; +const char* upb_FieldDef_FullName(const upb_FieldDef* f) { + return f->full_name; } -const upb_FieldDef* upb_MessageDef_FindFieldByNameWithSize( - const upb_MessageDef* m, const char* name, size_t size) { - upb_value val; - - if (!upb_strtable_lookup2(&m->ntof, name, size, &val)) { - return NULL; +upb_CType upb_FieldDef_CType(const upb_FieldDef* f) { + switch (f->type_) { + case kUpb_FieldType_Double: + return kUpb_CType_Double; + case kUpb_FieldType_Float: + return kUpb_CType_Float; + case kUpb_FieldType_Int64: + case kUpb_FieldType_SInt64: + case kUpb_FieldType_SFixed64: + return kUpb_CType_Int64; + case kUpb_FieldType_Int32: + case kUpb_FieldType_SFixed32: + case kUpb_FieldType_SInt32: + return kUpb_CType_Int32; + case kUpb_FieldType_UInt64: + case kUpb_FieldType_Fixed64: + return kUpb_CType_UInt64; + case kUpb_FieldType_UInt32: + case kUpb_FieldType_Fixed32: + return kUpb_CType_UInt32; + case kUpb_FieldType_Enum: + return kUpb_CType_Enum; + case kUpb_FieldType_Bool: + return kUpb_CType_Bool; + case kUpb_FieldType_String: + return kUpb_CType_String; + case kUpb_FieldType_Bytes: + return kUpb_CType_Bytes; + case kUpb_FieldType_Group: + case kUpb_FieldType_Message: + return kUpb_CType_Message; } - - return _upb_DefType_Unpack(val, UPB_DEFTYPE_FIELD); + UPB_UNREACHABLE(); } -const upb_OneofDef* upb_MessageDef_FindOneofByNameWithSize( - const upb_MessageDef* m, const char* name, size_t size) { - upb_value val; +upb_FieldType upb_FieldDef_Type(const upb_FieldDef* f) { return f->type_; } - if (!upb_strtable_lookup2(&m->ntof, name, size, &val)) { - return NULL; - } +uint32_t upb_FieldDef_Index(const upb_FieldDef* f) { return f->index_; } - return _upb_DefType_Unpack(val, UPB_DEFTYPE_ONEOF); -} +upb_Label upb_FieldDef_Label(const upb_FieldDef* f) { return f->label_; } -bool _upb_MessageDef_Insert(upb_MessageDef* m, const char* name, size_t len, - upb_value v, upb_Arena* a) { - return upb_strtable_insert(&m->ntof, name, len, v, a); -} +uint32_t upb_FieldDef_Number(const upb_FieldDef* f) { return f->number_; } -bool upb_MessageDef_FindByNameWithSize(const upb_MessageDef* m, - const char* name, size_t len, - const upb_FieldDef** out_f, - const upb_OneofDef** out_o) { - upb_value val; +bool upb_FieldDef_IsExtension(const upb_FieldDef* f) { return f->is_extension; } - if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) { - return false; - } +bool upb_FieldDef_IsPacked(const upb_FieldDef* f) { return f->is_packed; } - const upb_FieldDef* f = _upb_DefType_Unpack(val, UPB_DEFTYPE_FIELD); - const upb_OneofDef* o = _upb_DefType_Unpack(val, UPB_DEFTYPE_ONEOF); - if (out_f) *out_f = f; - if (out_o) *out_o = o; - return f || o; /* False if this was a JSON name. */ +const char* upb_FieldDef_Name(const upb_FieldDef* f) { + return _upb_DefBuilder_FullToShort(f->full_name); } -const upb_FieldDef* upb_MessageDef_FindByJsonNameWithSize( - const upb_MessageDef* m, const char* name, size_t size) { - upb_value val; - const upb_FieldDef* f; +const char* upb_FieldDef_JsonName(const upb_FieldDef* f) { + return f->json_name; +} - if (!upb_strtable_lookup2(&m->ntof, name, size, &val)) { - return NULL; - } +bool upb_FieldDef_HasJsonName(const upb_FieldDef* f) { + return f->has_json_name; +} - f = _upb_DefType_Unpack(val, UPB_DEFTYPE_FIELD); - if (!f) f = _upb_DefType_Unpack(val, UPB_DEFTYPE_FIELD_JSONNAME); +const upb_FileDef* upb_FieldDef_File(const upb_FieldDef* f) { return f->file; } - return f; +const upb_MessageDef* upb_FieldDef_ContainingType(const upb_FieldDef* f) { + return f->msgdef; } -int upb_MessageDef_ExtensionRangeCount(const upb_MessageDef* m) { - return m->ext_range_count; +const upb_MessageDef* upb_FieldDef_ExtensionScope(const upb_FieldDef* f) { + return f->is_extension ? f->scope.extension_scope : NULL; } -int upb_MessageDef_ReservedRangeCount(const upb_MessageDef* m) { - return m->res_range_count; +const upb_OneofDef* upb_FieldDef_ContainingOneof(const upb_FieldDef* f) { + return f->is_extension ? NULL : f->scope.oneof; } -int upb_MessageDef_ReservedNameCount(const upb_MessageDef* m) { - return m->res_name_count; +const upb_OneofDef* upb_FieldDef_RealContainingOneof(const upb_FieldDef* f) { + const upb_OneofDef* oneof = upb_FieldDef_ContainingOneof(f); + if (!oneof || upb_OneofDef_IsSynthetic(oneof)) return NULL; + return oneof; } -int upb_MessageDef_FieldCount(const upb_MessageDef* m) { - return m->field_count; -} +upb_MessageValue upb_FieldDef_Default(const upb_FieldDef* f) { + upb_MessageValue ret; -int upb_MessageDef_OneofCount(const upb_MessageDef* m) { - return m->oneof_count; -} + if (upb_FieldDef_IsRepeated(f) || upb_FieldDef_IsSubMessage(f)) { + return (upb_MessageValue){.msg_val = NULL}; + } -int upb_MessageDef_RealOneofCount(const upb_MessageDef* m) { - return m->real_oneof_count; -} + switch (upb_FieldDef_CType(f)) { + case kUpb_CType_Bool: + return (upb_MessageValue){.bool_val = f->defaultval.boolean}; + case kUpb_CType_Int64: + return (upb_MessageValue){.int64_val = f->defaultval.sint}; + case kUpb_CType_UInt64: + return (upb_MessageValue){.uint64_val = f->defaultval.uint}; + case kUpb_CType_Enum: + case kUpb_CType_Int32: + return (upb_MessageValue){.int32_val = (int32_t)f->defaultval.sint}; + case kUpb_CType_UInt32: + return (upb_MessageValue){.uint32_val = (uint32_t)f->defaultval.uint}; + case kUpb_CType_Float: + return (upb_MessageValue){.float_val = f->defaultval.flt}; + case kUpb_CType_Double: + return (upb_MessageValue){.double_val = f->defaultval.dbl}; + case kUpb_CType_String: + case kUpb_CType_Bytes: { + str_t* str = f->defaultval.str; + if (str) { + return (upb_MessageValue){ + .str_val = (upb_StringView){.data = str->str, .size = str->len}}; + } else { + return (upb_MessageValue){ + .str_val = (upb_StringView){.data = NULL, .size = 0}}; + } + } + default: + UPB_UNREACHABLE(); + } -int upb_MessageDef_NestedMessageCount(const upb_MessageDef* m) { - return m->nested_msg_count; + return ret; } -int upb_MessageDef_NestedEnumCount(const upb_MessageDef* m) { - return m->nested_enum_count; +const upb_MessageDef* upb_FieldDef_MessageSubDef(const upb_FieldDef* f) { + return upb_FieldDef_CType(f) == kUpb_CType_Message ? f->sub.msgdef : NULL; } -int upb_MessageDef_NestedExtensionCount(const upb_MessageDef* m) { - return m->nested_ext_count; +const upb_EnumDef* upb_FieldDef_EnumSubDef(const upb_FieldDef* f) { + return upb_FieldDef_CType(f) == kUpb_CType_Enum ? f->sub.enumdef : NULL; } -const upb_MiniTable* upb_MessageDef_MiniTable(const upb_MessageDef* m) { - return m->layout; +const upb_MiniTableField* upb_FieldDef_MiniTable(const upb_FieldDef* f) { + if (upb_FieldDef_IsExtension(f)) { + const upb_FileDef* file = upb_FieldDef_File(f); + return (upb_MiniTableField*)_upb_FileDef_ExtensionMiniTable( + file, f->layout_index); + } else { + const upb_MiniTable* layout = upb_MessageDef_MiniTable(f->msgdef); + return &layout->fields[f->layout_index]; + } } -const upb_ExtensionRange* upb_MessageDef_ExtensionRange(const upb_MessageDef* m, - int i) { - UPB_ASSERT(0 <= i && i < m->ext_range_count); - return _upb_ExtensionRange_At(m->ext_ranges, i); +const upb_MiniTableExtension* _upb_FieldDef_ExtensionMiniTable( + const upb_FieldDef* f) { + UPB_ASSERT(upb_FieldDef_IsExtension(f)); + const upb_FileDef* file = upb_FieldDef_File(f); + return _upb_FileDef_ExtensionMiniTable(file, f->layout_index); } -const upb_MessageReservedRange* upb_MessageDef_ReservedRange( - const upb_MessageDef* m, int i) { - UPB_ASSERT(0 <= i && i < m->res_range_count); - return _upb_MessageReservedRange_At(m->res_ranges, i); +bool _upb_FieldDef_IsClosedEnum(const upb_FieldDef* f) { + if (f->type_ != kUpb_FieldType_Enum) return false; + return upb_EnumDef_IsClosed(f->sub.enumdef); } -upb_StringView upb_MessageDef_ReservedName(const upb_MessageDef* m, int i) { - UPB_ASSERT(0 <= i && i < m->res_name_count); - return m->res_names[i]; +bool _upb_FieldDef_IsProto3Optional(const upb_FieldDef* f) { + return f->is_proto3_optional; } -const upb_FieldDef* upb_MessageDef_Field(const upb_MessageDef* m, int i) { - UPB_ASSERT(0 <= i && i < m->field_count); - return _upb_FieldDef_At(m->fields, i); -} +int _upb_FieldDef_LayoutIndex(const upb_FieldDef* f) { return f->layout_index; } -const upb_OneofDef* upb_MessageDef_Oneof(const upb_MessageDef* m, int i) { - UPB_ASSERT(0 <= i && i < m->oneof_count); - return _upb_OneofDef_At(m->oneofs, i); -} +uint64_t _upb_FieldDef_Modifiers(const upb_FieldDef* f) { + uint64_t out = f->is_packed ? kUpb_FieldModifier_IsPacked : 0; -const upb_MessageDef* upb_MessageDef_NestedMessage(const upb_MessageDef* m, - int i) { - UPB_ASSERT(0 <= i && i < m->nested_msg_count); - return &m->nested_msgs[i]; -} + switch (f->label_) { + case kUpb_Label_Optional: + if (!upb_FieldDef_HasPresence(f)) { + out |= kUpb_FieldModifier_IsProto3Singular; + } + break; + case kUpb_Label_Repeated: + out |= kUpb_FieldModifier_IsRepeated; + break; + case kUpb_Label_Required: + out |= kUpb_FieldModifier_IsRequired; + break; + } -const upb_EnumDef* upb_MessageDef_NestedEnum(const upb_MessageDef* m, int i) { - UPB_ASSERT(0 <= i && i < m->nested_enum_count); - return _upb_EnumDef_At(m->nested_enums, i); + if (_upb_FieldDef_IsClosedEnum(f)) { + out |= kUpb_FieldModifier_IsClosedEnum; + } + return out; } -const upb_FieldDef* upb_MessageDef_NestedExtension(const upb_MessageDef* m, - int i) { - UPB_ASSERT(0 <= i && i < m->nested_ext_count); - return _upb_FieldDef_At(m->nested_exts, i); +bool upb_FieldDef_HasDefault(const upb_FieldDef* f) { return f->has_default; } +bool upb_FieldDef_HasPresence(const upb_FieldDef* f) { return f->has_presence; } + +bool upb_FieldDef_HasSubDef(const upb_FieldDef* f) { + return upb_FieldDef_IsSubMessage(f) || + upb_FieldDef_CType(f) == kUpb_CType_Enum; } -upb_WellKnown upb_MessageDef_WellKnownType(const upb_MessageDef* m) { - return m->well_known_type; +bool upb_FieldDef_IsMap(const upb_FieldDef* f) { + return upb_FieldDef_IsRepeated(f) && upb_FieldDef_IsSubMessage(f) && + upb_MessageDef_IsMapEntry(upb_FieldDef_MessageSubDef(f)); } -bool _upb_MessageDef_InMessageSet(const upb_MessageDef* m) { - return m->in_message_set; +bool upb_FieldDef_IsOptional(const upb_FieldDef* f) { + return upb_FieldDef_Label(f) == kUpb_Label_Optional; } -const upb_FieldDef* upb_MessageDef_FindFieldByName(const upb_MessageDef* m, - const char* name) { - return upb_MessageDef_FindFieldByNameWithSize(m, name, strlen(name)); +bool upb_FieldDef_IsPrimitive(const upb_FieldDef* f) { + return !upb_FieldDef_IsString(f) && !upb_FieldDef_IsSubMessage(f); } -const upb_OneofDef* upb_MessageDef_FindOneofByName(const upb_MessageDef* m, - const char* name) { - return upb_MessageDef_FindOneofByNameWithSize(m, name, strlen(name)); +bool upb_FieldDef_IsRepeated(const upb_FieldDef* f) { + return upb_FieldDef_Label(f) == kUpb_Label_Repeated; } -bool upb_MessageDef_IsMapEntry(const upb_MessageDef* m) { - return UPB_DESC(MessageOptions_map_entry)(m->opts); +bool upb_FieldDef_IsRequired(const upb_FieldDef* f) { + return upb_FieldDef_Label(f) == kUpb_Label_Required; } -bool upb_MessageDef_IsMessageSet(const upb_MessageDef* m) { - return UPB_DESC(MessageOptions_message_set_wire_format)(m->opts); +bool upb_FieldDef_IsString(const upb_FieldDef* f) { + return upb_FieldDef_CType(f) == kUpb_CType_String || + upb_FieldDef_CType(f) == kUpb_CType_Bytes; } -static upb_MiniTable* _upb_MessageDef_MakeMiniTable(upb_DefBuilder* ctx, - const upb_MessageDef* m) { - upb_StringView desc; - // Note: this will assign layout_index for fields, so upb_FieldDef_MiniTable() - // is safe to call only after this call. - bool ok = upb_MessageDef_MiniDescriptorEncode(m, ctx->tmp_arena, &desc); - if (!ok) _upb_DefBuilder_OomErr(ctx); - - void** scratch_data = _upb_DefPool_ScratchData(ctx->symtab); - size_t* scratch_size = _upb_DefPool_ScratchSize(ctx->symtab); - upb_MiniTable* ret = upb_MiniTable_BuildWithBuf( - desc.data, desc.size, ctx->platform, ctx->arena, scratch_data, - scratch_size, ctx->status); - if (!ret) _upb_DefBuilder_FailJmp(ctx); - - return ret; +bool upb_FieldDef_IsSubMessage(const upb_FieldDef* f) { + return upb_FieldDef_CType(f) == kUpb_CType_Message; } -void _upb_MessageDef_Resolve(upb_DefBuilder* ctx, upb_MessageDef* m) { - for (int i = 0; i < m->field_count; i++) { - upb_FieldDef* f = (upb_FieldDef*)upb_MessageDef_Field(m, i); - _upb_FieldDef_Resolve(ctx, m->full_name, f); - } - - m->in_message_set = false; - for (int i = 0; i < upb_MessageDef_NestedExtensionCount(m); i++) { - upb_FieldDef* ext = (upb_FieldDef*)upb_MessageDef_NestedExtension(m, i); - _upb_FieldDef_Resolve(ctx, m->full_name, ext); - if (upb_FieldDef_Type(ext) == kUpb_FieldType_Message && - upb_FieldDef_Label(ext) == kUpb_Label_Optional && - upb_FieldDef_MessageSubDef(ext) == m && - UPB_DESC(MessageOptions_message_set_wire_format)( - upb_MessageDef_Options(upb_FieldDef_ContainingType(ext)))) { - m->in_message_set = true; - } - } - - for (int i = 0; i < upb_MessageDef_NestedMessageCount(m); i++) { - upb_MessageDef* n = (upb_MessageDef*)upb_MessageDef_NestedMessage(m, i); - _upb_MessageDef_Resolve(ctx, n); - } +static bool between(int32_t x, int32_t low, int32_t high) { + return x >= low && x <= high; } -void _upb_MessageDef_InsertField(upb_DefBuilder* ctx, upb_MessageDef* m, - const upb_FieldDef* f) { - const int32_t field_number = upb_FieldDef_Number(f); - - if (field_number <= 0 || field_number > kUpb_MaxFieldNumber) { - _upb_DefBuilder_Errf(ctx, "invalid field number (%u)", field_number); - } - - const char* json_name = upb_FieldDef_JsonName(f); - const char* shortname = upb_FieldDef_Name(f); - const size_t shortnamelen = strlen(shortname); +bool upb_FieldDef_checklabel(int32_t label) { return between(label, 1, 3); } +bool upb_FieldDef_checktype(int32_t type) { return between(type, 1, 11); } +bool upb_FieldDef_checkintfmt(int32_t fmt) { return between(fmt, 1, 3); } - upb_value v = upb_value_constptr(f); +bool upb_FieldDef_checkdescriptortype(int32_t type) { + return between(type, 1, 18); +} - upb_value existing_v; - if (upb_strtable_lookup(&m->ntof, shortname, &existing_v)) { - _upb_DefBuilder_Errf(ctx, "duplicate field name (%s)", shortname); - } +static bool streql2(const char* a, size_t n, const char* b) { + return n == strlen(b) && memcmp(a, b, n) == 0; +} - const upb_value field_v = _upb_DefType_Pack(f, UPB_DEFTYPE_FIELD); - bool ok = - _upb_MessageDef_Insert(m, shortname, shortnamelen, field_v, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); +// Implement the transformation as described in the spec: +// 1. upper case all letters after an underscore. +// 2. remove all underscores. +static char* make_json_name(const char* name, size_t size, upb_Arena* a) { + char* out = upb_Arena_Malloc(a, size + 1); // +1 is to add a trailing '\0' + if (out == NULL) return NULL; - if (strcmp(shortname, json_name) != 0) { - if (upb_strtable_lookup(&m->ntof, json_name, &v)) { - _upb_DefBuilder_Errf(ctx, "duplicate json_name (%s)", json_name); + bool ucase_next = false; + char* des = out; + for (size_t i = 0; i < size; i++) { + if (name[i] == '_') { + ucase_next = true; + } else { + *des++ = ucase_next ? toupper(name[i]) : name[i]; + ucase_next = false; } - - const size_t json_size = strlen(json_name); - const upb_value json_v = _upb_DefType_Pack(f, UPB_DEFTYPE_FIELD_JSONNAME); - ok = _upb_MessageDef_Insert(m, json_name, json_size, json_v, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); - } - - if (upb_inttable_lookup(&m->itof, field_number, NULL)) { - _upb_DefBuilder_Errf(ctx, "duplicate field number (%u)", field_number); } + *des++ = '\0'; + return out; +} - ok = upb_inttable_insert(&m->itof, field_number, v, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); +static str_t* newstr(upb_DefBuilder* ctx, const char* data, size_t len) { + str_t* ret = _upb_DefBuilder_Alloc(ctx, sizeof(*ret) + len); + if (!ret) _upb_DefBuilder_OomErr(ctx); + ret->len = len; + if (len) memcpy(ret->str, data, len); + ret->str[len] = '\0'; + return ret; } -void _upb_MessageDef_CreateMiniTable(upb_DefBuilder* ctx, upb_MessageDef* m) { - if (ctx->layout == NULL) { - m->layout = _upb_MessageDef_MakeMiniTable(ctx, m); - } else { - UPB_ASSERT(ctx->msg_count < ctx->layout->msg_count); - m->layout = ctx->layout->msgs[ctx->msg_count++]; - UPB_ASSERT(m->field_count == m->layout->field_count); +static str_t* unescape(upb_DefBuilder* ctx, const upb_FieldDef* f, + const char* data, size_t len) { + // Size here is an upper bound; escape sequences could ultimately shrink it. + str_t* ret = _upb_DefBuilder_Alloc(ctx, sizeof(*ret) + len); + char* dst = &ret->str[0]; + const char* src = data; + const char* end = data + len; - // We don't need the result of this call, but it will assign layout_index - // for all the fields in O(n lg n) time. - _upb_FieldDefs_Sorted(m->fields, m->field_count, ctx->tmp_arena); + while (src < end) { + if (*src == '\\') { + src++; + *dst++ = _upb_DefBuilder_ParseEscape(ctx, f, &src, end); + } else { + *dst++ = *src++; + } } - for (int i = 0; i < m->nested_msg_count; i++) { - upb_MessageDef* nested = - (upb_MessageDef*)upb_MessageDef_NestedMessage(m, i); - _upb_MessageDef_CreateMiniTable(ctx, nested); - } + ret->len = dst - &ret->str[0]; + return ret; } -void _upb_MessageDef_LinkMiniTable(upb_DefBuilder* ctx, - const upb_MessageDef* m) { - for (int i = 0; i < upb_MessageDef_NestedExtensionCount(m); i++) { - const upb_FieldDef* ext = upb_MessageDef_NestedExtension(m, i); - _upb_FieldDef_BuildMiniTableExtension(ctx, ext); - } +static void parse_default(upb_DefBuilder* ctx, const char* str, size_t len, + upb_FieldDef* f) { + char* end; + char nullz[64]; + errno = 0; - for (int i = 0; i < m->nested_msg_count; i++) { - _upb_MessageDef_LinkMiniTable(ctx, upb_MessageDef_NestedMessage(m, i)); + switch (upb_FieldDef_CType(f)) { + case kUpb_CType_Int32: + case kUpb_CType_Int64: + case kUpb_CType_UInt32: + case kUpb_CType_UInt64: + case kUpb_CType_Double: + case kUpb_CType_Float: + // Standard C number parsing functions expect null-terminated strings. + if (len >= sizeof(nullz) - 1) { + _upb_DefBuilder_Errf(ctx, "Default too long: %.*s", (int)len, str); + } + memcpy(nullz, str, len); + nullz[len] = '\0'; + str = nullz; + break; + default: + break; } - if (ctx->layout) return; - - for (int i = 0; i < m->field_count; i++) { - const upb_FieldDef* f = upb_MessageDef_Field(m, i); - const upb_MessageDef* sub_m = upb_FieldDef_MessageSubDef(f); - const upb_EnumDef* sub_e = upb_FieldDef_EnumSubDef(f); - const int layout_index = _upb_FieldDef_LayoutIndex(f); - upb_MiniTable* mt = (upb_MiniTable*)upb_MessageDef_MiniTable(m); - - UPB_ASSERT(layout_index < m->field_count); - upb_MiniTableField* mt_f = - (upb_MiniTableField*)&m->layout->fields[layout_index]; - if (sub_m) { - if (!mt->subs) { - _upb_DefBuilder_Errf(ctx, "unexpected submsg for (%s)", m->full_name); + switch (upb_FieldDef_CType(f)) { + case kUpb_CType_Int32: { + long val = strtol(str, &end, 0); + if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end) { + goto invalid; } - UPB_ASSERT(mt_f); - UPB_ASSERT(sub_m->layout); - if (UPB_UNLIKELY(!upb_MiniTable_SetSubMessage(mt, mt_f, sub_m->layout))) { - _upb_DefBuilder_Errf(ctx, "invalid submsg for (%s)", m->full_name); + f->defaultval.sint = val; + break; + } + case kUpb_CType_Enum: { + const upb_EnumDef* e = f->sub.enumdef; + const upb_EnumValueDef* ev = + upb_EnumDef_FindValueByNameWithSize(e, str, len); + if (!ev) { + goto invalid; } - } else if (_upb_FieldDef_IsClosedEnum(f)) { - const upb_MiniTableEnum* mt_e = _upb_EnumDef_MiniTable(sub_e); - if (UPB_UNLIKELY(!upb_MiniTable_SetSubEnum(mt, mt_f, mt_e))) { - _upb_DefBuilder_Errf(ctx, "invalid subenum for (%s)", m->full_name); + f->defaultval.sint = upb_EnumValueDef_Number(ev); + break; + } + case kUpb_CType_Int64: { + long long val = strtoll(str, &end, 0); + if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || *end) { + goto invalid; } + f->defaultval.sint = val; + break; } - } - -#ifndef NDEBUG - for (int i = 0; i < m->field_count; i++) { - const upb_FieldDef* f = upb_MessageDef_Field(m, i); - const int layout_index = _upb_FieldDef_LayoutIndex(f); - UPB_ASSERT(layout_index < m->layout->field_count); - const upb_MiniTableField* mt_f = &m->layout->fields[layout_index]; - UPB_ASSERT(upb_FieldDef_Type(f) == upb_MiniTableField_Type(mt_f)); - UPB_ASSERT(upb_FieldDef_CType(f) == upb_MiniTableField_CType(mt_f)); - UPB_ASSERT(upb_FieldDef_HasPresence(f) == - upb_MiniTableField_HasPresence(mt_f)); - } -#endif -} - -static uint64_t _upb_MessageDef_Modifiers(const upb_MessageDef* m) { - uint64_t out = 0; - if (upb_FileDef_Syntax(m->file) == kUpb_Syntax_Proto3) { - out |= kUpb_MessageModifier_ValidateUtf8; - out |= kUpb_MessageModifier_DefaultIsPacked; - } - if (m->ext_range_count) { - out |= kUpb_MessageModifier_IsExtendable; - } - return out; -} - -static bool _upb_MessageDef_EncodeMap(upb_DescState* s, const upb_MessageDef* m, - upb_Arena* a) { - if (m->field_count != 2) return false; - - const upb_FieldDef* key_field = upb_MessageDef_Field(m, 0); - const upb_FieldDef* val_field = upb_MessageDef_Field(m, 1); - if (key_field == NULL || val_field == NULL) return false; + case kUpb_CType_UInt32: { + unsigned long val = strtoul(str, &end, 0); + if (val > UINT32_MAX || errno == ERANGE || *end) { + goto invalid; + } + f->defaultval.uint = val; + break; + } + case kUpb_CType_UInt64: { + unsigned long long val = strtoull(str, &end, 0); + if (val > UINT64_MAX || errno == ERANGE || *end) { + goto invalid; + } + f->defaultval.uint = val; + break; + } + case kUpb_CType_Double: { + double val = strtod(str, &end); + if (errno == ERANGE || *end) { + goto invalid; + } + f->defaultval.dbl = val; + break; + } + case kUpb_CType_Float: { + float val = strtof(str, &end); + if (errno == ERANGE || *end) { + goto invalid; + } + f->defaultval.flt = val; + break; + } + case kUpb_CType_Bool: { + if (streql2(str, len, "false")) { + f->defaultval.boolean = false; + } else if (streql2(str, len, "true")) { + f->defaultval.boolean = true; + } else { + goto invalid; + } + break; + } + case kUpb_CType_String: + f->defaultval.str = newstr(ctx, str, len); + break; + case kUpb_CType_Bytes: + f->defaultval.str = unescape(ctx, f, str, len); + break; + case kUpb_CType_Message: + /* Should not have a default value. */ + _upb_DefBuilder_Errf(ctx, "Message should not have a default (%s)", + upb_FieldDef_FullName(f)); + } - UPB_ASSERT(_upb_FieldDef_LayoutIndex(key_field) == 0); - UPB_ASSERT(_upb_FieldDef_LayoutIndex(val_field) == 1); + return; - s->ptr = upb_MtDataEncoder_EncodeMap( - &s->e, s->ptr, upb_FieldDef_Type(key_field), upb_FieldDef_Type(val_field), - _upb_FieldDef_Modifiers(key_field), _upb_FieldDef_Modifiers(val_field)); - return true; +invalid: + _upb_DefBuilder_Errf(ctx, "Invalid default '%.*s' for field %s of type %d", + (int)len, str, upb_FieldDef_FullName(f), + (int)upb_FieldDef_Type(f)); } -static bool _upb_MessageDef_EncodeMessage(upb_DescState* s, - const upb_MessageDef* m, - upb_Arena* a) { - const upb_FieldDef** sorted = NULL; - if (!m->is_sorted) { - sorted = _upb_FieldDefs_Sorted(m->fields, m->field_count, a); - if (!sorted) return false; +static void set_default_default(upb_DefBuilder* ctx, upb_FieldDef* f) { + switch (upb_FieldDef_CType(f)) { + case kUpb_CType_Int32: + case kUpb_CType_Int64: + f->defaultval.sint = 0; + break; + case kUpb_CType_UInt64: + case kUpb_CType_UInt32: + f->defaultval.uint = 0; + break; + case kUpb_CType_Double: + case kUpb_CType_Float: + f->defaultval.dbl = 0; + break; + case kUpb_CType_String: + case kUpb_CType_Bytes: + f->defaultval.str = newstr(ctx, NULL, 0); + break; + case kUpb_CType_Bool: + f->defaultval.boolean = false; + break; + case kUpb_CType_Enum: { + const upb_EnumValueDef* v = upb_EnumDef_Value(f->sub.enumdef, 0); + f->defaultval.sint = upb_EnumValueDef_Number(v); + break; + } + case kUpb_CType_Message: + break; } +} - s->ptr = upb_MtDataEncoder_StartMessage(&s->e, s->ptr, - _upb_MessageDef_Modifiers(m)); - - for (int i = 0; i < m->field_count; i++) { - const upb_FieldDef* f = sorted ? sorted[i] : upb_MessageDef_Field(m, i); - const upb_FieldType type = upb_FieldDef_Type(f); - const int number = upb_FieldDef_Number(f); - const uint64_t modifiers = _upb_FieldDef_Modifiers(f); +static void _upb_FieldDef_Create(upb_DefBuilder* ctx, const char* prefix, + const UPB_DESC(FieldDescriptorProto) * + field_proto, + upb_MessageDef* m, upb_FieldDef* f) { + // Must happen before _upb_DefBuilder_Add() + f->file = _upb_DefBuilder_File(ctx); - if (!_upb_DescState_Grow(s, a)) return false; - s->ptr = upb_MtDataEncoder_PutField(&s->e, s->ptr, type, number, modifiers); + if (!UPB_DESC(FieldDescriptorProto_has_name)(field_proto)) { + _upb_DefBuilder_Errf(ctx, "field has no name"); } - for (int i = 0; i < m->real_oneof_count; i++) { - if (!_upb_DescState_Grow(s, a)) return false; - s->ptr = upb_MtDataEncoder_StartOneof(&s->e, s->ptr); + const upb_StringView name = UPB_DESC(FieldDescriptorProto_name)(field_proto); - const upb_OneofDef* o = upb_MessageDef_Oneof(m, i); - const int field_count = upb_OneofDef_FieldCount(o); - for (int j = 0; j < field_count; j++) { - const int number = upb_FieldDef_Number(upb_OneofDef_Field(o, j)); + f->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); + f->label_ = (int)UPB_DESC(FieldDescriptorProto_label)(field_proto); + f->number_ = UPB_DESC(FieldDescriptorProto_number)(field_proto); + f->is_proto3_optional = + UPB_DESC(FieldDescriptorProto_proto3_optional)(field_proto); + f->msgdef = m; + f->scope.oneof = NULL; - if (!_upb_DescState_Grow(s, a)) return false; - s->ptr = upb_MtDataEncoder_PutOneofField(&s->e, s->ptr, number); - } + f->has_json_name = UPB_DESC(FieldDescriptorProto_has_json_name)(field_proto); + if (f->has_json_name) { + const upb_StringView sv = + UPB_DESC(FieldDescriptorProto_json_name)(field_proto); + f->json_name = upb_strdup2(sv.data, sv.size, ctx->arena); + } else { + f->json_name = make_json_name(name.data, name.size, ctx->arena); } + if (!f->json_name) _upb_DefBuilder_OomErr(ctx); - return true; -} - -static bool _upb_MessageDef_EncodeMessageSet(upb_DescState* s, - const upb_MessageDef* m, - upb_Arena* a) { - s->ptr = upb_MtDataEncoder_EncodeMessageSet(&s->e, s->ptr); - - return true; -} + const bool has_type = UPB_DESC(FieldDescriptorProto_has_type)(field_proto); + const bool has_type_name = + UPB_DESC(FieldDescriptorProto_has_type_name)(field_proto); -bool upb_MessageDef_MiniDescriptorEncode(const upb_MessageDef* m, upb_Arena* a, - upb_StringView* out) { - upb_DescState s; - _upb_DescState_Init(&s); + f->type_ = (int)UPB_DESC(FieldDescriptorProto_type)(field_proto); - if (!_upb_DescState_Grow(&s, a)) return false; + if (has_type) { + switch (f->type_) { + case kUpb_FieldType_Message: + case kUpb_FieldType_Group: + case kUpb_FieldType_Enum: + if (!has_type_name) { + _upb_DefBuilder_Errf(ctx, "field of type %d requires type name (%s)", + (int)f->type_, f->full_name); + } + break; + default: + if (has_type_name) { + _upb_DefBuilder_Errf( + ctx, "invalid type for field with type_name set (%s, %d)", + f->full_name, (int)f->type_); + } + } + } - if (upb_MessageDef_IsMapEntry(m)) { - if (!_upb_MessageDef_EncodeMap(&s, m, a)) return false; - } else if (UPB_DESC(MessageOptions_message_set_wire_format)(m->opts)) { - if (!_upb_MessageDef_EncodeMessageSet(&s, m, a)) return false; + if (!has_type && has_type_name) { + f->type_ = + UPB_FIELD_TYPE_UNSPECIFIED; // We'll assign this in resolve_subdef() } else { - if (!_upb_MessageDef_EncodeMessage(&s, m, a)) return false; + if (f->type_ < kUpb_FieldType_Double || f->type_ > kUpb_FieldType_SInt64) { + _upb_DefBuilder_Errf(ctx, "invalid type for field %s (%d)", f->full_name, + f->type_); + } } - if (!_upb_DescState_Grow(&s, a)) return false; - *s.ptr = '\0'; + if (f->label_ < kUpb_Label_Optional || f->label_ > kUpb_Label_Repeated) { + _upb_DefBuilder_Errf(ctx, "invalid label for field %s (%d)", f->full_name, + f->label_); + } - out->data = s.buf; - out->size = s.ptr - s.buf; - return true; -} + /* We can't resolve the subdef or (in the case of extensions) the containing + * message yet, because it may not have been defined yet. We stash a pointer + * to the field_proto until later when we can properly resolve it. */ + f->sub.unresolved = field_proto; -static upb_StringView* _upb_ReservedNames_New(upb_DefBuilder* ctx, int n, - const upb_StringView* protos) { - upb_StringView* sv = _upb_DefBuilder_Alloc(ctx, sizeof(upb_StringView) * n); - for (size_t i = 0; i < n; i++) { - sv[i].data = - upb_strdup2(protos[i].data, protos[i].size, _upb_DefBuilder_Arena(ctx)); - sv[i].size = protos[i].size; + if (f->label_ == kUpb_Label_Required && + upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto3) { + _upb_DefBuilder_Errf(ctx, "proto3 fields cannot be required (%s)", + f->full_name); } - return sv; -} -static void create_msgdef(upb_DefBuilder* ctx, const char* prefix, - const UPB_DESC(DescriptorProto) * msg_proto, - const upb_MessageDef* containing_type, - upb_MessageDef* m) { - const UPB_DESC(OneofDescriptorProto)* const* oneofs; - const UPB_DESC(FieldDescriptorProto)* const* fields; - const UPB_DESC(DescriptorProto_ExtensionRange)* const* ext_ranges; - const UPB_DESC(DescriptorProto_ReservedRange)* const* res_ranges; - const upb_StringView* res_names; - size_t n_oneof, n_field, n_enum, n_ext, n_msg; - size_t n_ext_range, n_res_range, n_res_name; - upb_StringView name; + if (UPB_DESC(FieldDescriptorProto_has_oneof_index)(field_proto)) { + int oneof_index = UPB_DESC(FieldDescriptorProto_oneof_index)(field_proto); - // Must happen before _upb_DefBuilder_Add() - m->file = _upb_DefBuilder_File(ctx); + if (upb_FieldDef_Label(f) != kUpb_Label_Optional) { + _upb_DefBuilder_Errf(ctx, "fields in oneof must have OPTIONAL label (%s)", + f->full_name); + } - m->containing_type = containing_type; - m->is_sorted = true; + if (!m) { + _upb_DefBuilder_Errf(ctx, "oneof field (%s) has no containing msg", + f->full_name); + } - name = UPB_DESC(DescriptorProto_name)(msg_proto); + if (oneof_index >= upb_MessageDef_OneofCount(m)) { + _upb_DefBuilder_Errf(ctx, "oneof_index out of range (%s)", f->full_name); + } - m->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); - _upb_DefBuilder_Add(ctx, m->full_name, _upb_DefType_Pack(m, UPB_DEFTYPE_MSG)); - - oneofs = UPB_DESC(DescriptorProto_oneof_decl)(msg_proto, &n_oneof); - fields = UPB_DESC(DescriptorProto_field)(msg_proto, &n_field); - ext_ranges = - UPB_DESC(DescriptorProto_extension_range)(msg_proto, &n_ext_range); - res_ranges = - UPB_DESC(DescriptorProto_reserved_range)(msg_proto, &n_res_range); - res_names = UPB_DESC(DescriptorProto_reserved_name)(msg_proto, &n_res_name); - - bool ok = upb_inttable_init(&m->itof, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); - - ok = upb_strtable_init(&m->ntof, n_oneof + n_field, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); - - UPB_DEF_SET_OPTIONS(m->opts, DescriptorProto, MessageOptions, msg_proto); + upb_OneofDef* oneof = (upb_OneofDef*)upb_MessageDef_Oneof(m, oneof_index); + f->scope.oneof = oneof; - m->oneof_count = n_oneof; - m->oneofs = _upb_OneofDefs_New(ctx, n_oneof, oneofs, m); + _upb_OneofDef_Insert(ctx, oneof, f, name.data, name.size); + } - m->field_count = n_field; - m->fields = - _upb_FieldDefs_New(ctx, n_field, fields, m->full_name, m, &m->is_sorted); + UPB_DEF_SET_OPTIONS(f->opts, FieldDescriptorProto, FieldOptions, field_proto); - // Message Sets may not contain fields. - if (UPB_UNLIKELY(UPB_DESC(MessageOptions_message_set_wire_format)(m->opts))) { - if (UPB_UNLIKELY(n_field > 0)) { - _upb_DefBuilder_Errf(ctx, "invalid message set (%s)", m->full_name); - } + if (UPB_DESC(FieldOptions_has_packed)(f->opts)) { + f->is_packed = UPB_DESC(FieldOptions_packed)(f->opts); + } else { + // Repeated fields default to packed for proto3 only. + f->is_packed = has_type && upb_FieldDef_IsPrimitive(f) && + f->label_ == kUpb_Label_Repeated && + upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto3; } - m->ext_range_count = n_ext_range; - m->ext_ranges = _upb_ExtensionRanges_New(ctx, n_ext_range, ext_ranges, m); + f->has_presence = + (!upb_FieldDef_IsRepeated(f)) && + (f->type_ == kUpb_FieldType_Message || f->type_ == kUpb_FieldType_Group || + upb_FieldDef_ContainingOneof(f) || + (upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto2)); +} - m->res_range_count = n_res_range; - m->res_ranges = - _upb_MessageReservedRanges_New(ctx, n_res_range, res_ranges, m); +static void _upb_FieldDef_CreateExt(upb_DefBuilder* ctx, const char* prefix, + const UPB_DESC(FieldDescriptorProto) * + field_proto, + upb_MessageDef* m, upb_FieldDef* f) { + f->is_extension = true; + _upb_FieldDef_Create(ctx, prefix, field_proto, m, f); - m->res_name_count = n_res_name; - m->res_names = _upb_ReservedNames_New(ctx, n_res_name, res_names); + if (UPB_DESC(FieldDescriptorProto_has_oneof_index)(field_proto)) { + _upb_DefBuilder_Errf(ctx, "oneof_index provided for extension field (%s)", + f->full_name); + } - const size_t synthetic_count = _upb_OneofDefs_Finalize(ctx, m); - m->real_oneof_count = m->oneof_count - synthetic_count; + f->scope.extension_scope = m; + _upb_DefBuilder_Add(ctx, f->full_name, _upb_DefType_Pack(f, UPB_DEFTYPE_EXT)); + f->layout_index = ctx->ext_count++; - assign_msg_wellknowntype(m); - upb_inttable_compact(&m->itof, ctx->arena); + if (ctx->layout) { + UPB_ASSERT(_upb_FieldDef_ExtensionMiniTable(f)->field.number == f->number_); + } +} - const UPB_DESC(EnumDescriptorProto)* const* enums = - UPB_DESC(DescriptorProto_enum_type)(msg_proto, &n_enum); - m->nested_enum_count = n_enum; - m->nested_enums = _upb_EnumDefs_New(ctx, n_enum, enums, m); +static void _upb_FieldDef_CreateNotExt(upb_DefBuilder* ctx, const char* prefix, + const UPB_DESC(FieldDescriptorProto) * + field_proto, + upb_MessageDef* m, upb_FieldDef* f) { + f->is_extension = false; + _upb_FieldDef_Create(ctx, prefix, field_proto, m, f); - const UPB_DESC(FieldDescriptorProto)* const* exts = - UPB_DESC(DescriptorProto_extension)(msg_proto, &n_ext); - m->nested_ext_count = n_ext; - m->nested_exts = _upb_Extensions_New(ctx, n_ext, exts, m->full_name, m); + if (!UPB_DESC(FieldDescriptorProto_has_oneof_index)(field_proto)) { + if (f->is_proto3_optional) { + _upb_DefBuilder_Errf( + ctx, + "non-extension field (%s) with proto3_optional was not in a oneof", + f->full_name); + } + } - const UPB_DESC(DescriptorProto)* const* msgs = - UPB_DESC(DescriptorProto_nested_type)(msg_proto, &n_msg); - m->nested_msg_count = n_msg; - m->nested_msgs = _upb_MessageDefs_New(ctx, n_msg, msgs, m); + _upb_MessageDef_InsertField(ctx, m, f); } -// Allocate and initialize an array of |n| message defs. -upb_MessageDef* _upb_MessageDefs_New( - upb_DefBuilder* ctx, int n, const UPB_DESC(DescriptorProto) * const* protos, - const upb_MessageDef* containing_type) { - _upb_DefType_CheckPadding(sizeof(upb_MessageDef)); - - const char* name = containing_type ? containing_type->full_name - : _upb_FileDef_RawPackage(ctx->file); +upb_FieldDef* _upb_Extensions_New( + upb_DefBuilder* ctx, int n, + const UPB_DESC(FieldDescriptorProto) * const* protos, const char* prefix, + upb_MessageDef* m) { + _upb_DefType_CheckPadding(sizeof(upb_FieldDef)); + upb_FieldDef* defs = + (upb_FieldDef*)_upb_DefBuilder_Alloc(ctx, sizeof(upb_FieldDef) * n); - upb_MessageDef* m = _upb_DefBuilder_Alloc(ctx, sizeof(upb_MessageDef) * n); for (int i = 0; i < n; i++) { - create_msgdef(ctx, name, protos[i], containing_type, &m[i]); + upb_FieldDef* f = &defs[i]; + + _upb_FieldDef_CreateExt(ctx, prefix, protos[i], m, f); + f->index_ = i; } - return m; + + return defs; } +upb_FieldDef* _upb_FieldDefs_New( + upb_DefBuilder* ctx, int n, + const UPB_DESC(FieldDescriptorProto) * const* protos, const char* prefix, + upb_MessageDef* m, bool* is_sorted) { + _upb_DefType_CheckPadding(sizeof(upb_FieldDef)); + upb_FieldDef* defs = + (upb_FieldDef*)_upb_DefBuilder_Alloc(ctx, sizeof(upb_FieldDef) * n); + + uint32_t previous = 0; + for (int i = 0; i < n; i++) { + upb_FieldDef* f = &defs[i]; -// Must be last. + _upb_FieldDef_CreateNotExt(ctx, prefix, protos[i], m, f); + f->index_ = i; + if (!ctx->layout) { + // Speculate that the def fields are sorted. We will always sort the + // MiniTable fields, so if defs are sorted then indices will match. + // + // If this is incorrect, we will overwrite later. + f->layout_index = i; + } -struct upb_MessageReservedRange { - int32_t start; - int32_t end; -}; + const uint32_t current = f->number_; + if (previous > current) *is_sorted = false; + previous = current; + } -upb_MessageReservedRange* _upb_MessageReservedRange_At( - const upb_MessageReservedRange* r, int i) { - return (upb_MessageReservedRange*)&r[i]; + return defs; } -int32_t upb_MessageReservedRange_Start(const upb_MessageReservedRange* r) { - return r->start; +static void resolve_subdef(upb_DefBuilder* ctx, const char* prefix, + upb_FieldDef* f) { + const UPB_DESC(FieldDescriptorProto)* field_proto = f->sub.unresolved; + upb_StringView name = UPB_DESC(FieldDescriptorProto_type_name)(field_proto); + bool has_name = UPB_DESC(FieldDescriptorProto_has_type_name)(field_proto); + switch ((int)f->type_) { + case UPB_FIELD_TYPE_UNSPECIFIED: { + // Type was not specified and must be inferred. + UPB_ASSERT(has_name); + upb_deftype_t type; + const void* def = + _upb_DefBuilder_ResolveAny(ctx, f->full_name, prefix, name, &type); + switch (type) { + case UPB_DEFTYPE_ENUM: + f->sub.enumdef = def; + f->type_ = kUpb_FieldType_Enum; + if (!UPB_DESC(FieldOptions_has_packed)(f->opts)) { + f->is_packed = f->label_ == kUpb_Label_Repeated && + upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto3; + } + break; + case UPB_DEFTYPE_MSG: + f->sub.msgdef = def; + f->type_ = kUpb_FieldType_Message; // It appears there is no way of + // this being a group. + f->has_presence = !upb_FieldDef_IsRepeated(f); + break; + default: + _upb_DefBuilder_Errf(ctx, "Couldn't resolve type name for field %s", + f->full_name); + } + break; + } + case kUpb_FieldType_Message: + case kUpb_FieldType_Group: + UPB_ASSERT(has_name); + f->sub.msgdef = _upb_DefBuilder_Resolve(ctx, f->full_name, prefix, name, + UPB_DEFTYPE_MSG); + break; + case kUpb_FieldType_Enum: + UPB_ASSERT(has_name); + f->sub.enumdef = _upb_DefBuilder_Resolve(ctx, f->full_name, prefix, name, + UPB_DEFTYPE_ENUM); + break; + default: + // No resolution necessary. + break; + } } -int32_t upb_MessageReservedRange_End(const upb_MessageReservedRange* r) { - return r->end; + +static int _upb_FieldDef_Compare(const void* p1, const void* p2) { + const uint32_t v1 = (*(upb_FieldDef**)p1)->number_; + const uint32_t v2 = (*(upb_FieldDef**)p2)->number_; + return (v1 < v2) ? -1 : (v1 > v2); } -upb_MessageReservedRange* _upb_MessageReservedRanges_New( - upb_DefBuilder* ctx, int n, - const UPB_DESC(DescriptorProto_ReservedRange) * const* protos, - const upb_MessageDef* m) { - upb_MessageReservedRange* r = - _upb_DefBuilder_Alloc(ctx, sizeof(upb_MessageReservedRange) * n); +// _upb_FieldDefs_Sorted() is mostly a pure function of its inputs, but has one +// critical side effect that we depend on: it sets layout_index appropriately +// for non-sorted lists of fields. +const upb_FieldDef** _upb_FieldDefs_Sorted(const upb_FieldDef* f, int n, + upb_Arena* a) { + // TODO(salo): Replace this arena alloc with a persistent scratch buffer. + upb_FieldDef** out = (upb_FieldDef**)upb_Arena_Malloc(a, n * sizeof(void*)); + if (!out) return NULL; for (int i = 0; i < n; i++) { - const int32_t start = - UPB_DESC(DescriptorProto_ReservedRange_start)(protos[i]); - const int32_t end = UPB_DESC(DescriptorProto_ReservedRange_end)(protos[i]); - const int32_t max = kUpb_MaxFieldNumber + 1; - - // A full validation would also check that each range is disjoint, and that - // none of the fields overlap with the extension ranges, but we are just - // sanity checking here. - if (start < 1 || end <= start || end > max) { - _upb_DefBuilder_Errf(ctx, - "Reserved range (%d, %d) is invalid, message=%s\n", - (int)start, (int)end, upb_MessageDef_FullName(m)); - } - - r[i].start = start; - r[i].end = end; + out[i] = (upb_FieldDef*)&f[i]; } + qsort(out, n, sizeof(void*), _upb_FieldDef_Compare); - return r; + for (int i = 0; i < n; i++) { + out[i]->layout_index = i; + } + return (const upb_FieldDef**)out; } +bool upb_FieldDef_MiniDescriptorEncode(const upb_FieldDef* f, upb_Arena* a, + upb_StringView* out) { + UPB_ASSERT(f->is_extension); -// Must be last. + upb_DescState s; + _upb_DescState_Init(&s); -struct upb_MethodDef { - const UPB_DESC(MethodOptions) * opts; - upb_ServiceDef* service; - const char* full_name; - const upb_MessageDef* input_type; - const upb_MessageDef* output_type; - int index; - bool client_streaming; - bool server_streaming; -}; - -upb_MethodDef* _upb_MethodDef_At(const upb_MethodDef* m, int i) { - return (upb_MethodDef*)&m[i]; -} + const int number = upb_FieldDef_Number(f); + const uint64_t modifiers = _upb_FieldDef_Modifiers(f); -const upb_ServiceDef* upb_MethodDef_Service(const upb_MethodDef* m) { - return m->service; -} + if (!_upb_DescState_Grow(&s, a)) return false; + s.ptr = upb_MtDataEncoder_EncodeExtension(&s.e, s.ptr, f->type_, number, + modifiers); + *s.ptr = '\0'; -const UPB_DESC(MethodOptions) * upb_MethodDef_Options(const upb_MethodDef* m) { - return m->opts; + out->data = s.buf; + out->size = s.ptr - s.buf; + return true; } -bool upb_MethodDef_HasOptions(const upb_MethodDef* m) { - return m->opts != (void*)kUpbDefOptDefault; -} +static void resolve_extension(upb_DefBuilder* ctx, const char* prefix, + upb_FieldDef* f, + const UPB_DESC(FieldDescriptorProto) * + field_proto) { + if (!UPB_DESC(FieldDescriptorProto_has_extendee)(field_proto)) { + _upb_DefBuilder_Errf(ctx, "extension for field '%s' had no extendee", + f->full_name); + } -const char* upb_MethodDef_FullName(const upb_MethodDef* m) { - return m->full_name; -} + upb_StringView name = UPB_DESC(FieldDescriptorProto_extendee)(field_proto); + const upb_MessageDef* m = + _upb_DefBuilder_Resolve(ctx, f->full_name, prefix, name, UPB_DEFTYPE_MSG); + f->msgdef = m; -const char* upb_MethodDef_Name(const upb_MethodDef* m) { - return _upb_DefBuilder_FullToShort(m->full_name); + if (!_upb_MessageDef_IsValidExtensionNumber(m, f->number_)) { + _upb_DefBuilder_Errf( + ctx, + "field number %u in extension %s has no extension range in message %s", + (unsigned)f->number_, f->full_name, upb_MessageDef_FullName(m)); + } } -int upb_MethodDef_Index(const upb_MethodDef* m) { return m->index; } +void _upb_FieldDef_BuildMiniTableExtension(upb_DefBuilder* ctx, + const upb_FieldDef* f) { + const upb_MiniTableExtension* ext = _upb_FieldDef_ExtensionMiniTable(f); -const upb_MessageDef* upb_MethodDef_InputType(const upb_MethodDef* m) { - return m->input_type; -} + if (ctx->layout) { + UPB_ASSERT(upb_FieldDef_Number(f) == ext->field.number); + } else { + upb_StringView desc; + if (!upb_FieldDef_MiniDescriptorEncode(f, ctx->tmp_arena, &desc)) { + _upb_DefBuilder_OomErr(ctx); + } -const upb_MessageDef* upb_MethodDef_OutputType(const upb_MethodDef* m) { - return m->output_type; -} + upb_MiniTableExtension* mut_ext = (upb_MiniTableExtension*)ext; + upb_MiniTableSub sub = {NULL}; + if (upb_FieldDef_IsSubMessage(f)) { + sub.submsg = upb_MessageDef_MiniTable(f->sub.msgdef); + } else if (_upb_FieldDef_IsClosedEnum(f)) { + sub.subenum = _upb_EnumDef_MiniTable(f->sub.enumdef); + } + bool ok2 = upb_MiniTableExtension_Init(desc.data, desc.size, mut_ext, + upb_MessageDef_MiniTable(f->msgdef), + sub, ctx->status); + if (!ok2) _upb_DefBuilder_Errf(ctx, "Could not build extension mini table"); + } -bool upb_MethodDef_ClientStreaming(const upb_MethodDef* m) { - return m->client_streaming; + bool ok = _upb_DefPool_InsertExt(ctx->symtab, ext, f); + if (!ok) _upb_DefBuilder_OomErr(ctx); } -bool upb_MethodDef_ServerStreaming(const upb_MethodDef* m) { - return m->server_streaming; -} +static void resolve_default(upb_DefBuilder* ctx, upb_FieldDef* f, + const UPB_DESC(FieldDescriptorProto) * + field_proto) { + // Have to delay resolving of the default value until now because of the enum + // case, since enum defaults are specified with a label. + if (UPB_DESC(FieldDescriptorProto_has_default_value)(field_proto)) { + upb_StringView defaultval = + UPB_DESC(FieldDescriptorProto_default_value)(field_proto); -static void create_method(upb_DefBuilder* ctx, - const UPB_DESC(MethodDescriptorProto) * method_proto, - upb_ServiceDef* s, upb_MethodDef* m) { - upb_StringView name = UPB_DESC(MethodDescriptorProto_name)(method_proto); + if (upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto3) { + _upb_DefBuilder_Errf(ctx, + "proto3 fields cannot have explicit defaults (%s)", + f->full_name); + } - m->service = s; - m->full_name = - _upb_DefBuilder_MakeFullName(ctx, upb_ServiceDef_FullName(s), name); - m->client_streaming = - UPB_DESC(MethodDescriptorProto_client_streaming)(method_proto); - m->server_streaming = - UPB_DESC(MethodDescriptorProto_server_streaming)(method_proto); - m->input_type = _upb_DefBuilder_Resolve( - ctx, m->full_name, m->full_name, - UPB_DESC(MethodDescriptorProto_input_type)(method_proto), - UPB_DEFTYPE_MSG); - m->output_type = _upb_DefBuilder_Resolve( - ctx, m->full_name, m->full_name, - UPB_DESC(MethodDescriptorProto_output_type)(method_proto), - UPB_DEFTYPE_MSG); + if (upb_FieldDef_IsSubMessage(f)) { + _upb_DefBuilder_Errf(ctx, + "message fields cannot have explicit defaults (%s)", + f->full_name); + } - UPB_DEF_SET_OPTIONS(m->opts, MethodDescriptorProto, MethodOptions, - method_proto); + parse_default(ctx, defaultval.data, defaultval.size, f); + f->has_default = true; + } else { + set_default_default(ctx, f); + f->has_default = false; + } } -// Allocate and initialize an array of |n| method defs belonging to |s|. -upb_MethodDef* _upb_MethodDefs_New( - upb_DefBuilder* ctx, int n, - const UPB_DESC(MethodDescriptorProto) * const* protos, upb_ServiceDef* s) { - upb_MethodDef* m = _upb_DefBuilder_Alloc(ctx, sizeof(upb_MethodDef) * n); - for (int i = 0; i < n; i++) { - create_method(ctx, protos[i], s, &m[i]); - m[i].index = i; +void _upb_FieldDef_Resolve(upb_DefBuilder* ctx, const char* prefix, + upb_FieldDef* f) { + // We have to stash this away since resolve_subdef() may overwrite it. + const UPB_DESC(FieldDescriptorProto)* field_proto = f->sub.unresolved; + + resolve_subdef(ctx, prefix, f); + resolve_default(ctx, f, field_proto); + + if (f->is_extension) { + resolve_extension(ctx, prefix, f, field_proto); } - return m; } -#include -#include -#include // Must be last. -struct upb_OneofDef { - const UPB_DESC(OneofOptions) * opts; - const upb_MessageDef* parent; - const char* full_name; - int field_count; - bool synthetic; - const upb_FieldDef** fields; - upb_strtable ntof; // lookup a field by name - upb_inttable itof; // lookup a field by number (index) -#if UINTPTR_MAX == 0xffffffff - uint32_t padding; // Increase size to a multiple of 8. -#endif +struct upb_FileDef { + const UPB_DESC(FileOptions) * opts; + const char* name; + const char* package; + const char* edition; + + const upb_FileDef** deps; + const int32_t* public_deps; + const int32_t* weak_deps; + const upb_MessageDef* top_lvl_msgs; + const upb_EnumDef* top_lvl_enums; + const upb_FieldDef* top_lvl_exts; + const upb_ServiceDef* services; + const upb_MiniTableExtension** ext_layouts; + const upb_DefPool* symtab; + + int dep_count; + int public_dep_count; + int weak_dep_count; + int top_lvl_msg_count; + int top_lvl_enum_count; + int top_lvl_ext_count; + int service_count; + int ext_count; // All exts in the file. + upb_Syntax syntax; }; -upb_OneofDef* _upb_OneofDef_At(const upb_OneofDef* o, int i) { - return (upb_OneofDef*)&o[i]; +const UPB_DESC(FileOptions) * upb_FileDef_Options(const upb_FileDef* f) { + return f->opts; } -const UPB_DESC(OneofOptions) * upb_OneofDef_Options(const upb_OneofDef* o) { - return o->opts; +bool upb_FileDef_HasOptions(const upb_FileDef* f) { + return f->opts != (void*)kUpbDefOptDefault; } -bool upb_OneofDef_HasOptions(const upb_OneofDef* o) { - return o->opts != (void*)kUpbDefOptDefault; -} +const char* upb_FileDef_Name(const upb_FileDef* f) { return f->name; } -const char* upb_OneofDef_FullName(const upb_OneofDef* o) { - return o->full_name; +const char* upb_FileDef_Package(const upb_FileDef* f) { + return f->package ? f->package : ""; } -const char* upb_OneofDef_Name(const upb_OneofDef* o) { - return _upb_DefBuilder_FullToShort(o->full_name); +const char* upb_FileDef_Edition(const upb_FileDef* f) { + return f->edition ? f->edition : ""; } -const upb_MessageDef* upb_OneofDef_ContainingType(const upb_OneofDef* o) { - return o->parent; -} +const char* _upb_FileDef_RawPackage(const upb_FileDef* f) { return f->package; } -int upb_OneofDef_FieldCount(const upb_OneofDef* o) { return o->field_count; } +upb_Syntax upb_FileDef_Syntax(const upb_FileDef* f) { return f->syntax; } -const upb_FieldDef* upb_OneofDef_Field(const upb_OneofDef* o, int i) { - UPB_ASSERT(i < o->field_count); - return o->fields[i]; +int upb_FileDef_TopLevelMessageCount(const upb_FileDef* f) { + return f->top_lvl_msg_count; } -int upb_OneofDef_numfields(const upb_OneofDef* o) { return o->field_count; } +int upb_FileDef_DependencyCount(const upb_FileDef* f) { return f->dep_count; } -uint32_t upb_OneofDef_Index(const upb_OneofDef* o) { - // Compute index in our parent's array. - return o - upb_MessageDef_Oneof(o->parent, 0); +int upb_FileDef_PublicDependencyCount(const upb_FileDef* f) { + return f->public_dep_count; } -bool upb_OneofDef_IsSynthetic(const upb_OneofDef* o) { return o->synthetic; } - -const upb_FieldDef* upb_OneofDef_LookupNameWithSize(const upb_OneofDef* o, - const char* name, - size_t size) { - upb_value val; - return upb_strtable_lookup2(&o->ntof, name, size, &val) - ? upb_value_getptr(val) - : NULL; +int upb_FileDef_WeakDependencyCount(const upb_FileDef* f) { + return f->weak_dep_count; } -const upb_FieldDef* upb_OneofDef_LookupName(const upb_OneofDef* o, - const char* name) { - return upb_OneofDef_LookupNameWithSize(o, name, strlen(name)); +const int32_t* _upb_FileDef_PublicDependencyIndexes(const upb_FileDef* f) { + return f->public_deps; } -const upb_FieldDef* upb_OneofDef_LookupNumber(const upb_OneofDef* o, - uint32_t num) { - upb_value val; - return upb_inttable_lookup(&o->itof, num, &val) ? upb_value_getptr(val) - : NULL; +const int32_t* _upb_FileDef_WeakDependencyIndexes(const upb_FileDef* f) { + return f->weak_deps; } -void _upb_OneofDef_Insert(upb_DefBuilder* ctx, upb_OneofDef* o, - const upb_FieldDef* f, const char* name, - size_t size) { - o->field_count++; - if (_upb_FieldDef_IsProto3Optional(f)) o->synthetic = true; - - const int number = upb_FieldDef_Number(f); - const upb_value v = upb_value_constptr(f); - - // TODO(salo): This lookup is unfortunate because we also perform it when - // inserting into the message's table. Unfortunately that step occurs after - // this one and moving things around could be tricky so let's leave it for - // a future refactoring. - const bool number_exists = upb_inttable_lookup(&o->itof, number, NULL); - if (UPB_UNLIKELY(number_exists)) { - _upb_DefBuilder_Errf(ctx, "oneof fields have the same number (%d)", number); - } - - // TODO(salo): More redundant work happening here. - const bool name_exists = upb_strtable_lookup2(&o->ntof, name, size, NULL); - if (UPB_UNLIKELY(name_exists)) { - _upb_DefBuilder_Errf(ctx, "oneof fields have the same name (%.*s)", - (int)size, name); - } - - const bool ok = upb_inttable_insert(&o->itof, number, v, ctx->arena) && - upb_strtable_insert(&o->ntof, name, size, v, ctx->arena); - if (UPB_UNLIKELY(!ok)) { - _upb_DefBuilder_OomErr(ctx); - } +int upb_FileDef_TopLevelEnumCount(const upb_FileDef* f) { + return f->top_lvl_enum_count; } -// Returns the synthetic count. -size_t _upb_OneofDefs_Finalize(upb_DefBuilder* ctx, upb_MessageDef* m) { - int synthetic_count = 0; - - for (int i = 0; i < upb_MessageDef_OneofCount(m); i++) { - upb_OneofDef* o = (upb_OneofDef*)upb_MessageDef_Oneof(m, i); - - if (o->synthetic && o->field_count != 1) { - _upb_DefBuilder_Errf(ctx, - "Synthetic oneofs must have one field, not %d: %s", - o->field_count, upb_OneofDef_Name(o)); - } - - if (o->synthetic) { - synthetic_count++; - } else if (synthetic_count != 0) { - _upb_DefBuilder_Errf( - ctx, "Synthetic oneofs must be after all other oneofs: %s", - upb_OneofDef_Name(o)); - } - - o->fields = - _upb_DefBuilder_Alloc(ctx, sizeof(upb_FieldDef*) * o->field_count); - o->field_count = 0; - } - - for (int i = 0; i < upb_MessageDef_FieldCount(m); i++) { - const upb_FieldDef* f = upb_MessageDef_Field(m, i); - upb_OneofDef* o = (upb_OneofDef*)upb_FieldDef_ContainingOneof(f); - if (o) { - o->fields[o->field_count++] = f; - } - } - - return synthetic_count; +int upb_FileDef_TopLevelExtensionCount(const upb_FileDef* f) { + return f->top_lvl_ext_count; } -static void create_oneofdef(upb_DefBuilder* ctx, upb_MessageDef* m, - const UPB_DESC(OneofDescriptorProto) * oneof_proto, - const upb_OneofDef* _o) { - upb_OneofDef* o = (upb_OneofDef*)_o; - upb_StringView name = UPB_DESC(OneofDescriptorProto_name)(oneof_proto); - - o->parent = m; - o->full_name = - _upb_DefBuilder_MakeFullName(ctx, upb_MessageDef_FullName(m), name); - o->field_count = 0; - o->synthetic = false; - - UPB_DEF_SET_OPTIONS(o->opts, OneofDescriptorProto, OneofOptions, oneof_proto); - - if (upb_MessageDef_FindByNameWithSize(m, name.data, name.size, NULL, NULL)) { - _upb_DefBuilder_Errf(ctx, "duplicate oneof name (%s)", o->full_name); - } - - upb_value v = _upb_DefType_Pack(o, UPB_DEFTYPE_ONEOF); - bool ok = _upb_MessageDef_Insert(m, name.data, name.size, v, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); - - ok = upb_inttable_init(&o->itof, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); +int upb_FileDef_ServiceCount(const upb_FileDef* f) { return f->service_count; } - ok = upb_strtable_init(&o->ntof, 4, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); +const upb_FileDef* upb_FileDef_Dependency(const upb_FileDef* f, int i) { + UPB_ASSERT(0 <= i && i < f->dep_count); + return f->deps[i]; } -// Allocate and initialize an array of |n| oneof defs. -upb_OneofDef* _upb_OneofDefs_New( - upb_DefBuilder* ctx, int n, - const UPB_DESC(OneofDescriptorProto) * const* protos, upb_MessageDef* m) { - _upb_DefType_CheckPadding(sizeof(upb_OneofDef)); - - upb_OneofDef* o = _upb_DefBuilder_Alloc(ctx, sizeof(upb_OneofDef) * n); - for (int i = 0; i < n; i++) { - create_oneofdef(ctx, m, protos[i], &o[i]); - } - return o; +const upb_FileDef* upb_FileDef_PublicDependency(const upb_FileDef* f, int i) { + UPB_ASSERT(0 <= i && i < f->public_dep_count); + return f->deps[f->public_deps[i]]; } - -// Must be last. - -struct upb_ServiceDef { - const UPB_DESC(ServiceOptions) * opts; - const upb_FileDef* file; - const char* full_name; - upb_MethodDef* methods; - int method_count; - int index; -}; - -upb_ServiceDef* _upb_ServiceDef_At(const upb_ServiceDef* s, int index) { - return (upb_ServiceDef*)&s[index]; +const upb_FileDef* upb_FileDef_WeakDependency(const upb_FileDef* f, int i) { + UPB_ASSERT(0 <= i && i < f->public_dep_count); + return f->deps[f->weak_deps[i]]; } -const UPB_DESC(ServiceOptions) * - upb_ServiceDef_Options(const upb_ServiceDef* s) { - return s->opts; +const upb_MessageDef* upb_FileDef_TopLevelMessage(const upb_FileDef* f, int i) { + UPB_ASSERT(0 <= i && i < f->top_lvl_msg_count); + return _upb_MessageDef_At(f->top_lvl_msgs, i); } -bool upb_ServiceDef_HasOptions(const upb_ServiceDef* s) { - return s->opts != (void*)kUpbDefOptDefault; +const upb_EnumDef* upb_FileDef_TopLevelEnum(const upb_FileDef* f, int i) { + UPB_ASSERT(0 <= i && i < f->top_lvl_enum_count); + return _upb_EnumDef_At(f->top_lvl_enums, i); } -const char* upb_ServiceDef_FullName(const upb_ServiceDef* s) { - return s->full_name; +const upb_FieldDef* upb_FileDef_TopLevelExtension(const upb_FileDef* f, int i) { + UPB_ASSERT(0 <= i && i < f->top_lvl_ext_count); + return _upb_FieldDef_At(f->top_lvl_exts, i); } -const char* upb_ServiceDef_Name(const upb_ServiceDef* s) { - return _upb_DefBuilder_FullToShort(s->full_name); +const upb_ServiceDef* upb_FileDef_Service(const upb_FileDef* f, int i) { + UPB_ASSERT(0 <= i && i < f->service_count); + return _upb_ServiceDef_At(f->services, i); } -int upb_ServiceDef_Index(const upb_ServiceDef* s) { return s->index; } - -const upb_FileDef* upb_ServiceDef_File(const upb_ServiceDef* s) { - return s->file; -} +const upb_DefPool* upb_FileDef_Pool(const upb_FileDef* f) { return f->symtab; } -int upb_ServiceDef_MethodCount(const upb_ServiceDef* s) { - return s->method_count; +const upb_MiniTableExtension* _upb_FileDef_ExtensionMiniTable( + const upb_FileDef* f, int i) { + return f->ext_layouts[i]; } -const upb_MethodDef* upb_ServiceDef_Method(const upb_ServiceDef* s, int i) { - return (i < 0 || i >= s->method_count) ? NULL - : _upb_MethodDef_At(s->methods, i); +static char* strviewdup(upb_DefBuilder* ctx, upb_StringView view) { + char* ret = upb_strdup2(view.data, view.size, _upb_DefBuilder_Arena(ctx)); + if (!ret) _upb_DefBuilder_OomErr(ctx); + return ret; } -const upb_MethodDef* upb_ServiceDef_FindMethodByName(const upb_ServiceDef* s, - const char* name) { - for (int i = 0; i < s->method_count; i++) { - const upb_MethodDef* m = _upb_MethodDef_At(s->methods, i); - if (strcmp(name, upb_MethodDef_Name(m)) == 0) { - return m; - } - } - return NULL; +static bool streql_view(upb_StringView view, const char* b) { + return view.size == strlen(b) && memcmp(view.data, b, view.size) == 0; } -static void create_service(upb_DefBuilder* ctx, - const UPB_DESC(ServiceDescriptorProto) * svc_proto, - upb_ServiceDef* s) { - upb_StringView name; +static int count_exts_in_msg(const UPB_DESC(DescriptorProto) * msg_proto) { size_t n; + UPB_DESC(DescriptorProto_extension)(msg_proto, &n); + int ext_count = n; - // Must happen before _upb_DefBuilder_Add() - s->file = _upb_DefBuilder_File(ctx); + const UPB_DESC(DescriptorProto)* const* nested_msgs = + UPB_DESC(DescriptorProto_nested_type)(msg_proto, &n); + for (size_t i = 0; i < n; i++) { + ext_count += count_exts_in_msg(nested_msgs[i]); + } - name = UPB_DESC(ServiceDescriptorProto_name)(svc_proto); - const char* package = _upb_FileDef_RawPackage(s->file); - s->full_name = _upb_DefBuilder_MakeFullName(ctx, package, name); - _upb_DefBuilder_Add(ctx, s->full_name, - _upb_DefType_Pack(s, UPB_DEFTYPE_SERVICE)); + return ext_count; +} - const UPB_DESC(MethodDescriptorProto)* const* methods = - UPB_DESC(ServiceDescriptorProto_method)(svc_proto, &n); - s->method_count = n; - s->methods = _upb_MethodDefs_New(ctx, n, methods, s); +// Allocate and initialize one file def, and add it to the context object. +void _upb_FileDef_Create(upb_DefBuilder* ctx, + const UPB_DESC(FileDescriptorProto) * file_proto) { + upb_FileDef* file = _upb_DefBuilder_Alloc(ctx, sizeof(upb_FileDef)); + ctx->file = file; - UPB_DEF_SET_OPTIONS(s->opts, ServiceDescriptorProto, ServiceOptions, - svc_proto); -} + const UPB_DESC(DescriptorProto)* const* msgs; + const UPB_DESC(EnumDescriptorProto)* const* enums; + const UPB_DESC(FieldDescriptorProto)* const* exts; + const UPB_DESC(ServiceDescriptorProto)* const* services; + const upb_StringView* strs; + const int32_t* public_deps; + const int32_t* weak_deps; + size_t n; -upb_ServiceDef* _upb_ServiceDefs_New( - upb_DefBuilder* ctx, int n, - const UPB_DESC(ServiceDescriptorProto) * const* protos) { - _upb_DefType_CheckPadding(sizeof(upb_ServiceDef)); + file->symtab = ctx->symtab; - upb_ServiceDef* s = _upb_DefBuilder_Alloc(ctx, sizeof(upb_ServiceDef) * n); - for (int i = 0; i < n; i++) { - create_service(ctx, protos[i], &s[i]); - s[i].index = i; + // Count all extensions in the file, to build a flat array of layouts. + UPB_DESC(FileDescriptorProto_extension)(file_proto, &n); + int ext_count = n; + msgs = UPB_DESC(FileDescriptorProto_message_type)(file_proto, &n); + for (size_t i = 0; i < n; i++) { + ext_count += count_exts_in_msg(msgs[i]); } - return s; -} + file->ext_count = ext_count; + if (ctx->layout) { + // We are using the ext layouts that were passed in. + file->ext_layouts = ctx->layout->exts; + if (ctx->layout->ext_count != file->ext_count) { + _upb_DefBuilder_Errf(ctx, + "Extension count did not match layout (%d vs %d)", + ctx->layout->ext_count, file->ext_count); + } + } else { + // We are building ext layouts from scratch. + file->ext_layouts = _upb_DefBuilder_Alloc( + ctx, sizeof(*file->ext_layouts) * file->ext_count); + upb_MiniTableExtension* ext = + _upb_DefBuilder_Alloc(ctx, sizeof(*ext) * file->ext_count); + for (int i = 0; i < file->ext_count; i++) { + file->ext_layouts[i] = &ext[i]; + } + } -#include + upb_StringView name = UPB_DESC(FileDescriptorProto_name)(file_proto); + file->name = strviewdup(ctx, name); + if (strlen(file->name) != name.size) { + _upb_DefBuilder_Errf(ctx, "File name contained embedded NULL"); + } + upb_StringView package = UPB_DESC(FileDescriptorProto_package)(file_proto); -// Must be last. + if (package.size) { + _upb_DefBuilder_CheckIdentFull(ctx, package); + file->package = strviewdup(ctx, package); + } else { + file->package = NULL; + } -// A few fake field types for our tables. -enum { - kUpb_FakeFieldType_FieldNotFound = 0, - kUpb_FakeFieldType_MessageSetItem = 19, -}; + upb_StringView edition = UPB_DESC(FileDescriptorProto_edition)(file_proto); -// DecodeOp: an action to be performed for a wire-type/field-type combination. -enum { - // Special ops: we don't write data to regular fields for these. - kUpb_DecodeOp_UnknownField = -1, - kUpb_DecodeOp_MessageSetItem = -2, + if (edition.size == 0) { + file->edition = NULL; + } else { + // TODO(b/267770604): How should we validate this? + file->edition = strviewdup(ctx, edition); + if (strlen(file->edition) != edition.size) { + _upb_DefBuilder_Errf(ctx, "Edition name contained embedded NULL"); + } + } - // Scalar-only ops. - kUpb_DecodeOp_Scalar1Byte = 0, - kUpb_DecodeOp_Scalar4Byte = 2, - kUpb_DecodeOp_Scalar8Byte = 3, - kUpb_DecodeOp_Enum = 1, + if (UPB_DESC(FileDescriptorProto_has_syntax)(file_proto)) { + upb_StringView syntax = UPB_DESC(FileDescriptorProto_syntax)(file_proto); - // Scalar/repeated ops. - kUpb_DecodeOp_String = 4, - kUpb_DecodeOp_Bytes = 5, - kUpb_DecodeOp_SubMessage = 6, + if (streql_view(syntax, "proto2")) { + file->syntax = kUpb_Syntax_Proto2; + } else if (streql_view(syntax, "proto3")) { + file->syntax = kUpb_Syntax_Proto3; + } else { + _upb_DefBuilder_Errf(ctx, "Invalid syntax '" UPB_STRINGVIEW_FORMAT "'", + UPB_STRINGVIEW_ARGS(syntax)); + } + } else { + file->syntax = kUpb_Syntax_Proto2; + } - // Repeated-only ops (also see macros below). - kUpb_DecodeOp_PackedEnum = 13, -}; + // Read options. + UPB_DEF_SET_OPTIONS(file->opts, FileDescriptorProto, FileOptions, file_proto); -// For packed fields it is helpful to be able to recover the lg2 of the data -// size from the op. -#define OP_FIXPCK_LG2(n) (n + 5) /* n in [2, 3] => op in [7, 8] */ -#define OP_VARPCK_LG2(n) (n + 9) /* n in [0, 2, 3] => op in [9, 11, 12] */ + // Verify dependencies. + strs = UPB_DESC(FileDescriptorProto_dependency)(file_proto, &n); + file->dep_count = n; + file->deps = _upb_DefBuilder_Alloc(ctx, sizeof(*file->deps) * n); -typedef union { - bool bool_val; - uint32_t uint32_val; - uint64_t uint64_val; - uint32_t size; -} wireval; + for (size_t i = 0; i < n; i++) { + upb_StringView str = strs[i]; + file->deps[i] = + upb_DefPool_FindFileByNameWithSize(ctx->symtab, str.data, str.size); + if (!file->deps[i]) { + _upb_DefBuilder_Errf(ctx, + "Depends on file '" UPB_STRINGVIEW_FORMAT + "', but it has not been loaded", + UPB_STRINGVIEW_ARGS(str)); + } + } -static const char* _upb_Decoder_DecodeMessage(upb_Decoder* d, const char* ptr, - upb_Message* msg, - const upb_MiniTable* layout); + public_deps = UPB_DESC(FileDescriptorProto_public_dependency)(file_proto, &n); + file->public_dep_count = n; + file->public_deps = + _upb_DefBuilder_Alloc(ctx, sizeof(*file->public_deps) * n); + int32_t* mutable_public_deps = (int32_t*)file->public_deps; + for (size_t i = 0; i < n; i++) { + if (public_deps[i] >= file->dep_count) { + _upb_DefBuilder_Errf(ctx, "public_dep %d is out of range", + (int)public_deps[i]); + } + mutable_public_deps[i] = public_deps[i]; + } -UPB_NORETURN static void* _upb_Decoder_ErrorJmp(upb_Decoder* d, - upb_DecodeStatus status) { - assert(status != kUpb_DecodeStatus_Ok); - d->status = status; - UPB_LONGJMP(d->err, 1); -} + weak_deps = UPB_DESC(FileDescriptorProto_weak_dependency)(file_proto, &n); + file->weak_dep_count = n; + file->weak_deps = _upb_DefBuilder_Alloc(ctx, sizeof(*file->weak_deps) * n); + int32_t* mutable_weak_deps = (int32_t*)file->weak_deps; + for (size_t i = 0; i < n; i++) { + if (weak_deps[i] >= file->dep_count) { + _upb_DefBuilder_Errf(ctx, "weak_dep %d is out of range", + (int)weak_deps[i]); + } + mutable_weak_deps[i] = weak_deps[i]; + } -const char* _upb_FastDecoder_ErrorJmp(upb_Decoder* d, int status) { - assert(status != kUpb_DecodeStatus_Ok); - d->status = status; - UPB_LONGJMP(d->err, 1); - return NULL; -} + // Create enums. + enums = UPB_DESC(FileDescriptorProto_enum_type)(file_proto, &n); + file->top_lvl_enum_count = n; + file->top_lvl_enums = _upb_EnumDefs_New(ctx, n, enums, NULL); -static void _upb_Decoder_VerifyUtf8(upb_Decoder* d, const char* buf, int len) { - if (!_upb_Decoder_VerifyUtf8Inline(buf, len)) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); - } -} + // Create extensions. + exts = UPB_DESC(FileDescriptorProto_extension)(file_proto, &n); + file->top_lvl_ext_count = n; + file->top_lvl_exts = _upb_Extensions_New(ctx, n, exts, file->package, NULL); -static bool _upb_Decoder_Reserve(upb_Decoder* d, upb_Array* arr, size_t elem) { - bool need_realloc = arr->capacity - arr->size < elem; - if (need_realloc && !_upb_array_realloc(arr, arr->size + elem, &d->arena)) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + // Create messages. + msgs = UPB_DESC(FileDescriptorProto_message_type)(file_proto, &n); + file->top_lvl_msg_count = n; + file->top_lvl_msgs = _upb_MessageDefs_New(ctx, n, msgs, NULL); + + // Create services. + services = UPB_DESC(FileDescriptorProto_service)(file_proto, &n); + file->service_count = n; + file->services = _upb_ServiceDefs_New(ctx, n, services); + + // Now that all names are in the table, build layouts and resolve refs. + + for (int i = 0; i < file->top_lvl_msg_count; i++) { + upb_MessageDef* m = (upb_MessageDef*)upb_FileDef_TopLevelMessage(file, i); + _upb_MessageDef_Resolve(ctx, m); } - return need_realloc; -} -typedef struct { - const char* ptr; - uint64_t val; -} _upb_DecodeLongVarintReturn; + for (int i = 0; i < file->top_lvl_ext_count; i++) { + upb_FieldDef* f = (upb_FieldDef*)upb_FileDef_TopLevelExtension(file, i); + _upb_FieldDef_Resolve(ctx, file->package, f); + } -UPB_NOINLINE -static _upb_DecodeLongVarintReturn _upb_Decoder_DecodeLongVarint( - const char* ptr, uint64_t val) { - _upb_DecodeLongVarintReturn ret = {NULL, 0}; - uint64_t byte; - int i; - for (i = 1; i < 10; i++) { - byte = (uint8_t)ptr[i]; - val += (byte - 1) << (i * 7); - if (!(byte & 0x80)) { - ret.ptr = ptr + i + 1; - ret.val = val; - return ret; - } + for (int i = 0; i < file->top_lvl_msg_count; i++) { + upb_MessageDef* m = (upb_MessageDef*)upb_FileDef_TopLevelMessage(file, i); + _upb_MessageDef_CreateMiniTable(ctx, (upb_MessageDef*)m); } - return ret; -} -UPB_FORCEINLINE -static const char* _upb_Decoder_DecodeVarint(upb_Decoder* d, const char* ptr, - uint64_t* val) { - uint64_t byte = (uint8_t)*ptr; - if (UPB_LIKELY((byte & 0x80) == 0)) { - *val = byte; - return ptr + 1; - } else { - _upb_DecodeLongVarintReturn res = _upb_Decoder_DecodeLongVarint(ptr, byte); - if (!res.ptr) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); - *val = res.val; - return res.ptr; + for (int i = 0; i < file->top_lvl_ext_count; i++) { + upb_FieldDef* f = (upb_FieldDef*)upb_FileDef_TopLevelExtension(file, i); + _upb_FieldDef_BuildMiniTableExtension(ctx, f); } -} -UPB_FORCEINLINE -static const char* _upb_Decoder_DecodeTag(upb_Decoder* d, const char* ptr, - uint32_t* val) { - uint64_t byte = (uint8_t)*ptr; - if (UPB_LIKELY((byte & 0x80) == 0)) { - *val = byte; - return ptr + 1; - } else { - const char* start = ptr; - _upb_DecodeLongVarintReturn res = _upb_Decoder_DecodeLongVarint(ptr, byte); - if (!res.ptr || res.ptr - start > 5 || res.val > UINT32_MAX) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); - } - *val = res.val; - return res.ptr; + for (int i = 0; i < file->top_lvl_msg_count; i++) { + upb_MessageDef* m = (upb_MessageDef*)upb_FileDef_TopLevelMessage(file, i); + _upb_MessageDef_LinkMiniTable(ctx, m); } -} -UPB_FORCEINLINE -static const char* upb_Decoder_DecodeSize(upb_Decoder* d, const char* ptr, - uint32_t* size) { - uint64_t size64; - ptr = _upb_Decoder_DecodeVarint(d, ptr, &size64); - if (size64 >= INT32_MAX || - !upb_EpsCopyInputStream_CheckSize(&d->input, ptr, (int)size64)) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); + if (file->ext_count) { + bool ok = upb_ExtensionRegistry_AddArray( + _upb_DefPool_ExtReg(ctx->symtab), file->ext_layouts, file->ext_count); + if (!ok) _upb_DefBuilder_OomErr(ctx); } - *size = size64; - return ptr; } -static void _upb_Decoder_MungeInt32(wireval* val) { - if (!_upb_IsLittleEndian()) { - /* The next stage will memcpy(dst, &val, 4) */ - val->uint32_val = val->uint64_val; - } + +#include + + +// Must be last. + +bool upb_Message_HasFieldByDef(const upb_Message* msg, const upb_FieldDef* f) { + UPB_ASSERT(upb_FieldDef_HasPresence(f)); + return upb_Message_HasField(msg, upb_FieldDef_MiniTable(f)); } -static void _upb_Decoder_Munge(int type, wireval* val) { - switch (type) { - case kUpb_FieldType_Bool: - val->bool_val = val->uint64_val != 0; - break; - case kUpb_FieldType_SInt32: { - uint32_t n = val->uint64_val; - val->uint32_val = (n >> 1) ^ -(int32_t)(n & 1); - break; - } - case kUpb_FieldType_SInt64: { - uint64_t n = val->uint64_val; - val->uint64_val = (n >> 1) ^ -(int64_t)(n & 1); - break; - } - case kUpb_FieldType_Int32: - case kUpb_FieldType_UInt32: - case kUpb_FieldType_Enum: - _upb_Decoder_MungeInt32(val); - break; +const upb_FieldDef* upb_Message_WhichOneof(const upb_Message* msg, + const upb_OneofDef* o) { + const upb_FieldDef* f = upb_OneofDef_Field(o, 0); + if (upb_OneofDef_IsSynthetic(o)) { + UPB_ASSERT(upb_OneofDef_FieldCount(o) == 1); + return upb_Message_HasFieldByDef(msg, f) ? f : NULL; + } else { + const upb_MiniTableField* field = upb_FieldDef_MiniTable(f); + uint32_t oneof_case = upb_Message_WhichOneofFieldNumber(msg, field); + f = oneof_case ? upb_OneofDef_LookupNumber(o, oneof_case) : NULL; + UPB_ASSERT((f != NULL) == (oneof_case != 0)); + return f; } } -static upb_Message* _upb_Decoder_NewSubMessage(upb_Decoder* d, - const upb_MiniTableSub* subs, - const upb_MiniTableField* field, - upb_TaggedMessagePtr* target) { - const upb_MiniTable* subl = subs[field->UPB_PRIVATE(submsg_index)].submsg; - UPB_ASSERT(subl); - upb_Message* msg = _upb_Message_New(subl, &d->arena); - if (!msg) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - - // Extensions should not be unlinked. A message extension should not be - // registered until its sub-message type is available to be linked. - bool is_empty = subl == &_kUpb_MiniTable_Empty; - bool is_extension = field->mode & kUpb_LabelFlags_IsExtension; - UPB_ASSERT(!(is_empty && is_extension)); +upb_MessageValue upb_Message_GetFieldByDef(const upb_Message* msg, + const upb_FieldDef* f) { + upb_MessageValue default_val = upb_FieldDef_Default(f); + upb_MessageValue ret; + _upb_Message_GetField(msg, upb_FieldDef_MiniTable(f), &default_val, &ret); + return ret; +} - if (is_empty && !(d->options & kUpb_DecodeOption_ExperimentalAllowUnlinked)) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_UnlinkedSubMessage); +upb_MutableMessageValue upb_Message_Mutable(upb_Message* msg, + const upb_FieldDef* f, + upb_Arena* a) { + UPB_ASSERT(upb_FieldDef_IsSubMessage(f) || upb_FieldDef_IsRepeated(f)); + if (upb_FieldDef_HasPresence(f) && !upb_Message_HasFieldByDef(msg, f)) { + // We need to skip the upb_Message_GetFieldByDef() call in this case. + goto make; } - upb_TaggedMessagePtr tagged = _upb_TaggedMessagePtr_Pack(msg, is_empty); - memcpy(target, &tagged, sizeof(tagged)); - return msg; -} - -static upb_Message* _upb_Decoder_ReuseSubMessage( - upb_Decoder* d, const upb_MiniTableSub* subs, - const upb_MiniTableField* field, upb_TaggedMessagePtr* target) { - upb_TaggedMessagePtr tagged = *target; - const upb_MiniTable* subl = subs[field->UPB_PRIVATE(submsg_index)].submsg; - UPB_ASSERT(subl); - if (!upb_TaggedMessagePtr_IsEmpty(tagged) || subl == &_kUpb_MiniTable_Empty) { - return _upb_TaggedMessagePtr_GetMessage(tagged); + upb_MessageValue val = upb_Message_GetFieldByDef(msg, f); + if (val.array_val) { + return (upb_MutableMessageValue){.array = (upb_Array*)val.array_val}; } - // We found an empty message from a previous parse that was performed before - // this field was linked. But it is linked now, so we want to allocate a new - // message of the correct type and promote data into it before continuing. - upb_Message* existing = _upb_TaggedMessagePtr_GetEmptyMessage(tagged); - upb_Message* promoted = _upb_Decoder_NewSubMessage(d, subs, field, target); - size_t size; - const char* unknown = upb_Message_GetUnknown(existing, &size); - upb_DecodeStatus status = upb_Decode(unknown, size, promoted, subl, d->extreg, - d->options, &d->arena); - if (status != kUpb_DecodeStatus_Ok) _upb_Decoder_ErrorJmp(d, status); - return promoted; -} + upb_MutableMessageValue ret; +make: + if (!a) return (upb_MutableMessageValue){.array = NULL}; + if (upb_FieldDef_IsMap(f)) { + const upb_MessageDef* entry = upb_FieldDef_MessageSubDef(f); + const upb_FieldDef* key = + upb_MessageDef_FindFieldByNumber(entry, kUpb_MapEntry_KeyFieldNumber); + const upb_FieldDef* value = + upb_MessageDef_FindFieldByNumber(entry, kUpb_MapEntry_ValueFieldNumber); + ret.map = + upb_Map_New(a, upb_FieldDef_CType(key), upb_FieldDef_CType(value)); + } else if (upb_FieldDef_IsRepeated(f)) { + ret.array = upb_Array_New(a, upb_FieldDef_CType(f)); + } else { + UPB_ASSERT(upb_FieldDef_IsSubMessage(f)); + const upb_MessageDef* m = upb_FieldDef_MessageSubDef(f); + ret.msg = upb_Message_New(upb_MessageDef_MiniTable(m), a); + } -static const char* _upb_Decoder_ReadString(upb_Decoder* d, const char* ptr, - int size, upb_StringView* str) { - const char* str_ptr = ptr; - ptr = upb_EpsCopyInputStream_ReadString(&d->input, &str_ptr, size, &d->arena); - if (!ptr) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - str->data = str_ptr; - str->size = size; - return ptr; -} + val.array_val = ret.array; + upb_Message_SetFieldByDef(msg, f, val, a); -UPB_FORCEINLINE -static const char* _upb_Decoder_RecurseSubMessage(upb_Decoder* d, - const char* ptr, - upb_Message* submsg, - const upb_MiniTable* subl, - uint32_t expected_end_group) { - if (--d->depth < 0) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_MaxDepthExceeded); - } - ptr = _upb_Decoder_DecodeMessage(d, ptr, submsg, subl); - d->depth++; - if (d->end_group != expected_end_group) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); - } - return ptr; + return ret; } -UPB_FORCEINLINE -static const char* _upb_Decoder_DecodeSubMessage( - upb_Decoder* d, const char* ptr, upb_Message* submsg, - const upb_MiniTableSub* subs, const upb_MiniTableField* field, int size) { - int saved_delta = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, size); - const upb_MiniTable* subl = subs[field->UPB_PRIVATE(submsg_index)].submsg; - UPB_ASSERT(subl); - ptr = _upb_Decoder_RecurseSubMessage(d, ptr, submsg, subl, DECODE_NOGROUP); - upb_EpsCopyInputStream_PopLimit(&d->input, ptr, saved_delta); - return ptr; +bool upb_Message_SetFieldByDef(upb_Message* msg, const upb_FieldDef* f, + upb_MessageValue val, upb_Arena* a) { + return _upb_Message_SetField(msg, upb_FieldDef_MiniTable(f), &val, a); } -UPB_FORCEINLINE -static const char* _upb_Decoder_DecodeGroup(upb_Decoder* d, const char* ptr, - upb_Message* submsg, - const upb_MiniTable* subl, - uint32_t number) { - if (_upb_Decoder_IsDone(d, &ptr)) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); - } - ptr = _upb_Decoder_RecurseSubMessage(d, ptr, submsg, subl, number); - d->end_group = DECODE_NOGROUP; - return ptr; +void upb_Message_ClearFieldByDef(upb_Message* msg, const upb_FieldDef* f) { + upb_Message_ClearField(msg, upb_FieldDef_MiniTable(f)); } -UPB_FORCEINLINE -static const char* _upb_Decoder_DecodeUnknownGroup(upb_Decoder* d, - const char* ptr, - uint32_t number) { - return _upb_Decoder_DecodeGroup(d, ptr, NULL, NULL, number); +void upb_Message_ClearByDef(upb_Message* msg, const upb_MessageDef* m) { + upb_Message_Clear(msg, upb_MessageDef_MiniTable(m)); } -UPB_FORCEINLINE -static const char* _upb_Decoder_DecodeKnownGroup( - upb_Decoder* d, const char* ptr, upb_Message* submsg, - const upb_MiniTableSub* subs, const upb_MiniTableField* field) { - const upb_MiniTable* subl = subs[field->UPB_PRIVATE(submsg_index)].submsg; - UPB_ASSERT(subl); - return _upb_Decoder_DecodeGroup(d, ptr, submsg, subl, field->number); -} +bool upb_Message_Next(const upb_Message* msg, const upb_MessageDef* m, + const upb_DefPool* ext_pool, const upb_FieldDef** out_f, + upb_MessageValue* out_val, size_t* iter) { + size_t i = *iter; + size_t n = upb_MessageDef_FieldCount(m); + UPB_UNUSED(ext_pool); -static char* upb_Decoder_EncodeVarint32(uint32_t val, char* ptr) { - do { - uint8_t byte = val & 0x7fU; - val >>= 7; - if (val) byte |= 0x80U; - *(ptr++) = byte; - } while (val); - return ptr; -} + // Iterate over normal fields, returning the first one that is set. + while (++i < n) { + const upb_FieldDef* f = upb_MessageDef_Field(m, i); + const upb_MiniTableField* field = upb_FieldDef_MiniTable(f); + upb_MessageValue val = upb_Message_GetFieldByDef(msg, f); -static void _upb_Decoder_AddUnknownVarints(upb_Decoder* d, upb_Message* msg, - uint32_t val1, uint32_t val2) { - char buf[20]; - char* end = buf; - end = upb_Decoder_EncodeVarint32(val1, end); - end = upb_Decoder_EncodeVarint32(val2, end); + // Skip field if unset or empty. + if (upb_MiniTableField_HasPresence(field)) { + if (!upb_Message_HasFieldByDef(msg, f)) continue; + } else { + switch (upb_FieldMode_Get(field)) { + case kUpb_FieldMode_Map: + if (!val.map_val || upb_Map_Size(val.map_val) == 0) continue; + break; + case kUpb_FieldMode_Array: + if (!val.array_val || upb_Array_Size(val.array_val) == 0) continue; + break; + case kUpb_FieldMode_Scalar: + if (!_upb_MiniTable_ValueIsNonZero(&val, field)) continue; + break; + } + } - if (!_upb_Message_AddUnknown(msg, buf, end - buf, &d->arena)) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + *out_val = val; + *out_f = f; + *iter = i; + return true; } -} -UPB_NOINLINE -static bool _upb_Decoder_CheckEnumSlow(upb_Decoder* d, const char* ptr, - upb_Message* msg, - const upb_MiniTableEnum* e, - const upb_MiniTableField* field, - uint32_t v) { - if (_upb_MiniTable_CheckEnumValueSlow(e, v)) return true; + if (ext_pool) { + // Return any extensions that are set. + size_t count; + const upb_Message_Extension* ext = _upb_Message_Getexts(msg, &count); + if (i - n < count) { + ext += count - 1 - (i - n); + memcpy(out_val, &ext->data, sizeof(*out_val)); + *out_f = upb_DefPool_FindExtensionByMiniTable(ext_pool, ext->ext); + *iter = i; + return true; + } + } - // Unrecognized enum goes into unknown fields. - // For packed fields the tag could be arbitrarily far in the past, so we - // just re-encode the tag and value here. - uint32_t tag = ((uint32_t)field->number << 3) | kUpb_WireType_Varint; - upb_Message* unknown_msg = - field->mode & kUpb_LabelFlags_IsExtension ? d->unknown_msg : msg; - _upb_Decoder_AddUnknownVarints(d, unknown_msg, tag, v); + *iter = i; return false; } -UPB_FORCEINLINE -static bool _upb_Decoder_CheckEnum(upb_Decoder* d, const char* ptr, - upb_Message* msg, const upb_MiniTableEnum* e, - const upb_MiniTableField* field, - wireval* val) { - uint32_t v = val->uint32_val; +bool _upb_Message_DiscardUnknown(upb_Message* msg, const upb_MessageDef* m, + int depth) { + size_t iter = kUpb_Message_Begin; + const upb_FieldDef* f; + upb_MessageValue val; + bool ret = true; - _kUpb_FastEnumCheck_Status status = _upb_MiniTable_CheckEnumValueFast(e, v); - if (UPB_LIKELY(status == _kUpb_FastEnumCheck_ValueIsInEnum)) return true; - return _upb_Decoder_CheckEnumSlow(d, ptr, msg, e, field, v); -} + if (--depth == 0) return false; -UPB_NOINLINE -static const char* _upb_Decoder_DecodeEnumArray(upb_Decoder* d, const char* ptr, - upb_Message* msg, - upb_Array* arr, - const upb_MiniTableSub* subs, - const upb_MiniTableField* field, - wireval* val) { - const upb_MiniTableEnum* e = subs[field->UPB_PRIVATE(submsg_index)].subenum; - if (!_upb_Decoder_CheckEnum(d, ptr, msg, e, field, val)) return ptr; - void* mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->size * 4, void); - arr->size++; - memcpy(mem, val, 4); - return ptr; -} + _upb_Message_DiscardUnknown_shallow(msg); -UPB_FORCEINLINE -static const char* _upb_Decoder_DecodeFixedPacked( - upb_Decoder* d, const char* ptr, upb_Array* arr, wireval* val, - const upb_MiniTableField* field, int lg2) { - int mask = (1 << lg2) - 1; - size_t count = val->size >> lg2; - if ((val->size & mask) != 0) { - // Length isn't a round multiple of elem size. - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); - } - _upb_Decoder_Reserve(d, arr, count); - void* mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << lg2, void); - arr->size += count; - // Note: if/when the decoder supports multi-buffer input, we will need to - // handle buffer seams here. - if (_upb_IsLittleEndian()) { - ptr = upb_EpsCopyInputStream_Copy(&d->input, ptr, mem, val->size); - } else { - int delta = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, val->size); - char* dst = mem; - while (!_upb_Decoder_IsDone(d, &ptr)) { - if (lg2 == 2) { - ptr = upb_WireReader_ReadFixed32(ptr, dst); - dst += 4; - } else { - UPB_ASSERT(lg2 == 3); - ptr = upb_WireReader_ReadFixed64(ptr, dst); - dst += 8; + while (upb_Message_Next(msg, m, NULL /*ext_pool*/, &f, &val, &iter)) { + const upb_MessageDef* subm = upb_FieldDef_MessageSubDef(f); + if (!subm) continue; + if (upb_FieldDef_IsMap(f)) { + const upb_FieldDef* val_f = upb_MessageDef_FindFieldByNumber(subm, 2); + const upb_MessageDef* val_m = upb_FieldDef_MessageSubDef(val_f); + upb_Map* map = (upb_Map*)val.map_val; + size_t iter = kUpb_Map_Begin; + + if (!val_m) continue; + + upb_MessageValue map_key, map_val; + while (upb_Map_Next(map, &map_key, &map_val, &iter)) { + if (!_upb_Message_DiscardUnknown((upb_Message*)map_val.msg_val, val_m, + depth)) { + ret = false; + } + } + } else if (upb_FieldDef_IsRepeated(f)) { + const upb_Array* arr = val.array_val; + size_t i, n = upb_Array_Size(arr); + for (i = 0; i < n; i++) { + upb_MessageValue elem = upb_Array_Get(arr, i); + if (!_upb_Message_DiscardUnknown((upb_Message*)elem.msg_val, subm, + depth)) { + ret = false; + } + } + } else { + if (!_upb_Message_DiscardUnknown((upb_Message*)val.msg_val, subm, + depth)) { + ret = false; } } - upb_EpsCopyInputStream_PopLimit(&d->input, ptr, delta); } - return ptr; + return ret; } -UPB_FORCEINLINE -static const char* _upb_Decoder_DecodeVarintPacked( - upb_Decoder* d, const char* ptr, upb_Array* arr, wireval* val, - const upb_MiniTableField* field, int lg2) { - int scale = 1 << lg2; - int saved_limit = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, val->size); - char* out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << lg2, void); - while (!_upb_Decoder_IsDone(d, &ptr)) { - wireval elem; - ptr = _upb_Decoder_DecodeVarint(d, ptr, &elem.uint64_val); - _upb_Decoder_Munge(field->UPB_PRIVATE(descriptortype), &elem); - if (_upb_Decoder_Reserve(d, arr, 1)) { - out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << lg2, void); - } - arr->size++; - memcpy(out, &elem, scale); - out += scale; - } - upb_EpsCopyInputStream_PopLimit(&d->input, ptr, saved_limit); - return ptr; +bool upb_Message_DiscardUnknown(upb_Message* msg, const upb_MessageDef* m, + int maxdepth) { + return _upb_Message_DiscardUnknown(msg, m, maxdepth); } -UPB_NOINLINE -static const char* _upb_Decoder_DecodeEnumPacked( - upb_Decoder* d, const char* ptr, upb_Message* msg, upb_Array* arr, - const upb_MiniTableSub* subs, const upb_MiniTableField* field, - wireval* val) { - const upb_MiniTableEnum* e = subs[field->UPB_PRIVATE(submsg_index)].subenum; - int saved_limit = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, val->size); - char* out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size * 4, void); - while (!_upb_Decoder_IsDone(d, &ptr)) { - wireval elem; - ptr = _upb_Decoder_DecodeVarint(d, ptr, &elem.uint64_val); - _upb_Decoder_MungeInt32(&elem); - if (!_upb_Decoder_CheckEnum(d, ptr, msg, e, field, &elem)) { - continue; - } - if (_upb_Decoder_Reserve(d, arr, 1)) { - out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size * 4, void); - } - arr->size++; - memcpy(out, &elem, 4); - out += 4; - } - upb_EpsCopyInputStream_PopLimit(&d->input, ptr, saved_limit); - return ptr; -} -upb_Array* _upb_Decoder_CreateArray(upb_Decoder* d, - const upb_MiniTableField* field) { - /* Maps descriptor type -> elem_size_lg2. */ - static const uint8_t kElemSizeLg2[] = { - [0] = -1, // invalid descriptor type - [kUpb_FieldType_Double] = 3, - [kUpb_FieldType_Float] = 2, - [kUpb_FieldType_Int64] = 3, - [kUpb_FieldType_UInt64] = 3, - [kUpb_FieldType_Int32] = 2, - [kUpb_FieldType_Fixed64] = 3, - [kUpb_FieldType_Fixed32] = 2, - [kUpb_FieldType_Bool] = 0, - [kUpb_FieldType_String] = UPB_SIZE(3, 4), - [kUpb_FieldType_Group] = UPB_SIZE(2, 3), - [kUpb_FieldType_Message] = UPB_SIZE(2, 3), - [kUpb_FieldType_Bytes] = UPB_SIZE(3, 4), - [kUpb_FieldType_UInt32] = 2, - [kUpb_FieldType_Enum] = 2, - [kUpb_FieldType_SFixed32] = 2, - [kUpb_FieldType_SFixed64] = 3, - [kUpb_FieldType_SInt32] = 2, - [kUpb_FieldType_SInt64] = 3, - }; - size_t lg2 = kElemSizeLg2[field->UPB_PRIVATE(descriptortype)]; - upb_Array* ret = _upb_Array_New(&d->arena, 4, lg2); - if (!ret) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - return ret; -} +// Must be last. -static const char* _upb_Decoder_DecodeToArray(upb_Decoder* d, const char* ptr, - upb_Message* msg, - const upb_MiniTableSub* subs, - const upb_MiniTableField* field, - wireval* val, int op) { - upb_Array** arrp = UPB_PTR_AT(msg, field->offset, void); - upb_Array* arr = *arrp; - void* mem; +struct upb_MessageDef { + const UPB_DESC(MessageOptions) * opts; + const upb_MiniTable* layout; + const upb_FileDef* file; + const upb_MessageDef* containing_type; + const char* full_name; - if (arr) { - _upb_Decoder_Reserve(d, arr, 1); + // Tables for looking up fields by number and name. + upb_inttable itof; + upb_strtable ntof; + + /* All nested defs. + * MEM: We could save some space here by putting nested defs in a contiguous + * region and calculating counts from offsets or vice-versa. */ + const upb_FieldDef* fields; + const upb_OneofDef* oneofs; + const upb_ExtensionRange* ext_ranges; + const upb_StringView* res_names; + const upb_MessageDef* nested_msgs; + const upb_MessageReservedRange* res_ranges; + const upb_EnumDef* nested_enums; + const upb_FieldDef* nested_exts; + + // TODO(salo): These counters don't need anywhere near 32 bits. + int field_count; + int real_oneof_count; + int oneof_count; + int ext_range_count; + int res_range_count; + int res_name_count; + int nested_msg_count; + int nested_enum_count; + int nested_ext_count; + bool in_message_set; + bool is_sorted; + upb_WellKnown well_known_type; +#if UINTPTR_MAX == 0xffffffff + uint32_t padding; // Increase size to a multiple of 8. +#endif +}; + +static void assign_msg_wellknowntype(upb_MessageDef* m) { + const char* name = m->full_name; + if (name == NULL) { + m->well_known_type = kUpb_WellKnown_Unspecified; + return; + } + if (!strcmp(name, "google.protobuf.Any")) { + m->well_known_type = kUpb_WellKnown_Any; + } else if (!strcmp(name, "google.protobuf.FieldMask")) { + m->well_known_type = kUpb_WellKnown_FieldMask; + } else if (!strcmp(name, "google.protobuf.Duration")) { + m->well_known_type = kUpb_WellKnown_Duration; + } else if (!strcmp(name, "google.protobuf.Timestamp")) { + m->well_known_type = kUpb_WellKnown_Timestamp; + } else if (!strcmp(name, "google.protobuf.DoubleValue")) { + m->well_known_type = kUpb_WellKnown_DoubleValue; + } else if (!strcmp(name, "google.protobuf.FloatValue")) { + m->well_known_type = kUpb_WellKnown_FloatValue; + } else if (!strcmp(name, "google.protobuf.Int64Value")) { + m->well_known_type = kUpb_WellKnown_Int64Value; + } else if (!strcmp(name, "google.protobuf.UInt64Value")) { + m->well_known_type = kUpb_WellKnown_UInt64Value; + } else if (!strcmp(name, "google.protobuf.Int32Value")) { + m->well_known_type = kUpb_WellKnown_Int32Value; + } else if (!strcmp(name, "google.protobuf.UInt32Value")) { + m->well_known_type = kUpb_WellKnown_UInt32Value; + } else if (!strcmp(name, "google.protobuf.BoolValue")) { + m->well_known_type = kUpb_WellKnown_BoolValue; + } else if (!strcmp(name, "google.protobuf.StringValue")) { + m->well_known_type = kUpb_WellKnown_StringValue; + } else if (!strcmp(name, "google.protobuf.BytesValue")) { + m->well_known_type = kUpb_WellKnown_BytesValue; + } else if (!strcmp(name, "google.protobuf.Value")) { + m->well_known_type = kUpb_WellKnown_Value; + } else if (!strcmp(name, "google.protobuf.ListValue")) { + m->well_known_type = kUpb_WellKnown_ListValue; + } else if (!strcmp(name, "google.protobuf.Struct")) { + m->well_known_type = kUpb_WellKnown_Struct; } else { - arr = _upb_Decoder_CreateArray(d, field); - *arrp = arr; + m->well_known_type = kUpb_WellKnown_Unspecified; } +} - switch (op) { - case kUpb_DecodeOp_Scalar1Byte: - case kUpb_DecodeOp_Scalar4Byte: - case kUpb_DecodeOp_Scalar8Byte: - /* Append scalar value. */ - mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << op, void); - arr->size++; - memcpy(mem, val, 1 << op); - return ptr; - case kUpb_DecodeOp_String: - _upb_Decoder_VerifyUtf8(d, ptr, val->size); - /* Fallthrough. */ - case kUpb_DecodeOp_Bytes: { - /* Append bytes. */ - upb_StringView* str = (upb_StringView*)_upb_array_ptr(arr) + arr->size; - arr->size++; - return _upb_Decoder_ReadString(d, ptr, val->size, str); - } - case kUpb_DecodeOp_SubMessage: { - /* Append submessage / group. */ - upb_TaggedMessagePtr* target = UPB_PTR_AT( - _upb_array_ptr(arr), arr->size * sizeof(void*), upb_TaggedMessagePtr); - upb_Message* submsg = _upb_Decoder_NewSubMessage(d, subs, field, target); - arr->size++; - if (UPB_UNLIKELY(field->UPB_PRIVATE(descriptortype) == - kUpb_FieldType_Group)) { - return _upb_Decoder_DecodeKnownGroup(d, ptr, submsg, subs, field); - } else { - return _upb_Decoder_DecodeSubMessage(d, ptr, submsg, subs, field, - val->size); - } +upb_MessageDef* _upb_MessageDef_At(const upb_MessageDef* m, int i) { + return (upb_MessageDef*)&m[i]; +} + +bool _upb_MessageDef_IsValidExtensionNumber(const upb_MessageDef* m, int n) { + for (int i = 0; i < m->ext_range_count; i++) { + const upb_ExtensionRange* r = upb_MessageDef_ExtensionRange(m, i); + if (upb_ExtensionRange_Start(r) <= n && n < upb_ExtensionRange_End(r)) { + return true; } - case OP_FIXPCK_LG2(2): - case OP_FIXPCK_LG2(3): - return _upb_Decoder_DecodeFixedPacked(d, ptr, arr, val, field, - op - OP_FIXPCK_LG2(0)); - case OP_VARPCK_LG2(0): - case OP_VARPCK_LG2(2): - case OP_VARPCK_LG2(3): - return _upb_Decoder_DecodeVarintPacked(d, ptr, arr, val, field, - op - OP_VARPCK_LG2(0)); - case kUpb_DecodeOp_Enum: - return _upb_Decoder_DecodeEnumArray(d, ptr, msg, arr, subs, field, val); - case kUpb_DecodeOp_PackedEnum: - return _upb_Decoder_DecodeEnumPacked(d, ptr, msg, arr, subs, field, val); - default: - UPB_UNREACHABLE(); } + return false; } -upb_Map* _upb_Decoder_CreateMap(upb_Decoder* d, const upb_MiniTable* entry) { - /* Maps descriptor type -> upb map size. */ - static const uint8_t kSizeInMap[] = { - [0] = -1, // invalid descriptor type */ - [kUpb_FieldType_Double] = 8, - [kUpb_FieldType_Float] = 4, - [kUpb_FieldType_Int64] = 8, - [kUpb_FieldType_UInt64] = 8, - [kUpb_FieldType_Int32] = 4, - [kUpb_FieldType_Fixed64] = 8, - [kUpb_FieldType_Fixed32] = 4, - [kUpb_FieldType_Bool] = 1, - [kUpb_FieldType_String] = UPB_MAPTYPE_STRING, - [kUpb_FieldType_Group] = sizeof(void*), - [kUpb_FieldType_Message] = sizeof(void*), - [kUpb_FieldType_Bytes] = UPB_MAPTYPE_STRING, - [kUpb_FieldType_UInt32] = 4, - [kUpb_FieldType_Enum] = 4, - [kUpb_FieldType_SFixed32] = 4, - [kUpb_FieldType_SFixed64] = 8, - [kUpb_FieldType_SInt32] = 4, - [kUpb_FieldType_SInt64] = 8, - }; +const UPB_DESC(MessageOptions) * + upb_MessageDef_Options(const upb_MessageDef* m) { + return m->opts; +} - const upb_MiniTableField* key_field = &entry->fields[0]; - const upb_MiniTableField* val_field = &entry->fields[1]; - char key_size = kSizeInMap[key_field->UPB_PRIVATE(descriptortype)]; - char val_size = kSizeInMap[val_field->UPB_PRIVATE(descriptortype)]; - UPB_ASSERT(key_field->offset == offsetof(upb_MapEntryData, k)); - UPB_ASSERT(val_field->offset == offsetof(upb_MapEntryData, v)); - upb_Map* ret = _upb_Map_New(&d->arena, key_size, val_size); - if (!ret) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - return ret; +bool upb_MessageDef_HasOptions(const upb_MessageDef* m) { + return m->opts != (void*)kUpbDefOptDefault; } -static const char* _upb_Decoder_DecodeToMap(upb_Decoder* d, const char* ptr, - upb_Message* msg, - const upb_MiniTableSub* subs, - const upb_MiniTableField* field, - wireval* val) { - upb_Map** map_p = UPB_PTR_AT(msg, field->offset, upb_Map*); - upb_Map* map = *map_p; - upb_MapEntry ent; - UPB_ASSERT(upb_MiniTableField_Type(field) == kUpb_FieldType_Message); - const upb_MiniTable* entry = subs[field->UPB_PRIVATE(submsg_index)].submsg; +const char* upb_MessageDef_FullName(const upb_MessageDef* m) { + return m->full_name; +} - UPB_ASSERT(entry); - UPB_ASSERT(entry->field_count == 2); - UPB_ASSERT(!upb_IsRepeatedOrMap(&entry->fields[0])); - UPB_ASSERT(!upb_IsRepeatedOrMap(&entry->fields[1])); +const upb_FileDef* upb_MessageDef_File(const upb_MessageDef* m) { + return m->file; +} - if (!map) { - map = _upb_Decoder_CreateMap(d, entry); - *map_p = map; - } +const upb_MessageDef* upb_MessageDef_ContainingType(const upb_MessageDef* m) { + return m->containing_type; +} - // Parse map entry. - memset(&ent, 0, sizeof(ent)); +const char* upb_MessageDef_Name(const upb_MessageDef* m) { + return _upb_DefBuilder_FullToShort(m->full_name); +} - if (entry->fields[1].UPB_PRIVATE(descriptortype) == kUpb_FieldType_Message || - entry->fields[1].UPB_PRIVATE(descriptortype) == kUpb_FieldType_Group) { - // Create proactively to handle the case where it doesn't appear. - upb_TaggedMessagePtr msg; - _upb_Decoder_NewSubMessage(d, entry->subs, &entry->fields[1], &msg); - ent.data.v.val = upb_value_uintptr(msg); - } +upb_Syntax upb_MessageDef_Syntax(const upb_MessageDef* m) { + return upb_FileDef_Syntax(m->file); +} - ptr = - _upb_Decoder_DecodeSubMessage(d, ptr, &ent.data, subs, field, val->size); - // check if ent had any unknown fields - size_t size; - upb_Message_GetUnknown(&ent.data, &size); - if (size != 0) { - char* buf; - size_t size; - uint32_t tag = ((uint32_t)field->number << 3) | kUpb_WireType_Delimited; - upb_EncodeStatus status = - upb_Encode(&ent.data, entry, 0, &d->arena, &buf, &size); - if (status != kUpb_EncodeStatus_Ok) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - } - _upb_Decoder_AddUnknownVarints(d, msg, tag, size); - if (!_upb_Message_AddUnknown(msg, buf, size, &d->arena)) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - } - } else { - if (_upb_Map_Insert(map, &ent.data.k, map->key_size, &ent.data.v, - map->val_size, - &d->arena) == kUpb_MapInsertStatus_OutOfMemory) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - } - } - return ptr; +const upb_FieldDef* upb_MessageDef_FindFieldByNumber(const upb_MessageDef* m, + uint32_t i) { + upb_value val; + return upb_inttable_lookup(&m->itof, i, &val) ? upb_value_getconstptr(val) + : NULL; } -static const char* _upb_Decoder_DecodeToSubMessage( - upb_Decoder* d, const char* ptr, upb_Message* msg, - const upb_MiniTableSub* subs, const upb_MiniTableField* field, wireval* val, - int op) { - void* mem = UPB_PTR_AT(msg, field->offset, void); - int type = field->UPB_PRIVATE(descriptortype); +const upb_FieldDef* upb_MessageDef_FindFieldByNameWithSize( + const upb_MessageDef* m, const char* name, size_t size) { + upb_value val; - if (UPB_UNLIKELY(op == kUpb_DecodeOp_Enum) && - !_upb_Decoder_CheckEnum(d, ptr, msg, - subs[field->UPB_PRIVATE(submsg_index)].subenum, - field, val)) { - return ptr; + if (!upb_strtable_lookup2(&m->ntof, name, size, &val)) { + return NULL; } - /* Set presence if necessary. */ - if (field->presence > 0) { - _upb_sethas_field(msg, field); - } else if (field->presence < 0) { - /* Oneof case */ - uint32_t* oneof_case = _upb_oneofcase_field(msg, field); - if (op == kUpb_DecodeOp_SubMessage && *oneof_case != field->number) { - memset(mem, 0, sizeof(void*)); - } - *oneof_case = field->number; - } + return _upb_DefType_Unpack(val, UPB_DEFTYPE_FIELD); +} - /* Store into message. */ - switch (op) { - case kUpb_DecodeOp_SubMessage: { - upb_TaggedMessagePtr* submsgp = mem; - upb_Message* submsg; - if (*submsgp) { - submsg = _upb_Decoder_ReuseSubMessage(d, subs, field, submsgp); - } else { - submsg = _upb_Decoder_NewSubMessage(d, subs, field, submsgp); - } - if (UPB_UNLIKELY(type == kUpb_FieldType_Group)) { - ptr = _upb_Decoder_DecodeKnownGroup(d, ptr, submsg, subs, field); - } else { - ptr = _upb_Decoder_DecodeSubMessage(d, ptr, submsg, subs, field, - val->size); - } - break; - } - case kUpb_DecodeOp_String: - _upb_Decoder_VerifyUtf8(d, ptr, val->size); - /* Fallthrough. */ - case kUpb_DecodeOp_Bytes: - return _upb_Decoder_ReadString(d, ptr, val->size, mem); - case kUpb_DecodeOp_Scalar8Byte: - memcpy(mem, val, 8); - break; - case kUpb_DecodeOp_Enum: - case kUpb_DecodeOp_Scalar4Byte: - memcpy(mem, val, 4); - break; - case kUpb_DecodeOp_Scalar1Byte: - memcpy(mem, val, 1); - break; - default: - UPB_UNREACHABLE(); +const upb_OneofDef* upb_MessageDef_FindOneofByNameWithSize( + const upb_MessageDef* m, const char* name, size_t size) { + upb_value val; + + if (!upb_strtable_lookup2(&m->ntof, name, size, &val)) { + return NULL; } - return ptr; + return _upb_DefType_Unpack(val, UPB_DEFTYPE_ONEOF); } -UPB_NOINLINE -const char* _upb_Decoder_CheckRequired(upb_Decoder* d, const char* ptr, - const upb_Message* msg, - const upb_MiniTable* l) { - assert(l->required_count); - if (UPB_LIKELY((d->options & kUpb_DecodeOption_CheckRequired) == 0)) { - return ptr; - } - uint64_t msg_head; - memcpy(&msg_head, msg, 8); - msg_head = _upb_BigEndian_Swap64(msg_head); - if (upb_MiniTable_requiredmask(l) & ~msg_head) { - d->missing_required = true; - } - return ptr; +bool _upb_MessageDef_Insert(upb_MessageDef* m, const char* name, size_t len, + upb_value v, upb_Arena* a) { + return upb_strtable_insert(&m->ntof, name, len, v, a); } -UPB_FORCEINLINE -static bool _upb_Decoder_TryFastDispatch(upb_Decoder* d, const char** ptr, - upb_Message* msg, - const upb_MiniTable* layout) { -#if UPB_FASTTABLE - if (layout && layout->table_mask != (unsigned char)-1) { - uint16_t tag = _upb_FastDecoder_LoadTag(*ptr); - intptr_t table = decode_totable(layout); - *ptr = _upb_FastDecoder_TagDispatch(d, *ptr, msg, table, 0, tag); - return true; +bool upb_MessageDef_FindByNameWithSize(const upb_MessageDef* m, + const char* name, size_t len, + const upb_FieldDef** out_f, + const upb_OneofDef** out_o) { + upb_value val; + + if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) { + return false; } -#endif - return false; + + const upb_FieldDef* f = _upb_DefType_Unpack(val, UPB_DEFTYPE_FIELD); + const upb_OneofDef* o = _upb_DefType_Unpack(val, UPB_DEFTYPE_ONEOF); + if (out_f) *out_f = f; + if (out_o) *out_o = o; + return f || o; /* False if this was a JSON name. */ } -static const char* upb_Decoder_SkipField(upb_Decoder* d, const char* ptr, - uint32_t tag) { - int field_number = tag >> 3; - int wire_type = tag & 7; - switch (wire_type) { - case kUpb_WireType_Varint: { - uint64_t val; - return _upb_Decoder_DecodeVarint(d, ptr, &val); - } - case kUpb_WireType_64Bit: - return ptr + 8; - case kUpb_WireType_32Bit: - return ptr + 4; - case kUpb_WireType_Delimited: { - uint32_t size; - ptr = upb_Decoder_DecodeSize(d, ptr, &size); - return ptr + size; - } - case kUpb_WireType_StartGroup: - return _upb_Decoder_DecodeUnknownGroup(d, ptr, field_number); - default: - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); +const upb_FieldDef* upb_MessageDef_FindByJsonNameWithSize( + const upb_MessageDef* m, const char* name, size_t size) { + upb_value val; + const upb_FieldDef* f; + + if (!upb_strtable_lookup2(&m->ntof, name, size, &val)) { + return NULL; } + + f = _upb_DefType_Unpack(val, UPB_DEFTYPE_FIELD); + if (!f) f = _upb_DefType_Unpack(val, UPB_DEFTYPE_FIELD_JSONNAME); + + return f; } -enum { - kStartItemTag = ((kUpb_MsgSet_Item << 3) | kUpb_WireType_StartGroup), - kEndItemTag = ((kUpb_MsgSet_Item << 3) | kUpb_WireType_EndGroup), - kTypeIdTag = ((kUpb_MsgSet_TypeId << 3) | kUpb_WireType_Varint), - kMessageTag = ((kUpb_MsgSet_Message << 3) | kUpb_WireType_Delimited), -}; +int upb_MessageDef_ExtensionRangeCount(const upb_MessageDef* m) { + return m->ext_range_count; +} -static void upb_Decoder_AddKnownMessageSetItem( - upb_Decoder* d, upb_Message* msg, const upb_MiniTableExtension* item_mt, - const char* data, uint32_t size) { - upb_Message_Extension* ext = - _upb_Message_GetOrCreateExtension(msg, item_mt, &d->arena); - if (UPB_UNLIKELY(!ext)) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - } - upb_Message* submsg = _upb_Decoder_NewSubMessage( - d, &ext->ext->sub, &ext->ext->field, (upb_TaggedMessagePtr*)&ext->data); - upb_DecodeStatus status = upb_Decode(data, size, submsg, item_mt->sub.submsg, - d->extreg, d->options, &d->arena); - if (status != kUpb_DecodeStatus_Ok) _upb_Decoder_ErrorJmp(d, status); +int upb_MessageDef_ReservedRangeCount(const upb_MessageDef* m) { + return m->res_range_count; } -static void upb_Decoder_AddUnknownMessageSetItem(upb_Decoder* d, - upb_Message* msg, - uint32_t type_id, - const char* message_data, - uint32_t message_size) { - char buf[60]; - char* ptr = buf; - ptr = upb_Decoder_EncodeVarint32(kStartItemTag, ptr); - ptr = upb_Decoder_EncodeVarint32(kTypeIdTag, ptr); - ptr = upb_Decoder_EncodeVarint32(type_id, ptr); - ptr = upb_Decoder_EncodeVarint32(kMessageTag, ptr); - ptr = upb_Decoder_EncodeVarint32(message_size, ptr); - char* split = ptr; +int upb_MessageDef_ReservedNameCount(const upb_MessageDef* m) { + return m->res_name_count; +} - ptr = upb_Decoder_EncodeVarint32(kEndItemTag, ptr); - char* end = ptr; +int upb_MessageDef_FieldCount(const upb_MessageDef* m) { + return m->field_count; +} - if (!_upb_Message_AddUnknown(msg, buf, split - buf, &d->arena) || - !_upb_Message_AddUnknown(msg, message_data, message_size, &d->arena) || - !_upb_Message_AddUnknown(msg, split, end - split, &d->arena)) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - } +int upb_MessageDef_OneofCount(const upb_MessageDef* m) { + return m->oneof_count; } -static void upb_Decoder_AddMessageSetItem(upb_Decoder* d, upb_Message* msg, - const upb_MiniTable* t, - uint32_t type_id, const char* data, - uint32_t size) { - const upb_MiniTableExtension* item_mt = - upb_ExtensionRegistry_Lookup(d->extreg, t, type_id); - if (item_mt) { - upb_Decoder_AddKnownMessageSetItem(d, msg, item_mt, data, size); - } else { - upb_Decoder_AddUnknownMessageSetItem(d, msg, type_id, data, size); - } +int upb_MessageDef_RealOneofCount(const upb_MessageDef* m) { + return m->real_oneof_count; } -static const char* upb_Decoder_DecodeMessageSetItem( - upb_Decoder* d, const char* ptr, upb_Message* msg, - const upb_MiniTable* layout) { - uint32_t type_id = 0; - upb_StringView preserved = {NULL, 0}; - typedef enum { - kUpb_HaveId = 1 << 0, - kUpb_HavePayload = 1 << 1, - } StateMask; - StateMask state_mask = 0; - while (!_upb_Decoder_IsDone(d, &ptr)) { - uint32_t tag; - ptr = _upb_Decoder_DecodeTag(d, ptr, &tag); - switch (tag) { - case kEndItemTag: - return ptr; - case kTypeIdTag: { - uint64_t tmp; - ptr = _upb_Decoder_DecodeVarint(d, ptr, &tmp); - if (state_mask & kUpb_HaveId) break; // Ignore dup. - state_mask |= kUpb_HaveId; - type_id = tmp; - if (state_mask & kUpb_HavePayload) { - upb_Decoder_AddMessageSetItem(d, msg, layout, type_id, preserved.data, - preserved.size); - } - break; - } - case kMessageTag: { - uint32_t size; - ptr = upb_Decoder_DecodeSize(d, ptr, &size); - const char* data = ptr; - ptr += size; - if (state_mask & kUpb_HavePayload) break; // Ignore dup. - state_mask |= kUpb_HavePayload; - if (state_mask & kUpb_HaveId) { - upb_Decoder_AddMessageSetItem(d, msg, layout, type_id, data, size); - } else { - // Out of order, we must preserve the payload. - preserved.data = data; - preserved.size = size; - } - break; - } - default: - // We do not preserve unexpected fields inside a message set item. - ptr = upb_Decoder_SkipField(d, ptr, tag); - break; - } - } - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); +int upb_MessageDef_NestedMessageCount(const upb_MessageDef* m) { + return m->nested_msg_count; } -static const upb_MiniTableField* _upb_Decoder_FindField(upb_Decoder* d, - const upb_MiniTable* t, - uint32_t field_number, - int* last_field_index) { - static upb_MiniTableField none = { - 0, 0, 0, 0, kUpb_FakeFieldType_FieldNotFound, 0}; - if (t == NULL) return &none; +int upb_MessageDef_NestedEnumCount(const upb_MessageDef* m) { + return m->nested_enum_count; +} - size_t idx = ((size_t)field_number) - 1; // 0 wraps to SIZE_MAX - if (idx < t->dense_below) { - /* Fastest case: index into dense fields. */ - goto found; - } +int upb_MessageDef_NestedExtensionCount(const upb_MessageDef* m) { + return m->nested_ext_count; +} - if (t->dense_below < t->field_count) { - /* Linear search non-dense fields. Resume scanning from last_field_index - * since fields are usually in order. */ - size_t last = *last_field_index; - for (idx = last; idx < t->field_count; idx++) { - if (t->fields[idx].number == field_number) { - goto found; - } - } +const upb_MiniTable* upb_MessageDef_MiniTable(const upb_MessageDef* m) { + return m->layout; +} - for (idx = t->dense_below; idx < last; idx++) { - if (t->fields[idx].number == field_number) { - goto found; - } - } - } +const upb_ExtensionRange* upb_MessageDef_ExtensionRange(const upb_MessageDef* m, + int i) { + UPB_ASSERT(0 <= i && i < m->ext_range_count); + return _upb_ExtensionRange_At(m->ext_ranges, i); +} - if (d->extreg) { - switch (t->ext) { - case kUpb_ExtMode_Extendable: { - const upb_MiniTableExtension* ext = - upb_ExtensionRegistry_Lookup(d->extreg, t, field_number); - if (ext) return &ext->field; - break; - } - case kUpb_ExtMode_IsMessageSet: - if (field_number == kUpb_MsgSet_Item) { - static upb_MiniTableField item = { - 0, 0, 0, 0, kUpb_FakeFieldType_MessageSetItem, 0}; - return &item; - } - break; - } - } +const upb_MessageReservedRange* upb_MessageDef_ReservedRange( + const upb_MessageDef* m, int i) { + UPB_ASSERT(0 <= i && i < m->res_range_count); + return _upb_MessageReservedRange_At(m->res_ranges, i); +} - return &none; /* Unknown field. */ +upb_StringView upb_MessageDef_ReservedName(const upb_MessageDef* m, int i) { + UPB_ASSERT(0 <= i && i < m->res_name_count); + return m->res_names[i]; +} -found: - UPB_ASSERT(t->fields[idx].number == field_number); - *last_field_index = idx; - return &t->fields[idx]; +const upb_FieldDef* upb_MessageDef_Field(const upb_MessageDef* m, int i) { + UPB_ASSERT(0 <= i && i < m->field_count); + return _upb_FieldDef_At(m->fields, i); } -int _upb_Decoder_GetVarintOp(const upb_MiniTableField* field) { - static const int8_t kVarintOps[] = { - [kUpb_FakeFieldType_FieldNotFound] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Double] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Float] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Int64] = kUpb_DecodeOp_Scalar8Byte, - [kUpb_FieldType_UInt64] = kUpb_DecodeOp_Scalar8Byte, - [kUpb_FieldType_Int32] = kUpb_DecodeOp_Scalar4Byte, - [kUpb_FieldType_Fixed64] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Fixed32] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Bool] = kUpb_DecodeOp_Scalar1Byte, - [kUpb_FieldType_String] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Group] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Message] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Bytes] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_UInt32] = kUpb_DecodeOp_Scalar4Byte, - [kUpb_FieldType_Enum] = kUpb_DecodeOp_Enum, - [kUpb_FieldType_SFixed32] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_SFixed64] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_SInt32] = kUpb_DecodeOp_Scalar4Byte, - [kUpb_FieldType_SInt64] = kUpb_DecodeOp_Scalar8Byte, - [kUpb_FakeFieldType_MessageSetItem] = kUpb_DecodeOp_UnknownField, - }; +const upb_OneofDef* upb_MessageDef_Oneof(const upb_MessageDef* m, int i) { + UPB_ASSERT(0 <= i && i < m->oneof_count); + return _upb_OneofDef_At(m->oneofs, i); +} - return kVarintOps[field->UPB_PRIVATE(descriptortype)]; +const upb_MessageDef* upb_MessageDef_NestedMessage(const upb_MessageDef* m, + int i) { + UPB_ASSERT(0 <= i && i < m->nested_msg_count); + return &m->nested_msgs[i]; } -UPB_FORCEINLINE -static void _upb_Decoder_CheckUnlinked(upb_Decoder* d, const upb_MiniTable* mt, - const upb_MiniTableField* field, - int* op) { - // If sub-message is not linked, treat as unknown. - if (field->mode & kUpb_LabelFlags_IsExtension) return; - const upb_MiniTableSub* sub = &mt->subs[field->UPB_PRIVATE(submsg_index)]; - if ((d->options & kUpb_DecodeOption_ExperimentalAllowUnlinked) || - sub->submsg != &_kUpb_MiniTable_Empty) { - return; - } -#ifndef NDEBUG - const upb_MiniTableField* oneof = upb_MiniTable_GetOneof(mt, field); - if (oneof) { - // All other members of the oneof must be message fields that are also - // unlinked. - do { - assert(upb_MiniTableField_CType(oneof) == kUpb_CType_Message); - const upb_MiniTableSub* oneof_sub = - &mt->subs[oneof->UPB_PRIVATE(submsg_index)]; - assert(!oneof_sub); - } while (upb_MiniTable_NextOneofField(mt, &oneof)); - } -#endif // NDEBUG - *op = kUpb_DecodeOp_UnknownField; +const upb_EnumDef* upb_MessageDef_NestedEnum(const upb_MessageDef* m, int i) { + UPB_ASSERT(0 <= i && i < m->nested_enum_count); + return _upb_EnumDef_At(m->nested_enums, i); } -int _upb_Decoder_GetDelimitedOp(upb_Decoder* d, const upb_MiniTable* mt, - const upb_MiniTableField* field) { - enum { kRepeatedBase = 19 }; +const upb_FieldDef* upb_MessageDef_NestedExtension(const upb_MessageDef* m, + int i) { + UPB_ASSERT(0 <= i && i < m->nested_ext_count); + return _upb_FieldDef_At(m->nested_exts, i); +} - static const int8_t kDelimitedOps[] = { - /* For non-repeated field type. */ - [kUpb_FakeFieldType_FieldNotFound] = - kUpb_DecodeOp_UnknownField, // Field not found. - [kUpb_FieldType_Double] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Float] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Int64] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_UInt64] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Int32] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Fixed64] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Fixed32] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Bool] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_String] = kUpb_DecodeOp_String, - [kUpb_FieldType_Group] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Message] = kUpb_DecodeOp_SubMessage, - [kUpb_FieldType_Bytes] = kUpb_DecodeOp_Bytes, - [kUpb_FieldType_UInt32] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Enum] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_SFixed32] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_SFixed64] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_SInt32] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_SInt64] = kUpb_DecodeOp_UnknownField, - [kUpb_FakeFieldType_MessageSetItem] = kUpb_DecodeOp_UnknownField, - // For repeated field type. */ - [kRepeatedBase + kUpb_FieldType_Double] = OP_FIXPCK_LG2(3), - [kRepeatedBase + kUpb_FieldType_Float] = OP_FIXPCK_LG2(2), - [kRepeatedBase + kUpb_FieldType_Int64] = OP_VARPCK_LG2(3), - [kRepeatedBase + kUpb_FieldType_UInt64] = OP_VARPCK_LG2(3), - [kRepeatedBase + kUpb_FieldType_Int32] = OP_VARPCK_LG2(2), - [kRepeatedBase + kUpb_FieldType_Fixed64] = OP_FIXPCK_LG2(3), - [kRepeatedBase + kUpb_FieldType_Fixed32] = OP_FIXPCK_LG2(2), - [kRepeatedBase + kUpb_FieldType_Bool] = OP_VARPCK_LG2(0), - [kRepeatedBase + kUpb_FieldType_String] = kUpb_DecodeOp_String, - [kRepeatedBase + kUpb_FieldType_Group] = kUpb_DecodeOp_SubMessage, - [kRepeatedBase + kUpb_FieldType_Message] = kUpb_DecodeOp_SubMessage, - [kRepeatedBase + kUpb_FieldType_Bytes] = kUpb_DecodeOp_Bytes, - [kRepeatedBase + kUpb_FieldType_UInt32] = OP_VARPCK_LG2(2), - [kRepeatedBase + kUpb_FieldType_Enum] = kUpb_DecodeOp_PackedEnum, - [kRepeatedBase + kUpb_FieldType_SFixed32] = OP_FIXPCK_LG2(2), - [kRepeatedBase + kUpb_FieldType_SFixed64] = OP_FIXPCK_LG2(3), - [kRepeatedBase + kUpb_FieldType_SInt32] = OP_VARPCK_LG2(2), - [kRepeatedBase + kUpb_FieldType_SInt64] = OP_VARPCK_LG2(3), - // Omitting kUpb_FakeFieldType_MessageSetItem, because we never emit a - // repeated msgset type - }; +upb_WellKnown upb_MessageDef_WellKnownType(const upb_MessageDef* m) { + return m->well_known_type; +} - int ndx = field->UPB_PRIVATE(descriptortype); - if (upb_FieldMode_Get(field) == kUpb_FieldMode_Array) ndx += kRepeatedBase; - int op = kDelimitedOps[ndx]; +bool _upb_MessageDef_InMessageSet(const upb_MessageDef* m) { + return m->in_message_set; +} - if (op == kUpb_DecodeOp_SubMessage) { - _upb_Decoder_CheckUnlinked(d, mt, field, &op); - } +const upb_FieldDef* upb_MessageDef_FindFieldByName(const upb_MessageDef* m, + const char* name) { + return upb_MessageDef_FindFieldByNameWithSize(m, name, strlen(name)); +} - return op; +const upb_OneofDef* upb_MessageDef_FindOneofByName(const upb_MessageDef* m, + const char* name) { + return upb_MessageDef_FindOneofByNameWithSize(m, name, strlen(name)); } -UPB_FORCEINLINE -static const char* _upb_Decoder_DecodeWireValue(upb_Decoder* d, const char* ptr, - const upb_MiniTable* mt, - const upb_MiniTableField* field, - int wire_type, wireval* val, - int* op) { - static const unsigned kFixed32OkMask = (1 << kUpb_FieldType_Float) | - (1 << kUpb_FieldType_Fixed32) | - (1 << kUpb_FieldType_SFixed32); +bool upb_MessageDef_IsMapEntry(const upb_MessageDef* m) { + return UPB_DESC(MessageOptions_map_entry)(m->opts); +} - static const unsigned kFixed64OkMask = (1 << kUpb_FieldType_Double) | - (1 << kUpb_FieldType_Fixed64) | - (1 << kUpb_FieldType_SFixed64); +bool upb_MessageDef_IsMessageSet(const upb_MessageDef* m) { + return UPB_DESC(MessageOptions_message_set_wire_format)(m->opts); +} - switch (wire_type) { - case kUpb_WireType_Varint: - ptr = _upb_Decoder_DecodeVarint(d, ptr, &val->uint64_val); - *op = _upb_Decoder_GetVarintOp(field); - _upb_Decoder_Munge(field->UPB_PRIVATE(descriptortype), val); - return ptr; - case kUpb_WireType_32Bit: - *op = kUpb_DecodeOp_Scalar4Byte; - if (((1 << field->UPB_PRIVATE(descriptortype)) & kFixed32OkMask) == 0) { - *op = kUpb_DecodeOp_UnknownField; - } - return upb_WireReader_ReadFixed32(ptr, &val->uint32_val); - case kUpb_WireType_64Bit: - *op = kUpb_DecodeOp_Scalar8Byte; - if (((1 << field->UPB_PRIVATE(descriptortype)) & kFixed64OkMask) == 0) { - *op = kUpb_DecodeOp_UnknownField; - } - return upb_WireReader_ReadFixed64(ptr, &val->uint64_val); - case kUpb_WireType_Delimited: - ptr = upb_Decoder_DecodeSize(d, ptr, &val->size); - *op = _upb_Decoder_GetDelimitedOp(d, mt, field); - return ptr; - case kUpb_WireType_StartGroup: - val->uint32_val = field->number; - if (field->UPB_PRIVATE(descriptortype) == kUpb_FieldType_Group) { - *op = kUpb_DecodeOp_SubMessage; - _upb_Decoder_CheckUnlinked(d, mt, field, op); - } else if (field->UPB_PRIVATE(descriptortype) == - kUpb_FakeFieldType_MessageSetItem) { - *op = kUpb_DecodeOp_MessageSetItem; - } else { - *op = kUpb_DecodeOp_UnknownField; - } - return ptr; - default: - break; - } - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); +static upb_MiniTable* _upb_MessageDef_MakeMiniTable(upb_DefBuilder* ctx, + const upb_MessageDef* m) { + upb_StringView desc; + // Note: this will assign layout_index for fields, so upb_FieldDef_MiniTable() + // is safe to call only after this call. + bool ok = upb_MessageDef_MiniDescriptorEncode(m, ctx->tmp_arena, &desc); + if (!ok) _upb_DefBuilder_OomErr(ctx); + + void** scratch_data = _upb_DefPool_ScratchData(ctx->symtab); + size_t* scratch_size = _upb_DefPool_ScratchSize(ctx->symtab); + upb_MiniTable* ret = upb_MiniTable_BuildWithBuf( + desc.data, desc.size, ctx->platform, ctx->arena, scratch_data, + scratch_size, ctx->status); + if (!ret) _upb_DefBuilder_FailJmp(ctx); + + return ret; } -UPB_FORCEINLINE -static const char* _upb_Decoder_DecodeKnownField( - upb_Decoder* d, const char* ptr, upb_Message* msg, - const upb_MiniTable* layout, const upb_MiniTableField* field, int op, - wireval* val) { - const upb_MiniTableSub* subs = layout->subs; - uint8_t mode = field->mode; +void _upb_MessageDef_Resolve(upb_DefBuilder* ctx, upb_MessageDef* m) { + for (int i = 0; i < m->field_count; i++) { + upb_FieldDef* f = (upb_FieldDef*)upb_MessageDef_Field(m, i); + _upb_FieldDef_Resolve(ctx, m->full_name, f); + } - if (UPB_UNLIKELY(mode & kUpb_LabelFlags_IsExtension)) { - const upb_MiniTableExtension* ext_layout = - (const upb_MiniTableExtension*)field; - upb_Message_Extension* ext = - _upb_Message_GetOrCreateExtension(msg, ext_layout, &d->arena); - if (UPB_UNLIKELY(!ext)) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + m->in_message_set = false; + for (int i = 0; i < upb_MessageDef_NestedExtensionCount(m); i++) { + upb_FieldDef* ext = (upb_FieldDef*)upb_MessageDef_NestedExtension(m, i); + _upb_FieldDef_Resolve(ctx, m->full_name, ext); + if (upb_FieldDef_Type(ext) == kUpb_FieldType_Message && + upb_FieldDef_Label(ext) == kUpb_Label_Optional && + upb_FieldDef_MessageSubDef(ext) == m && + UPB_DESC(MessageOptions_message_set_wire_format)( + upb_MessageDef_Options(upb_FieldDef_ContainingType(ext)))) { + m->in_message_set = true; } - d->unknown_msg = msg; - msg = &ext->data; - subs = &ext->ext->sub; } - switch (mode & kUpb_FieldMode_Mask) { - case kUpb_FieldMode_Array: - return _upb_Decoder_DecodeToArray(d, ptr, msg, subs, field, val, op); - case kUpb_FieldMode_Map: - return _upb_Decoder_DecodeToMap(d, ptr, msg, subs, field, val); - case kUpb_FieldMode_Scalar: - return _upb_Decoder_DecodeToSubMessage(d, ptr, msg, subs, field, val, op); - default: - UPB_UNREACHABLE(); + for (int i = 0; i < upb_MessageDef_NestedMessageCount(m); i++) { + upb_MessageDef* n = (upb_MessageDef*)upb_MessageDef_NestedMessage(m, i); + _upb_MessageDef_Resolve(ctx, n); } } -static const char* _upb_Decoder_ReverseSkipVarint(const char* ptr, - uint32_t val) { - uint32_t seen = 0; - do { - ptr--; - seen <<= 7; - seen |= *ptr & 0x7f; - } while (seen != val); - return ptr; -} - -static const char* _upb_Decoder_DecodeUnknownField(upb_Decoder* d, - const char* ptr, - upb_Message* msg, - int field_number, - int wire_type, wireval val) { - if (field_number == 0) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); +void _upb_MessageDef_InsertField(upb_DefBuilder* ctx, upb_MessageDef* m, + const upb_FieldDef* f) { + const int32_t field_number = upb_FieldDef_Number(f); - // Since unknown fields are the uncommon case, we do a little extra work here - // to walk backwards through the buffer to find the field start. This frees - // up a register in the fast paths (when the field is known), which leads to - // significant speedups in benchmarks. - const char* start = ptr; + if (field_number <= 0 || field_number > kUpb_MaxFieldNumber) { + _upb_DefBuilder_Errf(ctx, "invalid field number (%u)", field_number); + } - if (wire_type == kUpb_WireType_Delimited) ptr += val.size; - if (msg) { - switch (wire_type) { - case kUpb_WireType_Varint: - case kUpb_WireType_Delimited: - start--; - while (start[-1] & 0x80) start--; - break; - case kUpb_WireType_32Bit: - start -= 4; - break; - case kUpb_WireType_64Bit: - start -= 8; - break; - default: - break; - } + const char* json_name = upb_FieldDef_JsonName(f); + const char* shortname = upb_FieldDef_Name(f); + const size_t shortnamelen = strlen(shortname); - assert(start == d->debug_valstart); - uint32_t tag = ((uint32_t)field_number << 3) | wire_type; - start = _upb_Decoder_ReverseSkipVarint(start, tag); - assert(start == d->debug_tagstart); + upb_value v = upb_value_constptr(f); - if (wire_type == kUpb_WireType_StartGroup) { - d->unknown = start; - d->unknown_msg = msg; - ptr = _upb_Decoder_DecodeUnknownGroup(d, ptr, field_number); - start = d->unknown; - d->unknown = NULL; - } - if (!_upb_Message_AddUnknown(msg, start, ptr - start, &d->arena)) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - } - } else if (wire_type == kUpb_WireType_StartGroup) { - ptr = _upb_Decoder_DecodeUnknownGroup(d, ptr, field_number); + upb_value existing_v; + if (upb_strtable_lookup(&m->ntof, shortname, &existing_v)) { + _upb_DefBuilder_Errf(ctx, "duplicate field name (%s)", shortname); } - return ptr; -} -UPB_NOINLINE -static const char* _upb_Decoder_DecodeMessage(upb_Decoder* d, const char* ptr, - upb_Message* msg, - const upb_MiniTable* layout) { - int last_field_index = 0; + const upb_value field_v = _upb_DefType_Pack(f, UPB_DEFTYPE_FIELD); + bool ok = + _upb_MessageDef_Insert(m, shortname, shortnamelen, field_v, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); -#if UPB_FASTTABLE - // The first time we want to skip fast dispatch, because we may have just been - // invoked by the fast parser to handle a case that it bailed on. - if (!_upb_Decoder_IsDone(d, &ptr)) goto nofast; -#endif + if (strcmp(shortname, json_name) != 0) { + if (upb_strtable_lookup(&m->ntof, json_name, &v)) { + _upb_DefBuilder_Errf(ctx, "duplicate json_name (%s)", json_name); + } - while (!_upb_Decoder_IsDone(d, &ptr)) { - uint32_t tag; - const upb_MiniTableField* field; - int field_number; - int wire_type; - wireval val; - int op; + const size_t json_size = strlen(json_name); + const upb_value json_v = _upb_DefType_Pack(f, UPB_DEFTYPE_FIELD_JSONNAME); + ok = _upb_MessageDef_Insert(m, json_name, json_size, json_v, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); + } - if (_upb_Decoder_TryFastDispatch(d, &ptr, msg, layout)) break; + if (upb_inttable_lookup(&m->itof, field_number, NULL)) { + _upb_DefBuilder_Errf(ctx, "duplicate field number (%u)", field_number); + } -#if UPB_FASTTABLE - nofast: -#endif + ok = upb_inttable_insert(&m->itof, field_number, v, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); +} -#ifndef NDEBUG - d->debug_tagstart = ptr; -#endif +void _upb_MessageDef_CreateMiniTable(upb_DefBuilder* ctx, upb_MessageDef* m) { + if (ctx->layout == NULL) { + m->layout = _upb_MessageDef_MakeMiniTable(ctx, m); + } else { + UPB_ASSERT(ctx->msg_count < ctx->layout->msg_count); + m->layout = ctx->layout->msgs[ctx->msg_count++]; + UPB_ASSERT(m->field_count == m->layout->field_count); - UPB_ASSERT(ptr < d->input.limit_ptr); - ptr = _upb_Decoder_DecodeTag(d, ptr, &tag); - field_number = tag >> 3; - wire_type = tag & 7; + // We don't need the result of this call, but it will assign layout_index + // for all the fields in O(n lg n) time. + _upb_FieldDefs_Sorted(m->fields, m->field_count, ctx->tmp_arena); + } -#ifndef NDEBUG - d->debug_valstart = ptr; -#endif + for (int i = 0; i < m->nested_msg_count; i++) { + upb_MessageDef* nested = + (upb_MessageDef*)upb_MessageDef_NestedMessage(m, i); + _upb_MessageDef_CreateMiniTable(ctx, nested); + } +} - if (wire_type == kUpb_WireType_EndGroup) { - d->end_group = field_number; - return ptr; - } +void _upb_MessageDef_LinkMiniTable(upb_DefBuilder* ctx, + const upb_MessageDef* m) { + for (int i = 0; i < upb_MessageDef_NestedExtensionCount(m); i++) { + const upb_FieldDef* ext = upb_MessageDef_NestedExtension(m, i); + _upb_FieldDef_BuildMiniTableExtension(ctx, ext); + } - field = _upb_Decoder_FindField(d, layout, field_number, &last_field_index); - ptr = _upb_Decoder_DecodeWireValue(d, ptr, layout, field, wire_type, &val, - &op); + for (int i = 0; i < m->nested_msg_count; i++) { + _upb_MessageDef_LinkMiniTable(ctx, upb_MessageDef_NestedMessage(m, i)); + } - if (op >= 0) { - ptr = _upb_Decoder_DecodeKnownField(d, ptr, msg, layout, field, op, &val); - } else { - switch (op) { - case kUpb_DecodeOp_UnknownField: - ptr = _upb_Decoder_DecodeUnknownField(d, ptr, msg, field_number, - wire_type, val); - break; - case kUpb_DecodeOp_MessageSetItem: - ptr = upb_Decoder_DecodeMessageSetItem(d, ptr, msg, layout); - break; + if (ctx->layout) return; + + for (int i = 0; i < m->field_count; i++) { + const upb_FieldDef* f = upb_MessageDef_Field(m, i); + const upb_MessageDef* sub_m = upb_FieldDef_MessageSubDef(f); + const upb_EnumDef* sub_e = upb_FieldDef_EnumSubDef(f); + const int layout_index = _upb_FieldDef_LayoutIndex(f); + upb_MiniTable* mt = (upb_MiniTable*)upb_MessageDef_MiniTable(m); + + UPB_ASSERT(layout_index < m->field_count); + upb_MiniTableField* mt_f = + (upb_MiniTableField*)&m->layout->fields[layout_index]; + if (sub_m) { + if (!mt->subs) { + _upb_DefBuilder_Errf(ctx, "unexpected submsg for (%s)", m->full_name); + } + UPB_ASSERT(mt_f); + UPB_ASSERT(sub_m->layout); + if (UPB_UNLIKELY(!upb_MiniTable_SetSubMessage(mt, mt_f, sub_m->layout))) { + _upb_DefBuilder_Errf(ctx, "invalid submsg for (%s)", m->full_name); + } + } else if (_upb_FieldDef_IsClosedEnum(f)) { + const upb_MiniTableEnum* mt_e = _upb_EnumDef_MiniTable(sub_e); + if (UPB_UNLIKELY(!upb_MiniTable_SetSubEnum(mt, mt_f, mt_e))) { + _upb_DefBuilder_Errf(ctx, "invalid subenum for (%s)", m->full_name); } } } - return UPB_UNLIKELY(layout && layout->required_count) - ? _upb_Decoder_CheckRequired(d, ptr, msg, layout) - : ptr; -} - -const char* _upb_FastDecoder_DecodeGeneric(struct upb_Decoder* d, - const char* ptr, upb_Message* msg, - intptr_t table, uint64_t hasbits, - uint64_t data) { - (void)data; - *(uint32_t*)msg |= hasbits; - return _upb_Decoder_DecodeMessage(d, ptr, msg, decode_totablep(table)); +#ifndef NDEBUG + for (int i = 0; i < m->field_count; i++) { + const upb_FieldDef* f = upb_MessageDef_Field(m, i); + const int layout_index = _upb_FieldDef_LayoutIndex(f); + UPB_ASSERT(layout_index < m->layout->field_count); + const upb_MiniTableField* mt_f = &m->layout->fields[layout_index]; + UPB_ASSERT(upb_FieldDef_Type(f) == upb_MiniTableField_Type(mt_f)); + UPB_ASSERT(upb_FieldDef_CType(f) == upb_MiniTableField_CType(mt_f)); + UPB_ASSERT(upb_FieldDef_HasPresence(f) == + upb_MiniTableField_HasPresence(mt_f)); + } +#endif } -static upb_DecodeStatus _upb_Decoder_DecodeTop(struct upb_Decoder* d, - const char* buf, void* msg, - const upb_MiniTable* l) { - if (!_upb_Decoder_TryFastDispatch(d, &buf, msg, l)) { - _upb_Decoder_DecodeMessage(d, buf, msg, l); +static uint64_t _upb_MessageDef_Modifiers(const upb_MessageDef* m) { + uint64_t out = 0; + if (upb_FileDef_Syntax(m->file) == kUpb_Syntax_Proto3) { + out |= kUpb_MessageModifier_ValidateUtf8; + out |= kUpb_MessageModifier_DefaultIsPacked; } - if (d->end_group != DECODE_NOGROUP) return kUpb_DecodeStatus_Malformed; - if (d->missing_required) return kUpb_DecodeStatus_MissingRequired; - return kUpb_DecodeStatus_Ok; + if (m->ext_range_count) { + out |= kUpb_MessageModifier_IsExtendable; + } + return out; } -UPB_NOINLINE -const char* _upb_Decoder_IsDoneFallback(upb_EpsCopyInputStream* e, - const char* ptr, int overrun) { - return _upb_EpsCopyInputStream_IsDoneFallbackInline( - e, ptr, overrun, _upb_Decoder_BufferFlipCallback); -} +static bool _upb_MessageDef_EncodeMap(upb_DescState* s, const upb_MessageDef* m, + upb_Arena* a) { + if (m->field_count != 2) return false; -static upb_DecodeStatus upb_Decoder_Decode(upb_Decoder* const decoder, - const char* const buf, - void* const msg, - const upb_MiniTable* const l, - upb_Arena* const arena) { - if (UPB_SETJMP(decoder->err) == 0) { - decoder->status = _upb_Decoder_DecodeTop(decoder, buf, msg, l); - } else { - UPB_ASSERT(decoder->status != kUpb_DecodeStatus_Ok); - } + const upb_FieldDef* key_field = upb_MessageDef_Field(m, 0); + const upb_FieldDef* val_field = upb_MessageDef_Field(m, 1); + if (key_field == NULL || val_field == NULL) return false; - _upb_MemBlock* blocks = - upb_Atomic_Load(&decoder->arena.blocks, memory_order_relaxed); - arena->head = decoder->arena.head; - upb_Atomic_Store(&arena->blocks, blocks, memory_order_relaxed); - return decoder->status; + UPB_ASSERT(_upb_FieldDef_LayoutIndex(key_field) == 0); + UPB_ASSERT(_upb_FieldDef_LayoutIndex(val_field) == 1); + + s->ptr = upb_MtDataEncoder_EncodeMap( + &s->e, s->ptr, upb_FieldDef_Type(key_field), upb_FieldDef_Type(val_field), + _upb_FieldDef_Modifiers(key_field), _upb_FieldDef_Modifiers(val_field)); + return true; } -upb_DecodeStatus upb_Decode(const char* buf, size_t size, void* msg, - const upb_MiniTable* l, - const upb_ExtensionRegistry* extreg, int options, - upb_Arena* arena) { - upb_Decoder decoder; - unsigned depth = (unsigned)options >> 16; - - upb_EpsCopyInputStream_Init(&decoder.input, &buf, size, - options & kUpb_DecodeOption_AliasString); +static bool _upb_MessageDef_EncodeMessage(upb_DescState* s, + const upb_MessageDef* m, + upb_Arena* a) { + const upb_FieldDef** sorted = NULL; + if (!m->is_sorted) { + sorted = _upb_FieldDefs_Sorted(m->fields, m->field_count, a); + if (!sorted) return false; + } - decoder.extreg = extreg; - decoder.unknown = NULL; - decoder.depth = depth ? depth : kUpb_WireFormat_DefaultDepthLimit; - decoder.end_group = DECODE_NOGROUP; - decoder.options = (uint16_t)options; - decoder.missing_required = false; - decoder.status = kUpb_DecodeStatus_Ok; + s->ptr = upb_MtDataEncoder_StartMessage(&s->e, s->ptr, + _upb_MessageDef_Modifiers(m)); - // Violating the encapsulation of the arena for performance reasons. - // This is a temporary arena that we swap into and swap out of when we are - // done. The temporary arena only needs to be able to handle allocation, - // not fuse or free, so it does not need many of the members to be initialized - // (particularly parent_or_count). - _upb_MemBlock* blocks = upb_Atomic_Load(&arena->blocks, memory_order_relaxed); - decoder.arena.head = arena->head; - decoder.arena.block_alloc = arena->block_alloc; - upb_Atomic_Init(&decoder.arena.blocks, blocks); + for (int i = 0; i < m->field_count; i++) { + const upb_FieldDef* f = sorted ? sorted[i] : upb_MessageDef_Field(m, i); + const upb_FieldType type = upb_FieldDef_Type(f); + const int number = upb_FieldDef_Number(f); + const uint64_t modifiers = _upb_FieldDef_Modifiers(f); - return upb_Decoder_Decode(&decoder, buf, msg, l, arena); -} + if (!_upb_DescState_Grow(s, a)) return false; + s->ptr = upb_MtDataEncoder_PutField(&s->e, s->ptr, type, number, modifiers); + } -#undef OP_FIXPCK_LG2 -#undef OP_VARPCK_LG2 + for (int i = 0; i < m->real_oneof_count; i++) { + if (!_upb_DescState_Grow(s, a)) return false; + s->ptr = upb_MtDataEncoder_StartOneof(&s->e, s->ptr); -// Fast decoder: ~3x the speed of decode.c, but requires x86-64/ARM64. -// Also the table size grows by 2x. -// -// Could potentially be ported to other 64-bit archs that pass at least six -// arguments in registers and have 8 unused high bits in pointers. -// -// The overall design is to create specialized functions for every possible -// field type (eg. oneof boolean field with a 1 byte tag) and then dispatch -// to the specialized function as quickly as possible. + const upb_OneofDef* o = upb_MessageDef_Oneof(m, i); + const int field_count = upb_OneofDef_FieldCount(o); + for (int j = 0; j < field_count; j++) { + const int number = upb_FieldDef_Number(upb_OneofDef_Field(o, j)); + if (!_upb_DescState_Grow(s, a)) return false; + s->ptr = upb_MtDataEncoder_PutOneofField(&s->e, s->ptr, number); + } + } + return true; +} -// Must be last. +static bool _upb_MessageDef_EncodeMessageSet(upb_DescState* s, + const upb_MessageDef* m, + upb_Arena* a) { + s->ptr = upb_MtDataEncoder_EncodeMessageSet(&s->e, s->ptr); -#if UPB_FASTTABLE + return true; +} -// The standard set of arguments passed to each parsing function. -// Thanks to x86-64 calling conventions, these will stay in registers. -#define UPB_PARSE_PARAMS \ - upb_Decoder *d, const char *ptr, upb_Message *msg, intptr_t table, \ - uint64_t hasbits, uint64_t data +bool upb_MessageDef_MiniDescriptorEncode(const upb_MessageDef* m, upb_Arena* a, + upb_StringView* out) { + upb_DescState s; + _upb_DescState_Init(&s); -#define UPB_PARSE_ARGS d, ptr, msg, table, hasbits, data + if (!_upb_DescState_Grow(&s, a)) return false; -#define RETURN_GENERIC(m) \ - /* Uncomment either of these for debugging purposes. */ \ - /* fprintf(stderr, m); */ \ - /*__builtin_trap(); */ \ - return _upb_FastDecoder_DecodeGeneric(d, ptr, msg, table, hasbits, 0); + if (upb_MessageDef_IsMapEntry(m)) { + if (!_upb_MessageDef_EncodeMap(&s, m, a)) return false; + } else if (UPB_DESC(MessageOptions_message_set_wire_format)(m->opts)) { + if (!_upb_MessageDef_EncodeMessageSet(&s, m, a)) return false; + } else { + if (!_upb_MessageDef_EncodeMessage(&s, m, a)) return false; + } -typedef enum { - CARD_s = 0, /* Singular (optional, non-repeated) */ - CARD_o = 1, /* Oneof */ - CARD_r = 2, /* Repeated */ - CARD_p = 3 /* Packed Repeated */ -} upb_card; + if (!_upb_DescState_Grow(&s, a)) return false; + *s.ptr = '\0'; -UPB_NOINLINE -static const char* fastdecode_isdonefallback(UPB_PARSE_PARAMS) { - int overrun = data; - ptr = _upb_EpsCopyInputStream_IsDoneFallbackInline( - &d->input, ptr, overrun, _upb_Decoder_BufferFlipCallback); - data = _upb_FastDecoder_LoadTag(ptr); - UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); + out->data = s.buf; + out->size = s.ptr - s.buf; + return true; } -UPB_FORCEINLINE -static const char* fastdecode_dispatch(UPB_PARSE_PARAMS) { - int overrun; - switch (upb_EpsCopyInputStream_IsDoneStatus(&d->input, ptr, &overrun)) { - case kUpb_IsDoneStatus_Done: - *(uint32_t*)msg |= hasbits; // Sync hasbits. - const upb_MiniTable* l = decode_totablep(table); - return UPB_UNLIKELY(l->required_count) - ? _upb_Decoder_CheckRequired(d, ptr, msg, l) - : ptr; - case kUpb_IsDoneStatus_NotDone: - break; - case kUpb_IsDoneStatus_NeedFallback: - data = overrun; - UPB_MUSTTAIL return fastdecode_isdonefallback(UPB_PARSE_ARGS); +static upb_StringView* _upb_ReservedNames_New(upb_DefBuilder* ctx, int n, + const upb_StringView* protos) { + upb_StringView* sv = _upb_DefBuilder_Alloc(ctx, sizeof(upb_StringView) * n); + for (int i = 0; i < n; i++) { + sv[i].data = + upb_strdup2(protos[i].data, protos[i].size, _upb_DefBuilder_Arena(ctx)); + sv[i].size = protos[i].size; } - - // Read two bytes of tag data (for a one-byte tag, the high byte is junk). - data = _upb_FastDecoder_LoadTag(ptr); - UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); + return sv; } -UPB_FORCEINLINE -static bool fastdecode_checktag(uint16_t data, int tagbytes) { - if (tagbytes == 1) { - return (data & 0xff) == 0; - } else { - return data == 0; - } -} +static void create_msgdef(upb_DefBuilder* ctx, const char* prefix, + const UPB_DESC(DescriptorProto) * msg_proto, + const upb_MessageDef* containing_type, + upb_MessageDef* m) { + const UPB_DESC(OneofDescriptorProto)* const* oneofs; + const UPB_DESC(FieldDescriptorProto)* const* fields; + const UPB_DESC(DescriptorProto_ExtensionRange)* const* ext_ranges; + const UPB_DESC(DescriptorProto_ReservedRange)* const* res_ranges; + const upb_StringView* res_names; + size_t n_oneof, n_field, n_enum, n_ext, n_msg; + size_t n_ext_range, n_res_range, n_res_name; + upb_StringView name; -UPB_FORCEINLINE -static const char* fastdecode_longsize(const char* ptr, int* size) { - int i; - UPB_ASSERT(*size & 0x80); - *size &= 0xff; - for (i = 0; i < 3; i++) { - ptr++; - size_t byte = (uint8_t)ptr[-1]; - *size += (byte - 1) << (7 + 7 * i); - if (UPB_LIKELY((byte & 0x80) == 0)) return ptr; - } - ptr++; - size_t byte = (uint8_t)ptr[-1]; - // len is limited by 2gb not 4gb, hence 8 and not 16 as normally expected - // for a 32 bit varint. - if (UPB_UNLIKELY(byte >= 8)) return NULL; - *size += (byte - 1) << 28; - return ptr; -} + // Must happen before _upb_DefBuilder_Add() + m->file = _upb_DefBuilder_File(ctx); -UPB_FORCEINLINE -static const char* fastdecode_delimited( - upb_Decoder* d, const char* ptr, - upb_EpsCopyInputStream_ParseDelimitedFunc* func, void* ctx) { - ptr++; + m->containing_type = containing_type; + m->is_sorted = true; - // Sign-extend so varint greater than one byte becomes negative, causing - // fast delimited parse to fail. - int len = (int8_t)ptr[-1]; + name = UPB_DESC(DescriptorProto_name)(msg_proto); - if (!upb_EpsCopyInputStream_TryParseDelimitedFast(&d->input, &ptr, len, func, - ctx)) { - // Slow case: Sub-message is >=128 bytes and/or exceeds the current buffer. - // If it exceeds the buffer limit, limit/limit_ptr will change during - // sub-message parsing, so we need to preserve delta, not limit. - if (UPB_UNLIKELY(len & 0x80)) { - // Size varint >1 byte (length >= 128). - ptr = fastdecode_longsize(ptr, &len); - if (!ptr) { - // Corrupt wire format: size exceeded INT_MAX. - return NULL; - } - } - if (!upb_EpsCopyInputStream_CheckSize(&d->input, ptr, len)) { - // Corrupt wire format: invalid limit. - return NULL; - } - int delta = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, len); - ptr = func(&d->input, ptr, ctx); - upb_EpsCopyInputStream_PopLimit(&d->input, ptr, delta); - } - return ptr; -} + m->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); + _upb_DefBuilder_Add(ctx, m->full_name, _upb_DefType_Pack(m, UPB_DEFTYPE_MSG)); -/* singular, oneof, repeated field handling ***********************************/ + oneofs = UPB_DESC(DescriptorProto_oneof_decl)(msg_proto, &n_oneof); + fields = UPB_DESC(DescriptorProto_field)(msg_proto, &n_field); + ext_ranges = + UPB_DESC(DescriptorProto_extension_range)(msg_proto, &n_ext_range); + res_ranges = + UPB_DESC(DescriptorProto_reserved_range)(msg_proto, &n_res_range); + res_names = UPB_DESC(DescriptorProto_reserved_name)(msg_proto, &n_res_name); -typedef struct { - upb_Array* arr; - void* end; -} fastdecode_arr; + bool ok = upb_inttable_init(&m->itof, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); -typedef enum { - FD_NEXT_ATLIMIT, - FD_NEXT_SAMEFIELD, - FD_NEXT_OTHERFIELD -} fastdecode_next; + ok = upb_strtable_init(&m->ntof, n_oneof + n_field, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); -typedef struct { - void* dst; - fastdecode_next next; - uint32_t tag; -} fastdecode_nextret; + UPB_DEF_SET_OPTIONS(m->opts, DescriptorProto, MessageOptions, msg_proto); -UPB_FORCEINLINE -static void* fastdecode_resizearr(upb_Decoder* d, void* dst, - fastdecode_arr* farr, int valbytes) { - if (UPB_UNLIKELY(dst == farr->end)) { - size_t old_size = farr->arr->capacity; - size_t old_bytes = old_size * valbytes; - size_t new_size = old_size * 2; - size_t new_bytes = new_size * valbytes; - char* old_ptr = _upb_array_ptr(farr->arr); - char* new_ptr = upb_Arena_Realloc(&d->arena, old_ptr, old_bytes, new_bytes); - uint8_t elem_size_lg2 = __builtin_ctz(valbytes); - farr->arr->capacity = new_size; - farr->arr->data = _upb_array_tagptr(new_ptr, elem_size_lg2); - dst = (void*)(new_ptr + (old_size * valbytes)); - farr->end = (void*)(new_ptr + (new_size * valbytes)); + m->oneof_count = n_oneof; + m->oneofs = _upb_OneofDefs_New(ctx, n_oneof, oneofs, m); + + m->field_count = n_field; + m->fields = + _upb_FieldDefs_New(ctx, n_field, fields, m->full_name, m, &m->is_sorted); + + // Message Sets may not contain fields. + if (UPB_UNLIKELY(UPB_DESC(MessageOptions_message_set_wire_format)(m->opts))) { + if (UPB_UNLIKELY(n_field > 0)) { + _upb_DefBuilder_Errf(ctx, "invalid message set (%s)", m->full_name); + } } - return dst; + + m->ext_range_count = n_ext_range; + m->ext_ranges = _upb_ExtensionRanges_New(ctx, n_ext_range, ext_ranges, m); + + m->res_range_count = n_res_range; + m->res_ranges = + _upb_MessageReservedRanges_New(ctx, n_res_range, res_ranges, m); + + m->res_name_count = n_res_name; + m->res_names = _upb_ReservedNames_New(ctx, n_res_name, res_names); + + const size_t synthetic_count = _upb_OneofDefs_Finalize(ctx, m); + m->real_oneof_count = m->oneof_count - synthetic_count; + + assign_msg_wellknowntype(m); + upb_inttable_compact(&m->itof, ctx->arena); + + const UPB_DESC(EnumDescriptorProto)* const* enums = + UPB_DESC(DescriptorProto_enum_type)(msg_proto, &n_enum); + m->nested_enum_count = n_enum; + m->nested_enums = _upb_EnumDefs_New(ctx, n_enum, enums, m); + + const UPB_DESC(FieldDescriptorProto)* const* exts = + UPB_DESC(DescriptorProto_extension)(msg_proto, &n_ext); + m->nested_ext_count = n_ext; + m->nested_exts = _upb_Extensions_New(ctx, n_ext, exts, m->full_name, m); + + const UPB_DESC(DescriptorProto)* const* msgs = + UPB_DESC(DescriptorProto_nested_type)(msg_proto, &n_msg); + m->nested_msg_count = n_msg; + m->nested_msgs = _upb_MessageDefs_New(ctx, n_msg, msgs, m); } -UPB_FORCEINLINE -static bool fastdecode_tagmatch(uint32_t tag, uint64_t data, int tagbytes) { - if (tagbytes == 1) { - return (uint8_t)tag == (uint8_t)data; - } else { - return (uint16_t)tag == (uint16_t)data; +// Allocate and initialize an array of |n| message defs. +upb_MessageDef* _upb_MessageDefs_New( + upb_DefBuilder* ctx, int n, const UPB_DESC(DescriptorProto) * const* protos, + const upb_MessageDef* containing_type) { + _upb_DefType_CheckPadding(sizeof(upb_MessageDef)); + + const char* name = containing_type ? containing_type->full_name + : _upb_FileDef_RawPackage(ctx->file); + + upb_MessageDef* m = _upb_DefBuilder_Alloc(ctx, sizeof(upb_MessageDef) * n); + for (int i = 0; i < n; i++) { + create_msgdef(ctx, name, protos[i], containing_type, &m[i]); } + return m; } -UPB_FORCEINLINE -static void fastdecode_commitarr(void* dst, fastdecode_arr* farr, - int valbytes) { - farr->arr->size = - (size_t)((char*)dst - (char*)_upb_array_ptr(farr->arr)) / valbytes; -} -UPB_FORCEINLINE -static fastdecode_nextret fastdecode_nextrepeated(upb_Decoder* d, void* dst, - const char** ptr, - fastdecode_arr* farr, - uint64_t data, int tagbytes, - int valbytes) { - fastdecode_nextret ret; - dst = (char*)dst + valbytes; +// Must be last. - if (UPB_LIKELY(!_upb_Decoder_IsDone(d, ptr))) { - ret.tag = _upb_FastDecoder_LoadTag(*ptr); - if (fastdecode_tagmatch(ret.tag, data, tagbytes)) { - ret.next = FD_NEXT_SAMEFIELD; - } else { - fastdecode_commitarr(dst, farr, valbytes); - ret.next = FD_NEXT_OTHERFIELD; - } - } else { - fastdecode_commitarr(dst, farr, valbytes); - ret.next = FD_NEXT_ATLIMIT; - } +struct upb_MessageReservedRange { + int32_t start; + int32_t end; +}; - ret.dst = dst; - return ret; +upb_MessageReservedRange* _upb_MessageReservedRange_At( + const upb_MessageReservedRange* r, int i) { + return (upb_MessageReservedRange*)&r[i]; } -UPB_FORCEINLINE -static void* fastdecode_fieldmem(upb_Message* msg, uint64_t data) { - size_t ofs = data >> 48; - return (char*)msg + ofs; +int32_t upb_MessageReservedRange_Start(const upb_MessageReservedRange* r) { + return r->start; +} +int32_t upb_MessageReservedRange_End(const upb_MessageReservedRange* r) { + return r->end; } -UPB_FORCEINLINE -static void* fastdecode_getfield(upb_Decoder* d, const char* ptr, - upb_Message* msg, uint64_t* data, - uint64_t* hasbits, fastdecode_arr* farr, - int valbytes, upb_card card) { - switch (card) { - case CARD_s: { - uint8_t hasbit_index = *data >> 24; - // Set hasbit and return pointer to scalar field. - *hasbits |= 1ull << hasbit_index; - return fastdecode_fieldmem(msg, *data); - } - case CARD_o: { - uint16_t case_ofs = *data >> 32; - uint32_t* oneof_case = UPB_PTR_AT(msg, case_ofs, uint32_t); - uint8_t field_number = *data >> 24; - *oneof_case = field_number; - return fastdecode_fieldmem(msg, *data); - } - case CARD_r: { - // Get pointer to upb_Array and allocate/expand if necessary. - uint8_t elem_size_lg2 = __builtin_ctz(valbytes); - upb_Array** arr_p = fastdecode_fieldmem(msg, *data); - char* begin; - *(uint32_t*)msg |= *hasbits; - *hasbits = 0; - if (UPB_LIKELY(!*arr_p)) { - farr->arr = _upb_Array_New(&d->arena, 8, elem_size_lg2); - *arr_p = farr->arr; - } else { - farr->arr = *arr_p; - } - begin = _upb_array_ptr(farr->arr); - farr->end = begin + (farr->arr->capacity * valbytes); - *data = _upb_FastDecoder_LoadTag(ptr); - return begin + (farr->arr->size * valbytes); +upb_MessageReservedRange* _upb_MessageReservedRanges_New( + upb_DefBuilder* ctx, int n, + const UPB_DESC(DescriptorProto_ReservedRange) * const* protos, + const upb_MessageDef* m) { + upb_MessageReservedRange* r = + _upb_DefBuilder_Alloc(ctx, sizeof(upb_MessageReservedRange) * n); + + for (int i = 0; i < n; i++) { + const int32_t start = + UPB_DESC(DescriptorProto_ReservedRange_start)(protos[i]); + const int32_t end = UPB_DESC(DescriptorProto_ReservedRange_end)(protos[i]); + const int32_t max = kUpb_MaxFieldNumber + 1; + + // A full validation would also check that each range is disjoint, and that + // none of the fields overlap with the extension ranges, but we are just + // sanity checking here. + if (start < 1 || end <= start || end > max) { + _upb_DefBuilder_Errf(ctx, + "Reserved range (%d, %d) is invalid, message=%s\n", + (int)start, (int)end, upb_MessageDef_FullName(m)); } - default: - UPB_UNREACHABLE(); + + r[i].start = start; + r[i].end = end; } + + return r; } -UPB_FORCEINLINE -static bool fastdecode_flippacked(uint64_t* data, int tagbytes) { - *data ^= (0x2 ^ 0x0); // Patch data to match packed wiretype. - return fastdecode_checktag(*data, tagbytes); + + +// Must be last. + +struct upb_MethodDef { + const UPB_DESC(MethodOptions) * opts; + upb_ServiceDef* service; + const char* full_name; + const upb_MessageDef* input_type; + const upb_MessageDef* output_type; + int index; + bool client_streaming; + bool server_streaming; +}; + +upb_MethodDef* _upb_MethodDef_At(const upb_MethodDef* m, int i) { + return (upb_MethodDef*)&m[i]; } -#define FASTDECODE_CHECKPACKED(tagbytes, card, func) \ - if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ - if (card == CARD_r && fastdecode_flippacked(&data, tagbytes)) { \ - UPB_MUSTTAIL return func(UPB_PARSE_ARGS); \ - } \ - RETURN_GENERIC("packed check tag mismatch\n"); \ - } +const upb_ServiceDef* upb_MethodDef_Service(const upb_MethodDef* m) { + return m->service; +} -/* varint fields **************************************************************/ +const UPB_DESC(MethodOptions) * upb_MethodDef_Options(const upb_MethodDef* m) { + return m->opts; +} -UPB_FORCEINLINE -static uint64_t fastdecode_munge(uint64_t val, int valbytes, bool zigzag) { - if (valbytes == 1) { - return val != 0; - } else if (zigzag) { - if (valbytes == 4) { - uint32_t n = val; - return (n >> 1) ^ -(int32_t)(n & 1); - } else if (valbytes == 8) { - return (val >> 1) ^ -(int64_t)(val & 1); - } - UPB_UNREACHABLE(); - } - return val; +bool upb_MethodDef_HasOptions(const upb_MethodDef* m) { + return m->opts != (void*)kUpbDefOptDefault; } -UPB_FORCEINLINE -static const char* fastdecode_varint64(const char* ptr, uint64_t* val) { - ptr++; - *val = (uint8_t)ptr[-1]; - if (UPB_UNLIKELY(*val & 0x80)) { - int i; - for (i = 0; i < 8; i++) { - ptr++; - uint64_t byte = (uint8_t)ptr[-1]; - *val += (byte - 1) << (7 + 7 * i); - if (UPB_LIKELY((byte & 0x80) == 0)) goto done; - } - ptr++; - uint64_t byte = (uint8_t)ptr[-1]; - if (byte > 1) { - return NULL; - } - *val += (byte - 1) << 63; - } -done: - UPB_ASSUME(ptr != NULL); - return ptr; +const char* upb_MethodDef_FullName(const upb_MethodDef* m) { + return m->full_name; } -#define FASTDECODE_UNPACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, card, zigzag, packed) \ - uint64_t val; \ - void* dst; \ - fastdecode_arr farr; \ - \ - FASTDECODE_CHECKPACKED(tagbytes, card, packed); \ - \ - dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, \ - card); \ - if (card == CARD_r) { \ - if (UPB_UNLIKELY(!dst)) { \ - RETURN_GENERIC("need array resize\n"); \ - } \ - } \ - \ - again: \ - if (card == CARD_r) { \ - dst = fastdecode_resizearr(d, dst, &farr, valbytes); \ - } \ - \ - ptr += tagbytes; \ - ptr = fastdecode_varint64(ptr, &val); \ - if (ptr == NULL) _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ - val = fastdecode_munge(val, valbytes, zigzag); \ - memcpy(dst, &val, valbytes); \ - \ - if (card == CARD_r) { \ - fastdecode_nextret ret = fastdecode_nextrepeated( \ - d, dst, &ptr, &farr, data, tagbytes, valbytes); \ - switch (ret.next) { \ - case FD_NEXT_SAMEFIELD: \ - dst = ret.dst; \ - goto again; \ - case FD_NEXT_OTHERFIELD: \ - data = ret.tag; \ - UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ - case FD_NEXT_ATLIMIT: \ - return ptr; \ - } \ - } \ - \ - UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); +const char* upb_MethodDef_Name(const upb_MethodDef* m) { + return _upb_DefBuilder_FullToShort(m->full_name); +} -typedef struct { - uint8_t valbytes; - bool zigzag; - void* dst; - fastdecode_arr farr; -} fastdecode_varintdata; +int upb_MethodDef_Index(const upb_MethodDef* m) { return m->index; } -UPB_FORCEINLINE -static const char* fastdecode_topackedvarint(upb_EpsCopyInputStream* e, - const char* ptr, void* ctx) { - upb_Decoder* d = (upb_Decoder*)e; - fastdecode_varintdata* data = ctx; - void* dst = data->dst; - uint64_t val; +const upb_MessageDef* upb_MethodDef_InputType(const upb_MethodDef* m) { + return m->input_type; +} - while (!_upb_Decoder_IsDone(d, &ptr)) { - dst = fastdecode_resizearr(d, dst, &data->farr, data->valbytes); - ptr = fastdecode_varint64(ptr, &val); - if (ptr == NULL) return NULL; - val = fastdecode_munge(val, data->valbytes, data->zigzag); - memcpy(dst, &val, data->valbytes); - dst = (char*)dst + data->valbytes; - } +const upb_MessageDef* upb_MethodDef_OutputType(const upb_MethodDef* m) { + return m->output_type; +} - fastdecode_commitarr(dst, &data->farr, data->valbytes); - return ptr; +bool upb_MethodDef_ClientStreaming(const upb_MethodDef* m) { + return m->client_streaming; } -#define FASTDECODE_PACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, zigzag, unpacked) \ - fastdecode_varintdata ctx = {valbytes, zigzag}; \ - \ - FASTDECODE_CHECKPACKED(tagbytes, CARD_r, unpacked); \ - \ - ctx.dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &ctx.farr, \ - valbytes, CARD_r); \ - if (UPB_UNLIKELY(!ctx.dst)) { \ - RETURN_GENERIC("need array resize\n"); \ - } \ - \ - ptr += tagbytes; \ - ptr = fastdecode_delimited(d, ptr, &fastdecode_topackedvarint, &ctx); \ - \ - if (UPB_UNLIKELY(ptr == NULL)) { \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ - } \ - \ - UPB_MUSTTAIL return fastdecode_dispatch(d, ptr, msg, table, hasbits, 0); +bool upb_MethodDef_ServerStreaming(const upb_MethodDef* m) { + return m->server_streaming; +} -#define FASTDECODE_VARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, card, zigzag, unpacked, packed) \ - if (card == CARD_p) { \ - FASTDECODE_PACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, zigzag, unpacked); \ - } else { \ - FASTDECODE_UNPACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, card, zigzag, packed); \ - } +static void create_method(upb_DefBuilder* ctx, + const UPB_DESC(MethodDescriptorProto) * method_proto, + upb_ServiceDef* s, upb_MethodDef* m) { + upb_StringView name = UPB_DESC(MethodDescriptorProto_name)(method_proto); -#define z_ZZ true -#define b_ZZ false -#define v_ZZ false + m->service = s; + m->full_name = + _upb_DefBuilder_MakeFullName(ctx, upb_ServiceDef_FullName(s), name); + m->client_streaming = + UPB_DESC(MethodDescriptorProto_client_streaming)(method_proto); + m->server_streaming = + UPB_DESC(MethodDescriptorProto_server_streaming)(method_proto); + m->input_type = _upb_DefBuilder_Resolve( + ctx, m->full_name, m->full_name, + UPB_DESC(MethodDescriptorProto_input_type)(method_proto), + UPB_DEFTYPE_MSG); + m->output_type = _upb_DefBuilder_Resolve( + ctx, m->full_name, m->full_name, + UPB_DESC(MethodDescriptorProto_output_type)(method_proto), + UPB_DEFTYPE_MSG); -/* Generate all combinations: - * {s,o,r,p} x {b1,v4,z4,v8,z8} x {1bt,2bt} */ + UPB_DEF_SET_OPTIONS(m->opts, MethodDescriptorProto, MethodOptions, + method_proto); +} -#define F(card, type, valbytes, tagbytes) \ - UPB_NOINLINE \ - const char* upb_p##card##type##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ - FASTDECODE_VARINT(d, ptr, msg, table, hasbits, data, tagbytes, valbytes, \ - CARD_##card, type##_ZZ, \ - upb_pr##type##valbytes##_##tagbytes##bt, \ - upb_pp##type##valbytes##_##tagbytes##bt); \ +// Allocate and initialize an array of |n| method defs belonging to |s|. +upb_MethodDef* _upb_MethodDefs_New( + upb_DefBuilder* ctx, int n, + const UPB_DESC(MethodDescriptorProto) * const* protos, upb_ServiceDef* s) { + upb_MethodDef* m = _upb_DefBuilder_Alloc(ctx, sizeof(upb_MethodDef) * n); + for (int i = 0; i < n; i++) { + create_method(ctx, protos[i], s, &m[i]); + m[i].index = i; } + return m; +} -#define TYPES(card, tagbytes) \ - F(card, b, 1, tagbytes) \ - F(card, v, 4, tagbytes) \ - F(card, v, 8, tagbytes) \ - F(card, z, 4, tagbytes) \ - F(card, z, 8, tagbytes) -#define TAGBYTES(card) \ - TYPES(card, 1) \ - TYPES(card, 2) +#include +#include +#include -TAGBYTES(s) -TAGBYTES(o) -TAGBYTES(r) -TAGBYTES(p) -#undef z_ZZ -#undef b_ZZ -#undef v_ZZ -#undef o_ONEOF -#undef s_ONEOF -#undef r_ONEOF -#undef F -#undef TYPES -#undef TAGBYTES -#undef FASTDECODE_UNPACKEDVARINT -#undef FASTDECODE_PACKEDVARINT -#undef FASTDECODE_VARINT +// Must be last. -/* fixed fields ***************************************************************/ +struct upb_OneofDef { + const UPB_DESC(OneofOptions) * opts; + const upb_MessageDef* parent; + const char* full_name; + int field_count; + bool synthetic; + const upb_FieldDef** fields; + upb_strtable ntof; // lookup a field by name + upb_inttable itof; // lookup a field by number (index) +#if UINTPTR_MAX == 0xffffffff + uint32_t padding; // Increase size to a multiple of 8. +#endif +}; -#define FASTDECODE_UNPACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, card, packed) \ - void* dst; \ - fastdecode_arr farr; \ - \ - FASTDECODE_CHECKPACKED(tagbytes, card, packed) \ - \ - dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, \ - card); \ - if (card == CARD_r) { \ - if (UPB_UNLIKELY(!dst)) { \ - RETURN_GENERIC("couldn't allocate array in arena\n"); \ - } \ - } \ - \ - again: \ - if (card == CARD_r) { \ - dst = fastdecode_resizearr(d, dst, &farr, valbytes); \ - } \ - \ - ptr += tagbytes; \ - memcpy(dst, ptr, valbytes); \ - ptr += valbytes; \ - \ - if (card == CARD_r) { \ - fastdecode_nextret ret = fastdecode_nextrepeated( \ - d, dst, &ptr, &farr, data, tagbytes, valbytes); \ - switch (ret.next) { \ - case FD_NEXT_SAMEFIELD: \ - dst = ret.dst; \ - goto again; \ - case FD_NEXT_OTHERFIELD: \ - data = ret.tag; \ - UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ - case FD_NEXT_ATLIMIT: \ - return ptr; \ - } \ - } \ - \ - UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); +upb_OneofDef* _upb_OneofDef_At(const upb_OneofDef* o, int i) { + return (upb_OneofDef*)&o[i]; +} -#define FASTDECODE_PACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, unpacked) \ - FASTDECODE_CHECKPACKED(tagbytes, CARD_r, unpacked) \ - \ - ptr += tagbytes; \ - int size = (uint8_t)ptr[0]; \ - ptr++; \ - if (size & 0x80) { \ - ptr = fastdecode_longsize(ptr, &size); \ - } \ - \ - if (UPB_UNLIKELY(!upb_EpsCopyInputStream_CheckDataSizeAvailable( \ - &d->input, ptr, size) || \ - (size % valbytes) != 0)) { \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ - } \ - \ - upb_Array** arr_p = fastdecode_fieldmem(msg, data); \ - upb_Array* arr = *arr_p; \ - uint8_t elem_size_lg2 = __builtin_ctz(valbytes); \ - int elems = size / valbytes; \ - \ - if (UPB_LIKELY(!arr)) { \ - *arr_p = arr = _upb_Array_New(&d->arena, elems, elem_size_lg2); \ - if (!arr) { \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ - } \ - } else { \ - _upb_Array_ResizeUninitialized(arr, elems, &d->arena); \ - } \ - \ - char* dst = _upb_array_ptr(arr); \ - memcpy(dst, ptr, size); \ - arr->size = elems; \ - \ - ptr += size; \ - UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); +const UPB_DESC(OneofOptions) * upb_OneofDef_Options(const upb_OneofDef* o) { + return o->opts; +} -#define FASTDECODE_FIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, card, unpacked, packed) \ - if (card == CARD_p) { \ - FASTDECODE_PACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, unpacked); \ - } else { \ - FASTDECODE_UNPACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, card, packed); \ - } +bool upb_OneofDef_HasOptions(const upb_OneofDef* o) { + return o->opts != (void*)kUpbDefOptDefault; +} -/* Generate all combinations: - * {s,o,r,p} x {f4,f8} x {1bt,2bt} */ +const char* upb_OneofDef_FullName(const upb_OneofDef* o) { + return o->full_name; +} -#define F(card, valbytes, tagbytes) \ - UPB_NOINLINE \ - const char* upb_p##card##f##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ - FASTDECODE_FIXED(d, ptr, msg, table, hasbits, data, tagbytes, valbytes, \ - CARD_##card, upb_ppf##valbytes##_##tagbytes##bt, \ - upb_prf##valbytes##_##tagbytes##bt); \ - } +const char* upb_OneofDef_Name(const upb_OneofDef* o) { + return _upb_DefBuilder_FullToShort(o->full_name); +} -#define TYPES(card, tagbytes) \ - F(card, 4, tagbytes) \ - F(card, 8, tagbytes) +const upb_MessageDef* upb_OneofDef_ContainingType(const upb_OneofDef* o) { + return o->parent; +} -#define TAGBYTES(card) \ - TYPES(card, 1) \ - TYPES(card, 2) +int upb_OneofDef_FieldCount(const upb_OneofDef* o) { return o->field_count; } -TAGBYTES(s) -TAGBYTES(o) -TAGBYTES(r) -TAGBYTES(p) +const upb_FieldDef* upb_OneofDef_Field(const upb_OneofDef* o, int i) { + UPB_ASSERT(i < o->field_count); + return o->fields[i]; +} -#undef F -#undef TYPES -#undef TAGBYTES -#undef FASTDECODE_UNPACKEDFIXED -#undef FASTDECODE_PACKEDFIXED +int upb_OneofDef_numfields(const upb_OneofDef* o) { return o->field_count; } -/* string fields **************************************************************/ +uint32_t upb_OneofDef_Index(const upb_OneofDef* o) { + // Compute index in our parent's array. + return o - upb_MessageDef_Oneof(o->parent, 0); +} -typedef const char* fastdecode_copystr_func(struct upb_Decoder* d, - const char* ptr, upb_Message* msg, - const upb_MiniTable* table, - uint64_t hasbits, - upb_StringView* dst); +bool upb_OneofDef_IsSynthetic(const upb_OneofDef* o) { return o->synthetic; } -UPB_NOINLINE -static const char* fastdecode_verifyutf8(upb_Decoder* d, const char* ptr, - upb_Message* msg, intptr_t table, - uint64_t hasbits, uint64_t data) { - upb_StringView* dst = (upb_StringView*)data; - if (!_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) { - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); +const upb_FieldDef* upb_OneofDef_LookupNameWithSize(const upb_OneofDef* o, + const char* name, + size_t size) { + upb_value val; + return upb_strtable_lookup2(&o->ntof, name, size, &val) + ? upb_value_getptr(val) + : NULL; +} + +const upb_FieldDef* upb_OneofDef_LookupName(const upb_OneofDef* o, + const char* name) { + return upb_OneofDef_LookupNameWithSize(o, name, strlen(name)); +} + +const upb_FieldDef* upb_OneofDef_LookupNumber(const upb_OneofDef* o, + uint32_t num) { + upb_value val; + return upb_inttable_lookup(&o->itof, num, &val) ? upb_value_getptr(val) + : NULL; +} + +void _upb_OneofDef_Insert(upb_DefBuilder* ctx, upb_OneofDef* o, + const upb_FieldDef* f, const char* name, + size_t size) { + o->field_count++; + if (_upb_FieldDef_IsProto3Optional(f)) o->synthetic = true; + + const int number = upb_FieldDef_Number(f); + const upb_value v = upb_value_constptr(f); + + // TODO(salo): This lookup is unfortunate because we also perform it when + // inserting into the message's table. Unfortunately that step occurs after + // this one and moving things around could be tricky so let's leave it for + // a future refactoring. + const bool number_exists = upb_inttable_lookup(&o->itof, number, NULL); + if (UPB_UNLIKELY(number_exists)) { + _upb_DefBuilder_Errf(ctx, "oneof fields have the same number (%d)", number); + } + + // TODO(salo): More redundant work happening here. + const bool name_exists = upb_strtable_lookup2(&o->ntof, name, size, NULL); + if (UPB_UNLIKELY(name_exists)) { + _upb_DefBuilder_Errf(ctx, "oneof fields have the same name (%.*s)", + (int)size, name); + } + + const bool ok = upb_inttable_insert(&o->itof, number, v, ctx->arena) && + upb_strtable_insert(&o->ntof, name, size, v, ctx->arena); + if (UPB_UNLIKELY(!ok)) { + _upb_DefBuilder_OomErr(ctx); } - UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); } -#define FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, validate_utf8) \ - int size = (uint8_t)ptr[0]; /* Could plumb through hasbits. */ \ - ptr++; \ - if (size & 0x80) { \ - ptr = fastdecode_longsize(ptr, &size); \ - } \ - \ - if (UPB_UNLIKELY(!upb_EpsCopyInputStream_CheckSize(&d->input, ptr, size))) { \ - dst->size = 0; \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ - } \ - \ - const char* s_ptr = ptr; \ - ptr = upb_EpsCopyInputStream_ReadString(&d->input, &s_ptr, size, &d->arena); \ - if (!ptr) _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); \ - dst->data = s_ptr; \ - dst->size = size; \ - \ - if (validate_utf8) { \ - data = (uint64_t)dst; \ - UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ - } else { \ - UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \ +// Returns the synthetic count. +size_t _upb_OneofDefs_Finalize(upb_DefBuilder* ctx, upb_MessageDef* m) { + int synthetic_count = 0; + + for (int i = 0; i < upb_MessageDef_OneofCount(m); i++) { + upb_OneofDef* o = (upb_OneofDef*)upb_MessageDef_Oneof(m, i); + + if (o->synthetic && o->field_count != 1) { + _upb_DefBuilder_Errf(ctx, + "Synthetic oneofs must have one field, not %d: %s", + o->field_count, upb_OneofDef_Name(o)); + } + + if (o->synthetic) { + synthetic_count++; + } else if (synthetic_count != 0) { + _upb_DefBuilder_Errf( + ctx, "Synthetic oneofs must be after all other oneofs: %s", + upb_OneofDef_Name(o)); + } + + o->fields = + _upb_DefBuilder_Alloc(ctx, sizeof(upb_FieldDef*) * o->field_count); + o->field_count = 0; } -UPB_NOINLINE -static const char* fastdecode_longstring_utf8(struct upb_Decoder* d, - const char* ptr, upb_Message* msg, - intptr_t table, uint64_t hasbits, - uint64_t data) { - upb_StringView* dst = (upb_StringView*)data; - FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, true); + for (int i = 0; i < upb_MessageDef_FieldCount(m); i++) { + const upb_FieldDef* f = upb_MessageDef_Field(m, i); + upb_OneofDef* o = (upb_OneofDef*)upb_FieldDef_ContainingOneof(f); + if (o) { + o->fields[o->field_count++] = f; + } + } + + return synthetic_count; } -UPB_NOINLINE -static const char* fastdecode_longstring_noutf8( - struct upb_Decoder* d, const char* ptr, upb_Message* msg, intptr_t table, - uint64_t hasbits, uint64_t data) { - upb_StringView* dst = (upb_StringView*)data; - FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, false); +static void create_oneofdef(upb_DefBuilder* ctx, upb_MessageDef* m, + const UPB_DESC(OneofDescriptorProto) * oneof_proto, + const upb_OneofDef* _o) { + upb_OneofDef* o = (upb_OneofDef*)_o; + upb_StringView name = UPB_DESC(OneofDescriptorProto_name)(oneof_proto); + + o->parent = m; + o->full_name = + _upb_DefBuilder_MakeFullName(ctx, upb_MessageDef_FullName(m), name); + o->field_count = 0; + o->synthetic = false; + + UPB_DEF_SET_OPTIONS(o->opts, OneofDescriptorProto, OneofOptions, oneof_proto); + + if (upb_MessageDef_FindByNameWithSize(m, name.data, name.size, NULL, NULL)) { + _upb_DefBuilder_Errf(ctx, "duplicate oneof name (%s)", o->full_name); + } + + upb_value v = _upb_DefType_Pack(o, UPB_DEFTYPE_ONEOF); + bool ok = _upb_MessageDef_Insert(m, name.data, name.size, v, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); + + ok = upb_inttable_init(&o->itof, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); + + ok = upb_strtable_init(&o->ntof, 4, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); } -UPB_FORCEINLINE -static void fastdecode_docopy(upb_Decoder* d, const char* ptr, uint32_t size, - int copy, char* data, upb_StringView* dst) { - d->arena.head.ptr += copy; - dst->data = data; - UPB_UNPOISON_MEMORY_REGION(data, copy); - memcpy(data, ptr, copy); - UPB_POISON_MEMORY_REGION(data + size, copy - size); +// Allocate and initialize an array of |n| oneof defs. +upb_OneofDef* _upb_OneofDefs_New( + upb_DefBuilder* ctx, int n, + const UPB_DESC(OneofDescriptorProto) * const* protos, upb_MessageDef* m) { + _upb_DefType_CheckPadding(sizeof(upb_OneofDef)); + + upb_OneofDef* o = _upb_DefBuilder_Alloc(ctx, sizeof(upb_OneofDef) * n); + for (int i = 0; i < n; i++) { + create_oneofdef(ctx, m, protos[i], &o[i]); + } + return o; } -#define FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes, \ - card, validate_utf8) \ - upb_StringView* dst; \ - fastdecode_arr farr; \ - int64_t size; \ - size_t arena_has; \ - size_t common_has; \ - char* buf; \ - \ - UPB_ASSERT(!upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, 0)); \ - UPB_ASSERT(fastdecode_checktag(data, tagbytes)); \ - \ - dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ - sizeof(upb_StringView), card); \ - \ - again: \ - if (card == CARD_r) { \ - dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_StringView)); \ - } \ - \ - size = (uint8_t)ptr[tagbytes]; \ - ptr += tagbytes + 1; \ - dst->size = size; \ - \ - buf = d->arena.head.ptr; \ - arena_has = _upb_ArenaHas(&d->arena); \ - common_has = UPB_MIN(arena_has, \ - upb_EpsCopyInputStream_BytesAvailable(&d->input, ptr)); \ - \ - if (UPB_LIKELY(size <= 15 - tagbytes)) { \ - if (arena_has < 16) goto longstr; \ - d->arena.head.ptr += 16; \ - memcpy(buf, ptr - tagbytes - 1, 16); \ - dst->data = buf + tagbytes + 1; \ - } else if (UPB_LIKELY(size <= 32)) { \ - if (UPB_UNLIKELY(common_has < 32)) goto longstr; \ - fastdecode_docopy(d, ptr, size, 32, buf, dst); \ - } else if (UPB_LIKELY(size <= 64)) { \ - if (UPB_UNLIKELY(common_has < 64)) goto longstr; \ - fastdecode_docopy(d, ptr, size, 64, buf, dst); \ - } else if (UPB_LIKELY(size < 128)) { \ - if (UPB_UNLIKELY(common_has < 128)) goto longstr; \ - fastdecode_docopy(d, ptr, size, 128, buf, dst); \ - } else { \ - goto longstr; \ - } \ - \ - ptr += size; \ - \ - if (card == CARD_r) { \ - if (validate_utf8 && \ - !_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) { \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); \ - } \ - fastdecode_nextret ret = fastdecode_nextrepeated( \ - d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_StringView)); \ - switch (ret.next) { \ - case FD_NEXT_SAMEFIELD: \ - dst = ret.dst; \ - goto again; \ - case FD_NEXT_OTHERFIELD: \ - data = ret.tag; \ - UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ - case FD_NEXT_ATLIMIT: \ - return ptr; \ - } \ - } \ - \ - if (card != CARD_r && validate_utf8) { \ - data = (uint64_t)dst; \ - UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ - } \ - \ - UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \ - \ - longstr: \ - if (card == CARD_r) { \ - fastdecode_commitarr(dst + 1, &farr, sizeof(upb_StringView)); \ - } \ - ptr--; \ - if (validate_utf8) { \ - UPB_MUSTTAIL return fastdecode_longstring_utf8(d, ptr, msg, table, \ - hasbits, (uint64_t)dst); \ - } else { \ - UPB_MUSTTAIL return fastdecode_longstring_noutf8(d, ptr, msg, table, \ - hasbits, (uint64_t)dst); \ - } -#define FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, card, \ - copyfunc, validate_utf8) \ - upb_StringView* dst; \ - fastdecode_arr farr; \ - int64_t size; \ - \ - if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ - RETURN_GENERIC("string field tag mismatch\n"); \ - } \ - \ - if (UPB_UNLIKELY( \ - !upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, 0))) { \ - UPB_MUSTTAIL return copyfunc(UPB_PARSE_ARGS); \ - } \ - \ - dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ - sizeof(upb_StringView), card); \ - \ - again: \ - if (card == CARD_r) { \ - dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_StringView)); \ - } \ - \ - size = (int8_t)ptr[tagbytes]; \ - ptr += tagbytes + 1; \ - \ - if (UPB_UNLIKELY( \ - !upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, size))) { \ - ptr--; \ - if (validate_utf8) { \ - return fastdecode_longstring_utf8(d, ptr, msg, table, hasbits, \ - (uint64_t)dst); \ - } else { \ - return fastdecode_longstring_noutf8(d, ptr, msg, table, hasbits, \ - (uint64_t)dst); \ - } \ - } \ - \ - dst->data = ptr; \ - dst->size = size; \ - ptr = upb_EpsCopyInputStream_ReadStringAliased(&d->input, &dst->data, \ - dst->size); \ - \ - if (card == CARD_r) { \ - if (validate_utf8 && \ - !_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) { \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); \ - } \ - fastdecode_nextret ret = fastdecode_nextrepeated( \ - d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_StringView)); \ - switch (ret.next) { \ - case FD_NEXT_SAMEFIELD: \ - dst = ret.dst; \ - goto again; \ - case FD_NEXT_OTHERFIELD: \ - data = ret.tag; \ - UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ - case FD_NEXT_ATLIMIT: \ - return ptr; \ - } \ - } \ - \ - if (card != CARD_r && validate_utf8) { \ - data = (uint64_t)dst; \ - UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ - } \ - \ - UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); -/* Generate all combinations: - * {p,c} x {s,o,r} x {s, b} x {1bt,2bt} */ +// Must be last. -#define s_VALIDATE true -#define b_VALIDATE false +struct upb_ServiceDef { + const UPB_DESC(ServiceOptions) * opts; + const upb_FileDef* file; + const char* full_name; + upb_MethodDef* methods; + int method_count; + int index; +}; -#define F(card, tagbytes, type) \ - UPB_NOINLINE \ - const char* upb_c##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ - FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes, \ - CARD_##card, type##_VALIDATE); \ - } \ - const char* upb_p##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ - FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, \ - CARD_##card, upb_c##card##type##_##tagbytes##bt, \ - type##_VALIDATE); \ - } +upb_ServiceDef* _upb_ServiceDef_At(const upb_ServiceDef* s, int index) { + return (upb_ServiceDef*)&s[index]; +} -#define UTF8(card, tagbytes) \ - F(card, tagbytes, s) \ - F(card, tagbytes, b) +const UPB_DESC(ServiceOptions) * + upb_ServiceDef_Options(const upb_ServiceDef* s) { + return s->opts; +} -#define TAGBYTES(card) \ - UTF8(card, 1) \ - UTF8(card, 2) +bool upb_ServiceDef_HasOptions(const upb_ServiceDef* s) { + return s->opts != (void*)kUpbDefOptDefault; +} -TAGBYTES(s) -TAGBYTES(o) -TAGBYTES(r) +const char* upb_ServiceDef_FullName(const upb_ServiceDef* s) { + return s->full_name; +} -#undef s_VALIDATE -#undef b_VALIDATE -#undef F -#undef TAGBYTES -#undef FASTDECODE_LONGSTRING -#undef FASTDECODE_COPYSTRING -#undef FASTDECODE_STRING +const char* upb_ServiceDef_Name(const upb_ServiceDef* s) { + return _upb_DefBuilder_FullToShort(s->full_name); +} -/* message fields *************************************************************/ +int upb_ServiceDef_Index(const upb_ServiceDef* s) { return s->index; } -UPB_INLINE -upb_Message* decode_newmsg_ceil(upb_Decoder* d, const upb_MiniTable* l, - int msg_ceil_bytes) { - size_t size = l->size + sizeof(upb_Message_Internal); - char* msg_data; - if (UPB_LIKELY(msg_ceil_bytes > 0 && - _upb_ArenaHas(&d->arena) >= msg_ceil_bytes)) { - UPB_ASSERT(size <= (size_t)msg_ceil_bytes); - msg_data = d->arena.head.ptr; - d->arena.head.ptr += size; - UPB_UNPOISON_MEMORY_REGION(msg_data, msg_ceil_bytes); - memset(msg_data, 0, msg_ceil_bytes); - UPB_POISON_MEMORY_REGION(msg_data + size, msg_ceil_bytes - size); - } else { - msg_data = (char*)upb_Arena_Malloc(&d->arena, size); - memset(msg_data, 0, size); - } - return msg_data + sizeof(upb_Message_Internal); +const upb_FileDef* upb_ServiceDef_File(const upb_ServiceDef* s) { + return s->file; } -typedef struct { - intptr_t table; - upb_Message* msg; -} fastdecode_submsgdata; +int upb_ServiceDef_MethodCount(const upb_ServiceDef* s) { + return s->method_count; +} -UPB_FORCEINLINE -static const char* fastdecode_tosubmsg(upb_EpsCopyInputStream* e, - const char* ptr, void* ctx) { - upb_Decoder* d = (upb_Decoder*)e; - fastdecode_submsgdata* submsg = ctx; - ptr = fastdecode_dispatch(d, ptr, submsg->msg, submsg->table, 0, 0); - UPB_ASSUME(ptr != NULL); - return ptr; +const upb_MethodDef* upb_ServiceDef_Method(const upb_ServiceDef* s, int i) { + return (i < 0 || i >= s->method_count) ? NULL + : _upb_MethodDef_At(s->methods, i); } -#define FASTDECODE_SUBMSG(d, ptr, msg, table, hasbits, data, tagbytes, \ - msg_ceil_bytes, card) \ - \ - if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ - RETURN_GENERIC("submessage field tag mismatch\n"); \ - } \ - \ - if (--d->depth == 0) { \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_MaxDepthExceeded); \ - } \ - \ - upb_Message** dst; \ - uint32_t submsg_idx = (data >> 16) & 0xff; \ - const upb_MiniTable* tablep = decode_totablep(table); \ - const upb_MiniTable* subtablep = tablep->subs[submsg_idx].submsg; \ - fastdecode_submsgdata submsg = {decode_totable(subtablep)}; \ - fastdecode_arr farr; \ - \ - if (subtablep->table_mask == (uint8_t)-1) { \ - RETURN_GENERIC("submessage doesn't have fast tables."); \ - } \ - \ - dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ - sizeof(upb_Message*), card); \ - \ - if (card == CARD_s) { \ - *(uint32_t*)msg |= hasbits; \ - hasbits = 0; \ - } \ - \ - again: \ - if (card == CARD_r) { \ - dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_Message*)); \ - } \ - \ - submsg.msg = *dst; \ - \ - if (card == CARD_r || UPB_LIKELY(!submsg.msg)) { \ - *dst = submsg.msg = decode_newmsg_ceil(d, subtablep, msg_ceil_bytes); \ - } \ - \ - ptr += tagbytes; \ - ptr = fastdecode_delimited(d, ptr, fastdecode_tosubmsg, &submsg); \ - \ - if (UPB_UNLIKELY(ptr == NULL || d->end_group != DECODE_NOGROUP)) { \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ - } \ - \ - if (card == CARD_r) { \ - fastdecode_nextret ret = fastdecode_nextrepeated( \ - d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_Message*)); \ - switch (ret.next) { \ - case FD_NEXT_SAMEFIELD: \ - dst = ret.dst; \ - goto again; \ - case FD_NEXT_OTHERFIELD: \ - d->depth++; \ - data = ret.tag; \ - UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ - case FD_NEXT_ATLIMIT: \ - d->depth++; \ - return ptr; \ - } \ - } \ - \ - d->depth++; \ - UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); - -#define F(card, tagbytes, size_ceil, ceil_arg) \ - const char* upb_p##card##m_##tagbytes##bt_max##size_ceil##b( \ - UPB_PARSE_PARAMS) { \ - FASTDECODE_SUBMSG(d, ptr, msg, table, hasbits, data, tagbytes, ceil_arg, \ - CARD_##card); \ +const upb_MethodDef* upb_ServiceDef_FindMethodByName(const upb_ServiceDef* s, + const char* name) { + for (int i = 0; i < s->method_count; i++) { + const upb_MethodDef* m = _upb_MethodDef_At(s->methods, i); + if (strcmp(name, upb_MethodDef_Name(m)) == 0) { + return m; + } } + return NULL; +} -#define SIZES(card, tagbytes) \ - F(card, tagbytes, 64, 64) \ - F(card, tagbytes, 128, 128) \ - F(card, tagbytes, 192, 192) \ - F(card, tagbytes, 256, 256) \ - F(card, tagbytes, max, -1) +static void create_service(upb_DefBuilder* ctx, + const UPB_DESC(ServiceDescriptorProto) * svc_proto, + upb_ServiceDef* s) { + upb_StringView name; + size_t n; -#define TAGBYTES(card) \ - SIZES(card, 1) \ - SIZES(card, 2) + // Must happen before _upb_DefBuilder_Add() + s->file = _upb_DefBuilder_File(ctx); -TAGBYTES(s) -TAGBYTES(o) -TAGBYTES(r) + name = UPB_DESC(ServiceDescriptorProto_name)(svc_proto); + const char* package = _upb_FileDef_RawPackage(s->file); + s->full_name = _upb_DefBuilder_MakeFullName(ctx, package, name); + _upb_DefBuilder_Add(ctx, s->full_name, + _upb_DefType_Pack(s, UPB_DEFTYPE_SERVICE)); -#undef TAGBYTES -#undef SIZES -#undef F -#undef FASTDECODE_SUBMSG + const UPB_DESC(MethodDescriptorProto)* const* methods = + UPB_DESC(ServiceDescriptorProto_method)(svc_proto, &n); + s->method_count = n; + s->methods = _upb_MethodDefs_New(ctx, n, methods, s); -#endif /* UPB_FASTTABLE */ + UPB_DEF_SET_OPTIONS(s->opts, ServiceDescriptorProto, ServiceOptions, + svc_proto); +} -// We encode backwards, to avoid pre-computing lengths (one-pass encode). +upb_ServiceDef* _upb_ServiceDefs_New( + upb_DefBuilder* ctx, int n, + const UPB_DESC(ServiceDescriptorProto) * const* protos) { + _upb_DefType_CheckPadding(sizeof(upb_ServiceDef)); + + upb_ServiceDef* s = _upb_DefBuilder_Alloc(ctx, sizeof(upb_ServiceDef) * n); + for (int i = 0; i < n; i++) { + create_service(ctx, protos[i], &s[i]); + s[i].index = i; + } + return s; +} #include @@ -12497,2170 +11715,2962 @@ TAGBYTES(r) // Must be last. -#define UPB_PB_VARINT_MAX_LEN 10 - -UPB_NOINLINE -static size_t encode_varint64(uint64_t val, char* buf) { - size_t i = 0; - do { - uint8_t byte = val & 0x7fU; - val >>= 7; - if (val) byte |= 0x80U; - buf[i++] = byte; - } while (val); - return i; -} +// A few fake field types for our tables. +enum { + kUpb_FakeFieldType_FieldNotFound = 0, + kUpb_FakeFieldType_MessageSetItem = 19, +}; -static uint32_t encode_zz32(int32_t n) { - return ((uint32_t)n << 1) ^ (n >> 31); -} -static uint64_t encode_zz64(int64_t n) { - return ((uint64_t)n << 1) ^ (n >> 63); -} +// DecodeOp: an action to be performed for a wire-type/field-type combination. +enum { + // Special ops: we don't write data to regular fields for these. + kUpb_DecodeOp_UnknownField = -1, + kUpb_DecodeOp_MessageSetItem = -2, -typedef struct { - upb_EncodeStatus status; - jmp_buf err; - upb_Arena* arena; - char *buf, *ptr, *limit; - int options; - int depth; - _upb_mapsorter sorter; -} upb_encstate; + // Scalar-only ops. + kUpb_DecodeOp_Scalar1Byte = 0, + kUpb_DecodeOp_Scalar4Byte = 2, + kUpb_DecodeOp_Scalar8Byte = 3, + kUpb_DecodeOp_Enum = 1, -static size_t upb_roundup_pow2(size_t bytes) { - size_t ret = 128; - while (ret < bytes) { - ret *= 2; - } - return ret; -} + // Scalar/repeated ops. + kUpb_DecodeOp_String = 4, + kUpb_DecodeOp_Bytes = 5, + kUpb_DecodeOp_SubMessage = 6, -UPB_NORETURN static void encode_err(upb_encstate* e, upb_EncodeStatus s) { - UPB_ASSERT(s != kUpb_EncodeStatus_Ok); - e->status = s; - UPB_LONGJMP(e->err, 1); -} + // Repeated-only ops (also see macros below). + kUpb_DecodeOp_PackedEnum = 13, +}; -UPB_NOINLINE -static void encode_growbuffer(upb_encstate* e, size_t bytes) { - size_t old_size = e->limit - e->buf; - size_t new_size = upb_roundup_pow2(bytes + (e->limit - e->ptr)); - char* new_buf = upb_Arena_Realloc(e->arena, e->buf, old_size, new_size); +// For packed fields it is helpful to be able to recover the lg2 of the data +// size from the op. +#define OP_FIXPCK_LG2(n) (n + 5) /* n in [2, 3] => op in [7, 8] */ +#define OP_VARPCK_LG2(n) (n + 9) /* n in [0, 2, 3] => op in [9, 11, 12] */ - if (!new_buf) encode_err(e, kUpb_EncodeStatus_OutOfMemory); +typedef union { + bool bool_val; + uint32_t uint32_val; + uint64_t uint64_val; + uint32_t size; +} wireval; - // We want previous data at the end, realloc() put it at the beginning. - // TODO(salo): This is somewhat inefficient since we are copying twice. - // Maybe create a realloc() that copies to the end of the new buffer? - if (old_size > 0) { - memmove(new_buf + new_size - old_size, e->buf, old_size); - } +static const char* _upb_Decoder_DecodeMessage(upb_Decoder* d, const char* ptr, + upb_Message* msg, + const upb_MiniTable* layout); - e->ptr = new_buf + new_size - (e->limit - e->ptr); - e->limit = new_buf + new_size; - e->buf = new_buf; +UPB_NORETURN static void* _upb_Decoder_ErrorJmp(upb_Decoder* d, + upb_DecodeStatus status) { + assert(status != kUpb_DecodeStatus_Ok); + d->status = status; + UPB_LONGJMP(d->err, 1); +} - e->ptr -= bytes; +const char* _upb_FastDecoder_ErrorJmp(upb_Decoder* d, int status) { + assert(status != kUpb_DecodeStatus_Ok); + d->status = status; + UPB_LONGJMP(d->err, 1); + return NULL; } -/* Call to ensure that at least "bytes" bytes are available for writing at - * e->ptr. Returns false if the bytes could not be allocated. */ -UPB_FORCEINLINE -static void encode_reserve(upb_encstate* e, size_t bytes) { - if ((size_t)(e->ptr - e->buf) < bytes) { - encode_growbuffer(e, bytes); - return; +static void _upb_Decoder_VerifyUtf8(upb_Decoder* d, const char* buf, int len) { + if (!_upb_Decoder_VerifyUtf8Inline(buf, len)) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); } - - e->ptr -= bytes; } -/* Writes the given bytes to the buffer, handling reserve/advance. */ -static void encode_bytes(upb_encstate* e, const void* data, size_t len) { - if (len == 0) return; /* memcpy() with zero size is UB */ - encode_reserve(e, len); - memcpy(e->ptr, data, len); +static bool _upb_Decoder_Reserve(upb_Decoder* d, upb_Array* arr, size_t elem) { + bool need_realloc = arr->capacity - arr->size < elem; + if (need_realloc && !_upb_array_realloc(arr, arr->size + elem, &d->arena)) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + } + return need_realloc; } -static void encode_fixed64(upb_encstate* e, uint64_t val) { - val = _upb_BigEndian_Swap64(val); - encode_bytes(e, &val, sizeof(uint64_t)); -} - -static void encode_fixed32(upb_encstate* e, uint32_t val) { - val = _upb_BigEndian_Swap32(val); - encode_bytes(e, &val, sizeof(uint32_t)); -} +typedef struct { + const char* ptr; + uint64_t val; +} _upb_DecodeLongVarintReturn; UPB_NOINLINE -static void encode_longvarint(upb_encstate* e, uint64_t val) { - size_t len; - char* start; - - encode_reserve(e, UPB_PB_VARINT_MAX_LEN); - len = encode_varint64(val, e->ptr); - start = e->ptr + UPB_PB_VARINT_MAX_LEN - len; - memmove(start, e->ptr, len); - e->ptr = start; +static _upb_DecodeLongVarintReturn _upb_Decoder_DecodeLongVarint( + const char* ptr, uint64_t val) { + _upb_DecodeLongVarintReturn ret = {NULL, 0}; + uint64_t byte; + int i; + for (i = 1; i < 10; i++) { + byte = (uint8_t)ptr[i]; + val += (byte - 1) << (i * 7); + if (!(byte & 0x80)) { + ret.ptr = ptr + i + 1; + ret.val = val; + return ret; + } + } + return ret; } UPB_FORCEINLINE -static void encode_varint(upb_encstate* e, uint64_t val) { - if (val < 128 && e->ptr != e->buf) { - --e->ptr; - *e->ptr = val; +static const char* _upb_Decoder_DecodeVarint(upb_Decoder* d, const char* ptr, + uint64_t* val) { + uint64_t byte = (uint8_t)*ptr; + if (UPB_LIKELY((byte & 0x80) == 0)) { + *val = byte; + return ptr + 1; } else { - encode_longvarint(e, val); + _upb_DecodeLongVarintReturn res = _upb_Decoder_DecodeLongVarint(ptr, byte); + if (!res.ptr) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); + *val = res.val; + return res.ptr; } } -static void encode_double(upb_encstate* e, double d) { - uint64_t u64; - UPB_ASSERT(sizeof(double) == sizeof(uint64_t)); - memcpy(&u64, &d, sizeof(uint64_t)); - encode_fixed64(e, u64); -} - -static void encode_float(upb_encstate* e, float d) { - uint32_t u32; - UPB_ASSERT(sizeof(float) == sizeof(uint32_t)); - memcpy(&u32, &d, sizeof(uint32_t)); - encode_fixed32(e, u32); -} - -static void encode_tag(upb_encstate* e, uint32_t field_number, - uint8_t wire_type) { - encode_varint(e, (field_number << 3) | wire_type); -} - -static void encode_fixedarray(upb_encstate* e, const upb_Array* arr, - size_t elem_size, uint32_t tag) { - size_t bytes = arr->size * elem_size; - const char* data = _upb_array_constptr(arr); - const char* ptr = data + bytes - elem_size; - - if (tag || !_upb_IsLittleEndian()) { - while (true) { - if (elem_size == 4) { - uint32_t val; - memcpy(&val, ptr, sizeof(val)); - val = _upb_BigEndian_Swap32(val); - encode_bytes(e, &val, elem_size); - } else { - UPB_ASSERT(elem_size == 8); - uint64_t val; - memcpy(&val, ptr, sizeof(val)); - val = _upb_BigEndian_Swap64(val); - encode_bytes(e, &val, elem_size); - } - - if (tag) encode_varint(e, tag); - if (ptr == data) break; - ptr -= elem_size; - } +UPB_FORCEINLINE +static const char* _upb_Decoder_DecodeTag(upb_Decoder* d, const char* ptr, + uint32_t* val) { + uint64_t byte = (uint8_t)*ptr; + if (UPB_LIKELY((byte & 0x80) == 0)) { + *val = byte; + return ptr + 1; } else { - encode_bytes(e, data, bytes); + const char* start = ptr; + _upb_DecodeLongVarintReturn res = _upb_Decoder_DecodeLongVarint(ptr, byte); + if (!res.ptr || res.ptr - start > 5 || res.val > UINT32_MAX) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); + } + *val = res.val; + return res.ptr; } } -static void encode_message(upb_encstate* e, const upb_Message* msg, - const upb_MiniTable* m, size_t* size); - -static void encode_TaggedMessagePtr(upb_encstate* e, - upb_TaggedMessagePtr tagged, - const upb_MiniTable* m, size_t* size) { - if (upb_TaggedMessagePtr_IsEmpty(tagged)) { - m = &_kUpb_MiniTable_Empty; +UPB_FORCEINLINE +static const char* upb_Decoder_DecodeSize(upb_Decoder* d, const char* ptr, + uint32_t* size) { + uint64_t size64; + ptr = _upb_Decoder_DecodeVarint(d, ptr, &size64); + if (size64 >= INT32_MAX || + !upb_EpsCopyInputStream_CheckSize(&d->input, ptr, (int)size64)) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); } - encode_message(e, _upb_TaggedMessagePtr_GetMessage(tagged), m, size); + *size = size64; + return ptr; } -static void encode_scalar(upb_encstate* e, const void* _field_mem, - const upb_MiniTableSub* subs, - const upb_MiniTableField* f) { - const char* field_mem = _field_mem; - int wire_type; - -#define CASE(ctype, type, wtype, encodeval) \ - { \ - ctype val = *(ctype*)field_mem; \ - encode_##type(e, encodeval); \ - wire_type = wtype; \ - break; \ +static void _upb_Decoder_MungeInt32(wireval* val) { + if (!_upb_IsLittleEndian()) { + /* The next stage will memcpy(dst, &val, 4) */ + val->uint32_val = val->uint64_val; } +} - switch (f->UPB_PRIVATE(descriptortype)) { - case kUpb_FieldType_Double: - CASE(double, double, kUpb_WireType_64Bit, val); - case kUpb_FieldType_Float: - CASE(float, float, kUpb_WireType_32Bit, val); - case kUpb_FieldType_Int64: - case kUpb_FieldType_UInt64: - CASE(uint64_t, varint, kUpb_WireType_Varint, val); - case kUpb_FieldType_UInt32: - CASE(uint32_t, varint, kUpb_WireType_Varint, val); - case kUpb_FieldType_Int32: - case kUpb_FieldType_Enum: - CASE(int32_t, varint, kUpb_WireType_Varint, (int64_t)val); - case kUpb_FieldType_SFixed64: - case kUpb_FieldType_Fixed64: - CASE(uint64_t, fixed64, kUpb_WireType_64Bit, val); - case kUpb_FieldType_Fixed32: - case kUpb_FieldType_SFixed32: - CASE(uint32_t, fixed32, kUpb_WireType_32Bit, val); +static void _upb_Decoder_Munge(int type, wireval* val) { + switch (type) { case kUpb_FieldType_Bool: - CASE(bool, varint, kUpb_WireType_Varint, val); - case kUpb_FieldType_SInt32: - CASE(int32_t, varint, kUpb_WireType_Varint, encode_zz32(val)); - case kUpb_FieldType_SInt64: - CASE(int64_t, varint, kUpb_WireType_Varint, encode_zz64(val)); - case kUpb_FieldType_String: - case kUpb_FieldType_Bytes: { - upb_StringView view = *(upb_StringView*)field_mem; - encode_bytes(e, view.data, view.size); - encode_varint(e, view.size); - wire_type = kUpb_WireType_Delimited; + val->bool_val = val->uint64_val != 0; break; - } - case kUpb_FieldType_Group: { - size_t size; - upb_TaggedMessagePtr submsg = *(upb_TaggedMessagePtr*)field_mem; - const upb_MiniTable* subm = subs[f->UPB_PRIVATE(submsg_index)].submsg; - if (submsg == 0) { - return; - } - if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded); - encode_tag(e, f->number, kUpb_WireType_EndGroup); - encode_TaggedMessagePtr(e, submsg, subm, &size); - wire_type = kUpb_WireType_StartGroup; - e->depth++; + case kUpb_FieldType_SInt32: { + uint32_t n = val->uint64_val; + val->uint32_val = (n >> 1) ^ -(int32_t)(n & 1); break; } - case kUpb_FieldType_Message: { - size_t size; - upb_TaggedMessagePtr submsg = *(upb_TaggedMessagePtr*)field_mem; - const upb_MiniTable* subm = subs[f->UPB_PRIVATE(submsg_index)].submsg; - if (submsg == 0) { - return; - } - if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded); - encode_TaggedMessagePtr(e, submsg, subm, &size); - encode_varint(e, size); - wire_type = kUpb_WireType_Delimited; - e->depth++; + case kUpb_FieldType_SInt64: { + uint64_t n = val->uint64_val; + val->uint64_val = (n >> 1) ^ -(int64_t)(n & 1); break; } - default: - UPB_UNREACHABLE(); + case kUpb_FieldType_Int32: + case kUpb_FieldType_UInt32: + case kUpb_FieldType_Enum: + _upb_Decoder_MungeInt32(val); + break; } -#undef CASE - - encode_tag(e, f->number, wire_type); } -static void encode_array(upb_encstate* e, const upb_Message* msg, - const upb_MiniTableSub* subs, - const upb_MiniTableField* f) { - const upb_Array* arr = *UPB_PTR_AT(msg, f->offset, upb_Array*); - bool packed = f->mode & kUpb_LabelFlags_IsPacked; - size_t pre_len = e->limit - e->ptr; +static upb_Message* _upb_Decoder_NewSubMessage(upb_Decoder* d, + const upb_MiniTableSub* subs, + const upb_MiniTableField* field, + upb_TaggedMessagePtr* target) { + const upb_MiniTable* subl = subs[field->UPB_PRIVATE(submsg_index)].submsg; + UPB_ASSERT(subl); + upb_Message* msg = _upb_Message_New(subl, &d->arena); + if (!msg) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - if (arr == NULL || arr->size == 0) { - return; - } + // Extensions should not be unlinked. A message extension should not be + // registered until its sub-message type is available to be linked. + bool is_empty = subl == &_kUpb_MiniTable_Empty; + bool is_extension = field->mode & kUpb_LabelFlags_IsExtension; + UPB_ASSERT(!(is_empty && is_extension)); -#define VARINT_CASE(ctype, encode) \ - { \ - const ctype* start = _upb_array_constptr(arr); \ - const ctype* ptr = start + arr->size; \ - uint32_t tag = packed ? 0 : (f->number << 3) | kUpb_WireType_Varint; \ - do { \ - ptr--; \ - encode_varint(e, encode); \ - if (tag) encode_varint(e, tag); \ - } while (ptr != start); \ - } \ - break; + if (is_empty && !(d->options & kUpb_DecodeOption_ExperimentalAllowUnlinked)) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_UnlinkedSubMessage); + } -#define TAG(wire_type) (packed ? 0 : (f->number << 3 | wire_type)) + upb_TaggedMessagePtr tagged = _upb_TaggedMessagePtr_Pack(msg, is_empty); + memcpy(target, &tagged, sizeof(tagged)); + return msg; +} - switch (f->UPB_PRIVATE(descriptortype)) { - case kUpb_FieldType_Double: - encode_fixedarray(e, arr, sizeof(double), TAG(kUpb_WireType_64Bit)); - break; - case kUpb_FieldType_Float: - encode_fixedarray(e, arr, sizeof(float), TAG(kUpb_WireType_32Bit)); - break; - case kUpb_FieldType_SFixed64: - case kUpb_FieldType_Fixed64: - encode_fixedarray(e, arr, sizeof(uint64_t), TAG(kUpb_WireType_64Bit)); - break; - case kUpb_FieldType_Fixed32: - case kUpb_FieldType_SFixed32: - encode_fixedarray(e, arr, sizeof(uint32_t), TAG(kUpb_WireType_32Bit)); - break; - case kUpb_FieldType_Int64: - case kUpb_FieldType_UInt64: - VARINT_CASE(uint64_t, *ptr); - case kUpb_FieldType_UInt32: - VARINT_CASE(uint32_t, *ptr); - case kUpb_FieldType_Int32: - case kUpb_FieldType_Enum: - VARINT_CASE(int32_t, (int64_t)*ptr); - case kUpb_FieldType_Bool: - VARINT_CASE(bool, *ptr); - case kUpb_FieldType_SInt32: - VARINT_CASE(int32_t, encode_zz32(*ptr)); - case kUpb_FieldType_SInt64: - VARINT_CASE(int64_t, encode_zz64(*ptr)); - case kUpb_FieldType_String: - case kUpb_FieldType_Bytes: { - const upb_StringView* start = _upb_array_constptr(arr); - const upb_StringView* ptr = start + arr->size; - do { - ptr--; - encode_bytes(e, ptr->data, ptr->size); - encode_varint(e, ptr->size); - encode_tag(e, f->number, kUpb_WireType_Delimited); - } while (ptr != start); - return; - } - case kUpb_FieldType_Group: { - const upb_TaggedMessagePtr* start = _upb_array_constptr(arr); - const upb_TaggedMessagePtr* ptr = start + arr->size; - const upb_MiniTable* subm = subs[f->UPB_PRIVATE(submsg_index)].submsg; - if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded); - do { - size_t size; - ptr--; - encode_tag(e, f->number, kUpb_WireType_EndGroup); - encode_TaggedMessagePtr(e, *ptr, subm, &size); - encode_tag(e, f->number, kUpb_WireType_StartGroup); - } while (ptr != start); - e->depth++; - return; - } - case kUpb_FieldType_Message: { - const upb_TaggedMessagePtr* start = _upb_array_constptr(arr); - const upb_TaggedMessagePtr* ptr = start + arr->size; - const upb_MiniTable* subm = subs[f->UPB_PRIVATE(submsg_index)].submsg; - if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded); - do { - size_t size; - ptr--; - encode_TaggedMessagePtr(e, *ptr, subm, &size); - encode_varint(e, size); - encode_tag(e, f->number, kUpb_WireType_Delimited); - } while (ptr != start); - e->depth++; - return; - } - } -#undef VARINT_CASE - - if (packed) { - encode_varint(e, e->limit - e->ptr - pre_len); - encode_tag(e, f->number, kUpb_WireType_Delimited); +static upb_Message* _upb_Decoder_ReuseSubMessage( + upb_Decoder* d, const upb_MiniTableSub* subs, + const upb_MiniTableField* field, upb_TaggedMessagePtr* target) { + upb_TaggedMessagePtr tagged = *target; + const upb_MiniTable* subl = subs[field->UPB_PRIVATE(submsg_index)].submsg; + UPB_ASSERT(subl); + if (!upb_TaggedMessagePtr_IsEmpty(tagged) || subl == &_kUpb_MiniTable_Empty) { + return _upb_TaggedMessagePtr_GetMessage(tagged); } -} -static void encode_mapentry(upb_encstate* e, uint32_t number, - const upb_MiniTable* layout, - const upb_MapEntry* ent) { - const upb_MiniTableField* key_field = &layout->fields[0]; - const upb_MiniTableField* val_field = &layout->fields[1]; - size_t pre_len = e->limit - e->ptr; + // We found an empty message from a previous parse that was performed before + // this field was linked. But it is linked now, so we want to allocate a new + // message of the correct type and promote data into it before continuing. + upb_Message* existing = _upb_TaggedMessagePtr_GetEmptyMessage(tagged); + upb_Message* promoted = _upb_Decoder_NewSubMessage(d, subs, field, target); size_t size; - encode_scalar(e, &ent->data.v, layout->subs, val_field); - encode_scalar(e, &ent->data.k, layout->subs, key_field); - size = (e->limit - e->ptr) - pre_len; - encode_varint(e, size); - encode_tag(e, number, kUpb_WireType_Delimited); + const char* unknown = upb_Message_GetUnknown(existing, &size); + upb_DecodeStatus status = upb_Decode(unknown, size, promoted, subl, d->extreg, + d->options, &d->arena); + if (status != kUpb_DecodeStatus_Ok) _upb_Decoder_ErrorJmp(d, status); + return promoted; } -static void encode_map(upb_encstate* e, const upb_Message* msg, - const upb_MiniTableSub* subs, - const upb_MiniTableField* f) { - const upb_Map* map = *UPB_PTR_AT(msg, f->offset, const upb_Map*); - const upb_MiniTable* layout = subs[f->UPB_PRIVATE(submsg_index)].submsg; - UPB_ASSERT(layout->field_count == 2); - - if (map == NULL) return; - - if (e->options & kUpb_EncodeOption_Deterministic) { - _upb_sortedmap sorted; - _upb_mapsorter_pushmap(&e->sorter, - layout->fields[0].UPB_PRIVATE(descriptortype), map, - &sorted); - upb_MapEntry ent; - while (_upb_sortedmap_next(&e->sorter, map, &sorted, &ent)) { - encode_mapentry(e, f->number, layout, &ent); - } - _upb_mapsorter_popmap(&e->sorter, &sorted); - } else { - intptr_t iter = UPB_STRTABLE_BEGIN; - upb_StringView key; - upb_value val; - while (upb_strtable_next2(&map->table, &key, &val, &iter)) { - upb_MapEntry ent; - _upb_map_fromkey(key, &ent.data.k, map->key_size); - _upb_map_fromvalue(val, &ent.data.v, map->val_size); - encode_mapentry(e, f->number, layout, &ent); - } - } +static const char* _upb_Decoder_ReadString(upb_Decoder* d, const char* ptr, + int size, upb_StringView* str) { + const char* str_ptr = ptr; + ptr = upb_EpsCopyInputStream_ReadString(&d->input, &str_ptr, size, &d->arena); + if (!ptr) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + str->data = str_ptr; + str->size = size; + return ptr; } -static bool encode_shouldencode(upb_encstate* e, const upb_Message* msg, - const upb_MiniTableSub* subs, - const upb_MiniTableField* f) { - if (f->presence == 0) { - /* Proto3 presence or map/array. */ - const void* mem = UPB_PTR_AT(msg, f->offset, void); - switch (_upb_MiniTableField_GetRep(f)) { - case kUpb_FieldRep_1Byte: { - char ch; - memcpy(&ch, mem, 1); - return ch != 0; - } - case kUpb_FieldRep_4Byte: { - uint32_t u32; - memcpy(&u32, mem, 4); - return u32 != 0; - } - case kUpb_FieldRep_8Byte: { - uint64_t u64; - memcpy(&u64, mem, 8); - return u64 != 0; - } - case kUpb_FieldRep_StringView: { - const upb_StringView* str = (const upb_StringView*)mem; - return str->size != 0; - } - default: - UPB_UNREACHABLE(); - } - } else if (f->presence > 0) { - /* Proto2 presence: hasbit. */ - return _upb_hasbit_field(msg, f); - } else { - /* Field is in a oneof. */ - return _upb_getoneofcase_field(msg, f) == f->number; +UPB_FORCEINLINE +static const char* _upb_Decoder_RecurseSubMessage(upb_Decoder* d, + const char* ptr, + upb_Message* submsg, + const upb_MiniTable* subl, + uint32_t expected_end_group) { + if (--d->depth < 0) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_MaxDepthExceeded); } -} - -static void encode_field(upb_encstate* e, const upb_Message* msg, - const upb_MiniTableSub* subs, - const upb_MiniTableField* field) { - switch (upb_FieldMode_Get(field)) { - case kUpb_FieldMode_Array: - encode_array(e, msg, subs, field); - break; - case kUpb_FieldMode_Map: - encode_map(e, msg, subs, field); - break; - case kUpb_FieldMode_Scalar: - encode_scalar(e, UPB_PTR_AT(msg, field->offset, void), subs, field); - break; - default: - UPB_UNREACHABLE(); + ptr = _upb_Decoder_DecodeMessage(d, ptr, submsg, subl); + d->depth++; + if (d->end_group != expected_end_group) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); } + return ptr; } -static void encode_msgset_item(upb_encstate* e, - const upb_Message_Extension* ext) { - size_t size; - encode_tag(e, kUpb_MsgSet_Item, kUpb_WireType_EndGroup); - encode_message(e, ext->data.ptr, ext->ext->sub.submsg, &size); - encode_varint(e, size); - encode_tag(e, kUpb_MsgSet_Message, kUpb_WireType_Delimited); - encode_varint(e, ext->ext->field.number); - encode_tag(e, kUpb_MsgSet_TypeId, kUpb_WireType_Varint); - encode_tag(e, kUpb_MsgSet_Item, kUpb_WireType_StartGroup); +UPB_FORCEINLINE +static const char* _upb_Decoder_DecodeSubMessage( + upb_Decoder* d, const char* ptr, upb_Message* submsg, + const upb_MiniTableSub* subs, const upb_MiniTableField* field, int size) { + int saved_delta = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, size); + const upb_MiniTable* subl = subs[field->UPB_PRIVATE(submsg_index)].submsg; + UPB_ASSERT(subl); + ptr = _upb_Decoder_RecurseSubMessage(d, ptr, submsg, subl, DECODE_NOGROUP); + upb_EpsCopyInputStream_PopLimit(&d->input, ptr, saved_delta); + return ptr; } -static void encode_ext(upb_encstate* e, const upb_Message_Extension* ext, - bool is_message_set) { - if (UPB_UNLIKELY(is_message_set)) { - encode_msgset_item(e, ext); - } else { - encode_field(e, &ext->data, &ext->ext->sub, &ext->ext->field); +UPB_FORCEINLINE +static const char* _upb_Decoder_DecodeGroup(upb_Decoder* d, const char* ptr, + upb_Message* submsg, + const upb_MiniTable* subl, + uint32_t number) { + if (_upb_Decoder_IsDone(d, &ptr)) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); } + ptr = _upb_Decoder_RecurseSubMessage(d, ptr, submsg, subl, number); + d->end_group = DECODE_NOGROUP; + return ptr; } -static void encode_message(upb_encstate* e, const upb_Message* msg, - const upb_MiniTable* m, size_t* size) { - size_t pre_len = e->limit - e->ptr; +UPB_FORCEINLINE +static const char* _upb_Decoder_DecodeUnknownGroup(upb_Decoder* d, + const char* ptr, + uint32_t number) { + return _upb_Decoder_DecodeGroup(d, ptr, NULL, NULL, number); +} - if ((e->options & kUpb_EncodeOption_CheckRequired) && m->required_count) { - uint64_t msg_head; - memcpy(&msg_head, msg, 8); - msg_head = _upb_BigEndian_Swap64(msg_head); - if (upb_MiniTable_requiredmask(m) & ~msg_head) { - encode_err(e, kUpb_EncodeStatus_MissingRequired); - } - } - - if ((e->options & kUpb_EncodeOption_SkipUnknown) == 0) { - size_t unknown_size; - const char* unknown = upb_Message_GetUnknown(msg, &unknown_size); +UPB_FORCEINLINE +static const char* _upb_Decoder_DecodeKnownGroup( + upb_Decoder* d, const char* ptr, upb_Message* submsg, + const upb_MiniTableSub* subs, const upb_MiniTableField* field) { + const upb_MiniTable* subl = subs[field->UPB_PRIVATE(submsg_index)].submsg; + UPB_ASSERT(subl); + return _upb_Decoder_DecodeGroup(d, ptr, submsg, subl, field->number); +} - if (unknown) { - encode_bytes(e, unknown, unknown_size); - } - } +static char* upb_Decoder_EncodeVarint32(uint32_t val, char* ptr) { + do { + uint8_t byte = val & 0x7fU; + val >>= 7; + if (val) byte |= 0x80U; + *(ptr++) = byte; + } while (val); + return ptr; +} - if (m->ext != kUpb_ExtMode_NonExtendable) { - /* Encode all extensions together. Unlike C++, we do not attempt to keep - * these in field number order relative to normal fields or even to each - * other. */ - size_t ext_count; - const upb_Message_Extension* ext = _upb_Message_Getexts(msg, &ext_count); - if (ext_count) { - if (e->options & kUpb_EncodeOption_Deterministic) { - _upb_sortedmap sorted; - _upb_mapsorter_pushexts(&e->sorter, ext, ext_count, &sorted); - while (_upb_sortedmap_nextext(&e->sorter, &sorted, &ext)) { - encode_ext(e, ext, m->ext == kUpb_ExtMode_IsMessageSet); - } - _upb_mapsorter_popmap(&e->sorter, &sorted); - } else { - const upb_Message_Extension* end = ext + ext_count; - for (; ext != end; ext++) { - encode_ext(e, ext, m->ext == kUpb_ExtMode_IsMessageSet); - } - } - } - } +static void _upb_Decoder_AddUnknownVarints(upb_Decoder* d, upb_Message* msg, + uint32_t val1, uint32_t val2) { + char buf[20]; + char* end = buf; + end = upb_Decoder_EncodeVarint32(val1, end); + end = upb_Decoder_EncodeVarint32(val2, end); - if (m->field_count) { - const upb_MiniTableField* f = &m->fields[m->field_count]; - const upb_MiniTableField* first = &m->fields[0]; - while (f != first) { - f--; - if (encode_shouldencode(e, msg, m->subs, f)) { - encode_field(e, msg, m->subs, f); - } - } + if (!_upb_Message_AddUnknown(msg, buf, end - buf, &d->arena)) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); } - - *size = (e->limit - e->ptr) - pre_len; } -static upb_EncodeStatus upb_Encoder_Encode(upb_encstate* const encoder, - const void* const msg, - const upb_MiniTable* const l, - char** const buf, - size_t* const size) { - // Unfortunately we must continue to perform hackery here because there are - // code paths which blindly copy the returned pointer without bothering to - // check for errors until much later (b/235839510). So we still set *buf to - // NULL on error and we still set it to non-NULL on a successful empty result. - if (UPB_SETJMP(encoder->err) == 0) { - encode_message(encoder, msg, l, size); - *size = encoder->limit - encoder->ptr; - if (*size == 0) { - static char ch; - *buf = &ch; - } else { - UPB_ASSERT(encoder->ptr); - *buf = encoder->ptr; - } - } else { - UPB_ASSERT(encoder->status != kUpb_EncodeStatus_Ok); - *buf = NULL; - *size = 0; - } +UPB_NOINLINE +static bool _upb_Decoder_CheckEnumSlow(upb_Decoder* d, const char* ptr, + upb_Message* msg, + const upb_MiniTableEnum* e, + const upb_MiniTableField* field, + uint32_t v) { + if (_upb_MiniTable_CheckEnumValueSlow(e, v)) return true; - _upb_mapsorter_destroy(&encoder->sorter); - return encoder->status; + // Unrecognized enum goes into unknown fields. + // For packed fields the tag could be arbitrarily far in the past, so we + // just re-encode the tag and value here. + uint32_t tag = ((uint32_t)field->number << 3) | kUpb_WireType_Varint; + upb_Message* unknown_msg = + field->mode & kUpb_LabelFlags_IsExtension ? d->unknown_msg : msg; + _upb_Decoder_AddUnknownVarints(d, unknown_msg, tag, v); + return false; } -upb_EncodeStatus upb_Encode(const void* msg, const upb_MiniTable* l, - int options, upb_Arena* arena, char** buf, - size_t* size) { - upb_encstate e; - unsigned depth = (unsigned)options >> 16; - - e.status = kUpb_EncodeStatus_Ok; - e.arena = arena; - e.buf = NULL; - e.limit = NULL; - e.ptr = NULL; - e.depth = depth ? depth : kUpb_WireFormat_DefaultDepthLimit; - e.options = options; - _upb_mapsorter_init(&e.sorter); +UPB_FORCEINLINE +static bool _upb_Decoder_CheckEnum(upb_Decoder* d, const char* ptr, + upb_Message* msg, const upb_MiniTableEnum* e, + const upb_MiniTableField* field, + wireval* val) { + uint32_t v = val->uint32_val; - return upb_Encoder_Encode(&e, msg, l, buf, size); + _kUpb_FastEnumCheck_Status status = _upb_MiniTable_CheckEnumValueFast(e, v); + if (UPB_LIKELY(status == _kUpb_FastEnumCheck_ValueIsInEnum)) return true; + return _upb_Decoder_CheckEnumSlow(d, ptr, msg, e, field, v); } +UPB_NOINLINE +static const char* _upb_Decoder_DecodeEnumArray(upb_Decoder* d, const char* ptr, + upb_Message* msg, + upb_Array* arr, + const upb_MiniTableSub* subs, + const upb_MiniTableField* field, + wireval* val) { + const upb_MiniTableEnum* e = subs[field->UPB_PRIVATE(submsg_index)].subenum; + if (!_upb_Decoder_CheckEnum(d, ptr, msg, e, field, val)) return ptr; + void* mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->size * 4, void); + arr->size++; + memcpy(mem, val, 4); + return ptr; +} +UPB_FORCEINLINE +static const char* _upb_Decoder_DecodeFixedPacked( + upb_Decoder* d, const char* ptr, upb_Array* arr, wireval* val, + const upb_MiniTableField* field, int lg2) { + int mask = (1 << lg2) - 1; + size_t count = val->size >> lg2; + if ((val->size & mask) != 0) { + // Length isn't a round multiple of elem size. + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); + } + _upb_Decoder_Reserve(d, arr, count); + void* mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << lg2, void); + arr->size += count; + // Note: if/when the decoder supports multi-buffer input, we will need to + // handle buffer seams here. + if (_upb_IsLittleEndian()) { + ptr = upb_EpsCopyInputStream_Copy(&d->input, ptr, mem, val->size); + } else { + int delta = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, val->size); + char* dst = mem; + while (!_upb_Decoder_IsDone(d, &ptr)) { + if (lg2 == 2) { + ptr = upb_WireReader_ReadFixed32(ptr, dst); + dst += 4; + } else { + UPB_ASSERT(lg2 == 3); + ptr = upb_WireReader_ReadFixed64(ptr, dst); + dst += 8; + } + } + upb_EpsCopyInputStream_PopLimit(&d->input, ptr, delta); + } -// Must be last. + return ptr; +} -UPB_NOINLINE _upb_WireReader_ReadLongVarintRet -_upb_WireReader_ReadLongVarint(const char* ptr, uint64_t val) { - _upb_WireReader_ReadLongVarintRet ret = {NULL, 0}; - uint64_t byte; - int i; - for (i = 1; i < 10; i++) { - byte = (uint8_t)ptr[i]; - val += (byte - 1) << (i * 7); - if (!(byte & 0x80)) { - ret.ptr = ptr + i + 1; - ret.val = val; - return ret; +UPB_FORCEINLINE +static const char* _upb_Decoder_DecodeVarintPacked( + upb_Decoder* d, const char* ptr, upb_Array* arr, wireval* val, + const upb_MiniTableField* field, int lg2) { + int scale = 1 << lg2; + int saved_limit = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, val->size); + char* out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << lg2, void); + while (!_upb_Decoder_IsDone(d, &ptr)) { + wireval elem; + ptr = _upb_Decoder_DecodeVarint(d, ptr, &elem.uint64_val); + _upb_Decoder_Munge(field->UPB_PRIVATE(descriptortype), &elem); + if (_upb_Decoder_Reserve(d, arr, 1)) { + out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << lg2, void); } + arr->size++; + memcpy(out, &elem, scale); + out += scale; } - return ret; + upb_EpsCopyInputStream_PopLimit(&d->input, ptr, saved_limit); + return ptr; } -const char* _upb_WireReader_SkipGroup(const char* ptr, uint32_t tag, - int depth_limit, - upb_EpsCopyInputStream* stream) { - if (--depth_limit == 0) return NULL; - uint32_t end_group_tag = (tag & ~7ULL) | kUpb_WireType_EndGroup; - while (!upb_EpsCopyInputStream_IsDone(stream, &ptr)) { - uint32_t tag; - ptr = upb_WireReader_ReadTag(ptr, &tag); - if (!ptr) return NULL; - if (tag == end_group_tag) return ptr; - ptr = _upb_WireReader_SkipValue(ptr, tag, depth_limit, stream); - if (!ptr) return NULL; +UPB_NOINLINE +static const char* _upb_Decoder_DecodeEnumPacked( + upb_Decoder* d, const char* ptr, upb_Message* msg, upb_Array* arr, + const upb_MiniTableSub* subs, const upb_MiniTableField* field, + wireval* val) { + const upb_MiniTableEnum* e = subs[field->UPB_PRIVATE(submsg_index)].subenum; + int saved_limit = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, val->size); + char* out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size * 4, void); + while (!_upb_Decoder_IsDone(d, &ptr)) { + wireval elem; + ptr = _upb_Decoder_DecodeVarint(d, ptr, &elem.uint64_val); + _upb_Decoder_MungeInt32(&elem); + if (!_upb_Decoder_CheckEnum(d, ptr, msg, e, field, &elem)) { + continue; + } + if (_upb_Decoder_Reserve(d, arr, 1)) { + out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size * 4, void); + } + arr->size++; + memcpy(out, &elem, 4); + out += 4; } + upb_EpsCopyInputStream_PopLimit(&d->input, ptr, saved_limit); return ptr; } +upb_Array* _upb_Decoder_CreateArray(upb_Decoder* d, + const upb_MiniTableField* field) { + /* Maps descriptor type -> elem_size_lg2. */ + static const uint8_t kElemSizeLg2[] = { + [0] = -1, // invalid descriptor type + [kUpb_FieldType_Double] = 3, + [kUpb_FieldType_Float] = 2, + [kUpb_FieldType_Int64] = 3, + [kUpb_FieldType_UInt64] = 3, + [kUpb_FieldType_Int32] = 2, + [kUpb_FieldType_Fixed64] = 3, + [kUpb_FieldType_Fixed32] = 2, + [kUpb_FieldType_Bool] = 0, + [kUpb_FieldType_String] = UPB_SIZE(3, 4), + [kUpb_FieldType_Group] = UPB_SIZE(2, 3), + [kUpb_FieldType_Message] = UPB_SIZE(2, 3), + [kUpb_FieldType_Bytes] = UPB_SIZE(3, 4), + [kUpb_FieldType_UInt32] = 2, + [kUpb_FieldType_Enum] = 2, + [kUpb_FieldType_SFixed32] = 2, + [kUpb_FieldType_SFixed64] = 3, + [kUpb_FieldType_SInt32] = 2, + [kUpb_FieldType_SInt64] = 3, + }; - -// Must be last. - -typedef struct { - uint64_t present_values_mask; - uint32_t last_written_value; -} upb_MtDataEncoderInternal_EnumState; - -typedef struct { - uint64_t msg_modifiers; - uint32_t last_field_num; - enum { - kUpb_OneofState_NotStarted, - kUpb_OneofState_StartedOneof, - kUpb_OneofState_EmittedOneofField, - } oneof_state; -} upb_MtDataEncoderInternal_MsgState; - -typedef struct { - char* buf_start; // Only for checking kUpb_MtDataEncoder_MinSize. - union { - upb_MtDataEncoderInternal_EnumState enum_state; - upb_MtDataEncoderInternal_MsgState msg_state; - } state; -} upb_MtDataEncoderInternal; - -static upb_MtDataEncoderInternal* upb_MtDataEncoder_GetInternal( - upb_MtDataEncoder* e, char* buf_start) { - UPB_ASSERT(sizeof(upb_MtDataEncoderInternal) <= sizeof(e->internal)); - upb_MtDataEncoderInternal* ret = (upb_MtDataEncoderInternal*)e->internal; - ret->buf_start = buf_start; + size_t lg2 = kElemSizeLg2[field->UPB_PRIVATE(descriptortype)]; + upb_Array* ret = _upb_Array_New(&d->arena, 4, lg2); + if (!ret) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); return ret; } -static char* upb_MtDataEncoder_PutRaw(upb_MtDataEncoder* e, char* ptr, - char ch) { - upb_MtDataEncoderInternal* in = (upb_MtDataEncoderInternal*)e->internal; - UPB_ASSERT(ptr - in->buf_start < kUpb_MtDataEncoder_MinSize); - if (ptr == e->end) return NULL; - *ptr++ = ch; - return ptr; -} - -static char* upb_MtDataEncoder_Put(upb_MtDataEncoder* e, char* ptr, char ch) { - return upb_MtDataEncoder_PutRaw(e, ptr, _upb_ToBase92(ch)); -} +static const char* _upb_Decoder_DecodeToArray(upb_Decoder* d, const char* ptr, + upb_Message* msg, + const upb_MiniTableSub* subs, + const upb_MiniTableField* field, + wireval* val, int op) { + upb_Array** arrp = UPB_PTR_AT(msg, field->offset, void); + upb_Array* arr = *arrp; + void* mem; -static char* upb_MtDataEncoder_PutBase92Varint(upb_MtDataEncoder* e, char* ptr, - uint32_t val, int min, int max) { - int shift = upb_Log2Ceiling(_upb_FromBase92(max) - _upb_FromBase92(min) + 1); - UPB_ASSERT(shift <= 6); - uint32_t mask = (1 << shift) - 1; - do { - uint32_t bits = val & mask; - ptr = upb_MtDataEncoder_Put(e, ptr, bits + _upb_FromBase92(min)); - if (!ptr) return NULL; - val >>= shift; - } while (val); - return ptr; -} + if (arr) { + _upb_Decoder_Reserve(d, arr, 1); + } else { + arr = _upb_Decoder_CreateArray(d, field); + *arrp = arr; + } -char* upb_MtDataEncoder_PutModifier(upb_MtDataEncoder* e, char* ptr, - uint64_t mod) { - if (mod) { - ptr = upb_MtDataEncoder_PutBase92Varint(e, ptr, mod, - kUpb_EncodedValue_MinModifier, - kUpb_EncodedValue_MaxModifier); + switch (op) { + case kUpb_DecodeOp_Scalar1Byte: + case kUpb_DecodeOp_Scalar4Byte: + case kUpb_DecodeOp_Scalar8Byte: + /* Append scalar value. */ + mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << op, void); + arr->size++; + memcpy(mem, val, 1 << op); + return ptr; + case kUpb_DecodeOp_String: + _upb_Decoder_VerifyUtf8(d, ptr, val->size); + /* Fallthrough. */ + case kUpb_DecodeOp_Bytes: { + /* Append bytes. */ + upb_StringView* str = (upb_StringView*)_upb_array_ptr(arr) + arr->size; + arr->size++; + return _upb_Decoder_ReadString(d, ptr, val->size, str); + } + case kUpb_DecodeOp_SubMessage: { + /* Append submessage / group. */ + upb_TaggedMessagePtr* target = UPB_PTR_AT( + _upb_array_ptr(arr), arr->size * sizeof(void*), upb_TaggedMessagePtr); + upb_Message* submsg = _upb_Decoder_NewSubMessage(d, subs, field, target); + arr->size++; + if (UPB_UNLIKELY(field->UPB_PRIVATE(descriptortype) == + kUpb_FieldType_Group)) { + return _upb_Decoder_DecodeKnownGroup(d, ptr, submsg, subs, field); + } else { + return _upb_Decoder_DecodeSubMessage(d, ptr, submsg, subs, field, + val->size); + } + } + case OP_FIXPCK_LG2(2): + case OP_FIXPCK_LG2(3): + return _upb_Decoder_DecodeFixedPacked(d, ptr, arr, val, field, + op - OP_FIXPCK_LG2(0)); + case OP_VARPCK_LG2(0): + case OP_VARPCK_LG2(2): + case OP_VARPCK_LG2(3): + return _upb_Decoder_DecodeVarintPacked(d, ptr, arr, val, field, + op - OP_VARPCK_LG2(0)); + case kUpb_DecodeOp_Enum: + return _upb_Decoder_DecodeEnumArray(d, ptr, msg, arr, subs, field, val); + case kUpb_DecodeOp_PackedEnum: + return _upb_Decoder_DecodeEnumPacked(d, ptr, msg, arr, subs, field, val); + default: + UPB_UNREACHABLE(); } - return ptr; } -char* upb_MtDataEncoder_EncodeExtension(upb_MtDataEncoder* e, char* ptr, - upb_FieldType type, uint32_t field_num, - uint64_t field_mod) { - upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); - in->state.msg_state.msg_modifiers = 0; - in->state.msg_state.last_field_num = 0; - in->state.msg_state.oneof_state = kUpb_OneofState_NotStarted; - - ptr = upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_ExtensionV1); - if (!ptr) return NULL; +upb_Map* _upb_Decoder_CreateMap(upb_Decoder* d, const upb_MiniTable* entry) { + /* Maps descriptor type -> upb map size. */ + static const uint8_t kSizeInMap[] = { + [0] = -1, // invalid descriptor type */ + [kUpb_FieldType_Double] = 8, + [kUpb_FieldType_Float] = 4, + [kUpb_FieldType_Int64] = 8, + [kUpb_FieldType_UInt64] = 8, + [kUpb_FieldType_Int32] = 4, + [kUpb_FieldType_Fixed64] = 8, + [kUpb_FieldType_Fixed32] = 4, + [kUpb_FieldType_Bool] = 1, + [kUpb_FieldType_String] = UPB_MAPTYPE_STRING, + [kUpb_FieldType_Group] = sizeof(void*), + [kUpb_FieldType_Message] = sizeof(void*), + [kUpb_FieldType_Bytes] = UPB_MAPTYPE_STRING, + [kUpb_FieldType_UInt32] = 4, + [kUpb_FieldType_Enum] = 4, + [kUpb_FieldType_SFixed32] = 4, + [kUpb_FieldType_SFixed64] = 8, + [kUpb_FieldType_SInt32] = 4, + [kUpb_FieldType_SInt64] = 8, + }; - return upb_MtDataEncoder_PutField(e, ptr, type, field_num, field_mod); + const upb_MiniTableField* key_field = &entry->fields[0]; + const upb_MiniTableField* val_field = &entry->fields[1]; + char key_size = kSizeInMap[key_field->UPB_PRIVATE(descriptortype)]; + char val_size = kSizeInMap[val_field->UPB_PRIVATE(descriptortype)]; + UPB_ASSERT(key_field->offset == offsetof(upb_MapEntryData, k)); + UPB_ASSERT(val_field->offset == offsetof(upb_MapEntryData, v)); + upb_Map* ret = _upb_Map_New(&d->arena, key_size, val_size); + if (!ret) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + return ret; } -char* upb_MtDataEncoder_EncodeMap(upb_MtDataEncoder* e, char* ptr, - upb_FieldType key_type, - upb_FieldType value_type, uint64_t key_mod, - uint64_t value_mod) { - upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); - in->state.msg_state.msg_modifiers = 0; - in->state.msg_state.last_field_num = 0; - in->state.msg_state.oneof_state = kUpb_OneofState_NotStarted; - - ptr = upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_MapV1); - if (!ptr) return NULL; - - ptr = upb_MtDataEncoder_PutField(e, ptr, key_type, 1, key_mod); - if (!ptr) return NULL; - - return upb_MtDataEncoder_PutField(e, ptr, value_type, 2, value_mod); -} +static const char* _upb_Decoder_DecodeToMap(upb_Decoder* d, const char* ptr, + upb_Message* msg, + const upb_MiniTableSub* subs, + const upb_MiniTableField* field, + wireval* val) { + upb_Map** map_p = UPB_PTR_AT(msg, field->offset, upb_Map*); + upb_Map* map = *map_p; + upb_MapEntry ent; + UPB_ASSERT(upb_MiniTableField_Type(field) == kUpb_FieldType_Message); + const upb_MiniTable* entry = subs[field->UPB_PRIVATE(submsg_index)].submsg; -char* upb_MtDataEncoder_EncodeMessageSet(upb_MtDataEncoder* e, char* ptr) { - (void)upb_MtDataEncoder_GetInternal(e, ptr); - return upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_MessageSetV1); -} + UPB_ASSERT(entry); + UPB_ASSERT(entry->field_count == 2); + UPB_ASSERT(!upb_IsRepeatedOrMap(&entry->fields[0])); + UPB_ASSERT(!upb_IsRepeatedOrMap(&entry->fields[1])); -char* upb_MtDataEncoder_StartMessage(upb_MtDataEncoder* e, char* ptr, - uint64_t msg_mod) { - upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); - in->state.msg_state.msg_modifiers = msg_mod; - in->state.msg_state.last_field_num = 0; - in->state.msg_state.oneof_state = kUpb_OneofState_NotStarted; + if (!map) { + map = _upb_Decoder_CreateMap(d, entry); + *map_p = map; + } - ptr = upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_MessageV1); - if (!ptr) return NULL; + // Parse map entry. + memset(&ent, 0, sizeof(ent)); - return upb_MtDataEncoder_PutModifier(e, ptr, msg_mod); -} + if (entry->fields[1].UPB_PRIVATE(descriptortype) == kUpb_FieldType_Message || + entry->fields[1].UPB_PRIVATE(descriptortype) == kUpb_FieldType_Group) { + // Create proactively to handle the case where it doesn't appear. + upb_TaggedMessagePtr msg; + _upb_Decoder_NewSubMessage(d, entry->subs, &entry->fields[1], &msg); + ent.data.v.val = upb_value_uintptr(msg); + } -static char* _upb_MtDataEncoder_MaybePutFieldSkip(upb_MtDataEncoder* e, - char* ptr, - uint32_t field_num) { - upb_MtDataEncoderInternal* in = (upb_MtDataEncoderInternal*)e->internal; - if (field_num <= in->state.msg_state.last_field_num) return NULL; - if (in->state.msg_state.last_field_num + 1 != field_num) { - // Put skip. - UPB_ASSERT(field_num > in->state.msg_state.last_field_num); - uint32_t skip = field_num - in->state.msg_state.last_field_num; - ptr = upb_MtDataEncoder_PutBase92Varint( - e, ptr, skip, kUpb_EncodedValue_MinSkip, kUpb_EncodedValue_MaxSkip); - if (!ptr) return NULL; + ptr = + _upb_Decoder_DecodeSubMessage(d, ptr, &ent.data, subs, field, val->size); + // check if ent had any unknown fields + size_t size; + upb_Message_GetUnknown(&ent.data, &size); + if (size != 0) { + char* buf; + size_t size; + uint32_t tag = ((uint32_t)field->number << 3) | kUpb_WireType_Delimited; + upb_EncodeStatus status = + upb_Encode(&ent.data, entry, 0, &d->arena, &buf, &size); + if (status != kUpb_EncodeStatus_Ok) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + } + _upb_Decoder_AddUnknownVarints(d, msg, tag, size); + if (!_upb_Message_AddUnknown(msg, buf, size, &d->arena)) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + } + } else { + if (_upb_Map_Insert(map, &ent.data.k, map->key_size, &ent.data.v, + map->val_size, + &d->arena) == kUpb_MapInsertStatus_OutOfMemory) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + } } - in->state.msg_state.last_field_num = field_num; return ptr; } -static char* _upb_MtDataEncoder_PutFieldType(upb_MtDataEncoder* e, char* ptr, - upb_FieldType type, - uint64_t field_mod) { - static const char kUpb_TypeToEncoded[] = { - [kUpb_FieldType_Double] = kUpb_EncodedType_Double, - [kUpb_FieldType_Float] = kUpb_EncodedType_Float, - [kUpb_FieldType_Int64] = kUpb_EncodedType_Int64, - [kUpb_FieldType_UInt64] = kUpb_EncodedType_UInt64, - [kUpb_FieldType_Int32] = kUpb_EncodedType_Int32, - [kUpb_FieldType_Fixed64] = kUpb_EncodedType_Fixed64, - [kUpb_FieldType_Fixed32] = kUpb_EncodedType_Fixed32, - [kUpb_FieldType_Bool] = kUpb_EncodedType_Bool, - [kUpb_FieldType_String] = kUpb_EncodedType_String, - [kUpb_FieldType_Group] = kUpb_EncodedType_Group, - [kUpb_FieldType_Message] = kUpb_EncodedType_Message, - [kUpb_FieldType_Bytes] = kUpb_EncodedType_Bytes, - [kUpb_FieldType_UInt32] = kUpb_EncodedType_UInt32, - [kUpb_FieldType_Enum] = kUpb_EncodedType_OpenEnum, - [kUpb_FieldType_SFixed32] = kUpb_EncodedType_SFixed32, - [kUpb_FieldType_SFixed64] = kUpb_EncodedType_SFixed64, - [kUpb_FieldType_SInt32] = kUpb_EncodedType_SInt32, - [kUpb_FieldType_SInt64] = kUpb_EncodedType_SInt64, - }; +static const char* _upb_Decoder_DecodeToSubMessage( + upb_Decoder* d, const char* ptr, upb_Message* msg, + const upb_MiniTableSub* subs, const upb_MiniTableField* field, wireval* val, + int op) { + void* mem = UPB_PTR_AT(msg, field->offset, void); + int type = field->UPB_PRIVATE(descriptortype); - int encoded_type = kUpb_TypeToEncoded[type]; + if (UPB_UNLIKELY(op == kUpb_DecodeOp_Enum) && + !_upb_Decoder_CheckEnum(d, ptr, msg, + subs[field->UPB_PRIVATE(submsg_index)].subenum, + field, val)) { + return ptr; + } - if (field_mod & kUpb_FieldModifier_IsClosedEnum) { - UPB_ASSERT(type == kUpb_FieldType_Enum); - encoded_type = kUpb_EncodedType_ClosedEnum; + /* Set presence if necessary. */ + if (field->presence > 0) { + _upb_sethas_field(msg, field); + } else if (field->presence < 0) { + /* Oneof case */ + uint32_t* oneof_case = _upb_oneofcase_field(msg, field); + if (op == kUpb_DecodeOp_SubMessage && *oneof_case != field->number) { + memset(mem, 0, sizeof(void*)); + } + *oneof_case = field->number; } - if (field_mod & kUpb_FieldModifier_IsRepeated) { - // Repeated fields shift the type number up (unlike other modifiers which - // are bit flags). - encoded_type += kUpb_EncodedType_RepeatedBase; + /* Store into message. */ + switch (op) { + case kUpb_DecodeOp_SubMessage: { + upb_TaggedMessagePtr* submsgp = mem; + upb_Message* submsg; + if (*submsgp) { + submsg = _upb_Decoder_ReuseSubMessage(d, subs, field, submsgp); + } else { + submsg = _upb_Decoder_NewSubMessage(d, subs, field, submsgp); + } + if (UPB_UNLIKELY(type == kUpb_FieldType_Group)) { + ptr = _upb_Decoder_DecodeKnownGroup(d, ptr, submsg, subs, field); + } else { + ptr = _upb_Decoder_DecodeSubMessage(d, ptr, submsg, subs, field, + val->size); + } + break; + } + case kUpb_DecodeOp_String: + _upb_Decoder_VerifyUtf8(d, ptr, val->size); + /* Fallthrough. */ + case kUpb_DecodeOp_Bytes: + return _upb_Decoder_ReadString(d, ptr, val->size, mem); + case kUpb_DecodeOp_Scalar8Byte: + memcpy(mem, val, 8); + break; + case kUpb_DecodeOp_Enum: + case kUpb_DecodeOp_Scalar4Byte: + memcpy(mem, val, 4); + break; + case kUpb_DecodeOp_Scalar1Byte: + memcpy(mem, val, 1); + break; + default: + UPB_UNREACHABLE(); } - return upb_MtDataEncoder_Put(e, ptr, encoded_type); + return ptr; } -static char* _upb_MtDataEncoder_MaybePutModifiers(upb_MtDataEncoder* e, - char* ptr, upb_FieldType type, - uint64_t field_mod) { - upb_MtDataEncoderInternal* in = (upb_MtDataEncoderInternal*)e->internal; - uint32_t encoded_modifiers = 0; - if ((field_mod & kUpb_FieldModifier_IsRepeated) && - upb_FieldType_IsPackable(type)) { - bool field_is_packed = field_mod & kUpb_FieldModifier_IsPacked; - bool default_is_packed = in->state.msg_state.msg_modifiers & - kUpb_MessageModifier_DefaultIsPacked; - if (field_is_packed != default_is_packed) { - encoded_modifiers |= kUpb_EncodedFieldModifier_FlipPacked; - } +UPB_NOINLINE +const char* _upb_Decoder_CheckRequired(upb_Decoder* d, const char* ptr, + const upb_Message* msg, + const upb_MiniTable* l) { + assert(l->required_count); + if (UPB_LIKELY((d->options & kUpb_DecodeOption_CheckRequired) == 0)) { + return ptr; } - - if (field_mod & kUpb_FieldModifier_IsProto3Singular) { - encoded_modifiers |= kUpb_EncodedFieldModifier_IsProto3Singular; + uint64_t msg_head; + memcpy(&msg_head, msg, 8); + msg_head = _upb_BigEndian_Swap64(msg_head); + if (upb_MiniTable_requiredmask(l) & ~msg_head) { + d->missing_required = true; } + return ptr; +} - if (field_mod & kUpb_FieldModifier_IsRequired) { - encoded_modifiers |= kUpb_EncodedFieldModifier_IsRequired; +UPB_FORCEINLINE +static bool _upb_Decoder_TryFastDispatch(upb_Decoder* d, const char** ptr, + upb_Message* msg, + const upb_MiniTable* layout) { +#if UPB_FASTTABLE + if (layout && layout->table_mask != (unsigned char)-1) { + uint16_t tag = _upb_FastDecoder_LoadTag(*ptr); + intptr_t table = decode_totable(layout); + *ptr = _upb_FastDecoder_TagDispatch(d, *ptr, msg, table, 0, tag); + return true; } +#endif + return false; +} - return upb_MtDataEncoder_PutModifier(e, ptr, encoded_modifiers); +static const char* upb_Decoder_SkipField(upb_Decoder* d, const char* ptr, + uint32_t tag) { + int field_number = tag >> 3; + int wire_type = tag & 7; + switch (wire_type) { + case kUpb_WireType_Varint: { + uint64_t val; + return _upb_Decoder_DecodeVarint(d, ptr, &val); + } + case kUpb_WireType_64Bit: + return ptr + 8; + case kUpb_WireType_32Bit: + return ptr + 4; + case kUpb_WireType_Delimited: { + uint32_t size; + ptr = upb_Decoder_DecodeSize(d, ptr, &size); + return ptr + size; + } + case kUpb_WireType_StartGroup: + return _upb_Decoder_DecodeUnknownGroup(d, ptr, field_number); + default: + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); + } } -char* upb_MtDataEncoder_PutField(upb_MtDataEncoder* e, char* ptr, - upb_FieldType type, uint32_t field_num, - uint64_t field_mod) { - upb_MtDataEncoder_GetInternal(e, ptr); +enum { + kStartItemTag = ((kUpb_MsgSet_Item << 3) | kUpb_WireType_StartGroup), + kEndItemTag = ((kUpb_MsgSet_Item << 3) | kUpb_WireType_EndGroup), + kTypeIdTag = ((kUpb_MsgSet_TypeId << 3) | kUpb_WireType_Varint), + kMessageTag = ((kUpb_MsgSet_Message << 3) | kUpb_WireType_Delimited), +}; - ptr = _upb_MtDataEncoder_MaybePutFieldSkip(e, ptr, field_num); - if (!ptr) return NULL; +static void upb_Decoder_AddKnownMessageSetItem( + upb_Decoder* d, upb_Message* msg, const upb_MiniTableExtension* item_mt, + const char* data, uint32_t size) { + upb_Message_Extension* ext = + _upb_Message_GetOrCreateExtension(msg, item_mt, &d->arena); + if (UPB_UNLIKELY(!ext)) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + } + upb_Message* submsg = _upb_Decoder_NewSubMessage( + d, &ext->ext->sub, &ext->ext->field, (upb_TaggedMessagePtr*)&ext->data); + upb_DecodeStatus status = upb_Decode(data, size, submsg, item_mt->sub.submsg, + d->extreg, d->options, &d->arena); + if (status != kUpb_DecodeStatus_Ok) _upb_Decoder_ErrorJmp(d, status); +} - ptr = _upb_MtDataEncoder_PutFieldType(e, ptr, type, field_mod); - if (!ptr) return NULL; +static void upb_Decoder_AddUnknownMessageSetItem(upb_Decoder* d, + upb_Message* msg, + uint32_t type_id, + const char* message_data, + uint32_t message_size) { + char buf[60]; + char* ptr = buf; + ptr = upb_Decoder_EncodeVarint32(kStartItemTag, ptr); + ptr = upb_Decoder_EncodeVarint32(kTypeIdTag, ptr); + ptr = upb_Decoder_EncodeVarint32(type_id, ptr); + ptr = upb_Decoder_EncodeVarint32(kMessageTag, ptr); + ptr = upb_Decoder_EncodeVarint32(message_size, ptr); + char* split = ptr; - return _upb_MtDataEncoder_MaybePutModifiers(e, ptr, type, field_mod); + ptr = upb_Decoder_EncodeVarint32(kEndItemTag, ptr); + char* end = ptr; + + if (!_upb_Message_AddUnknown(msg, buf, split - buf, &d->arena) || + !_upb_Message_AddUnknown(msg, message_data, message_size, &d->arena) || + !_upb_Message_AddUnknown(msg, split, end - split, &d->arena)) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + } } -char* upb_MtDataEncoder_StartOneof(upb_MtDataEncoder* e, char* ptr) { - upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); - if (in->state.msg_state.oneof_state == kUpb_OneofState_NotStarted) { - ptr = upb_MtDataEncoder_Put(e, ptr, _upb_FromBase92(kUpb_EncodedValue_End)); +static void upb_Decoder_AddMessageSetItem(upb_Decoder* d, upb_Message* msg, + const upb_MiniTable* t, + uint32_t type_id, const char* data, + uint32_t size) { + const upb_MiniTableExtension* item_mt = + upb_ExtensionRegistry_Lookup(d->extreg, t, type_id); + if (item_mt) { + upb_Decoder_AddKnownMessageSetItem(d, msg, item_mt, data, size); } else { - ptr = upb_MtDataEncoder_Put( - e, ptr, _upb_FromBase92(kUpb_EncodedValue_OneofSeparator)); + upb_Decoder_AddUnknownMessageSetItem(d, msg, type_id, data, size); } - in->state.msg_state.oneof_state = kUpb_OneofState_StartedOneof; - return ptr; } -char* upb_MtDataEncoder_PutOneofField(upb_MtDataEncoder* e, char* ptr, - uint32_t field_num) { - upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); - if (in->state.msg_state.oneof_state == kUpb_OneofState_EmittedOneofField) { - ptr = upb_MtDataEncoder_Put( - e, ptr, _upb_FromBase92(kUpb_EncodedValue_FieldSeparator)); - if (!ptr) return NULL; +static const char* upb_Decoder_DecodeMessageSetItem( + upb_Decoder* d, const char* ptr, upb_Message* msg, + const upb_MiniTable* layout) { + uint32_t type_id = 0; + upb_StringView preserved = {NULL, 0}; + typedef enum { + kUpb_HaveId = 1 << 0, + kUpb_HavePayload = 1 << 1, + } StateMask; + StateMask state_mask = 0; + while (!_upb_Decoder_IsDone(d, &ptr)) { + uint32_t tag; + ptr = _upb_Decoder_DecodeTag(d, ptr, &tag); + switch (tag) { + case kEndItemTag: + return ptr; + case kTypeIdTag: { + uint64_t tmp; + ptr = _upb_Decoder_DecodeVarint(d, ptr, &tmp); + if (state_mask & kUpb_HaveId) break; // Ignore dup. + state_mask |= kUpb_HaveId; + type_id = tmp; + if (state_mask & kUpb_HavePayload) { + upb_Decoder_AddMessageSetItem(d, msg, layout, type_id, preserved.data, + preserved.size); + } + break; + } + case kMessageTag: { + uint32_t size; + ptr = upb_Decoder_DecodeSize(d, ptr, &size); + const char* data = ptr; + ptr += size; + if (state_mask & kUpb_HavePayload) break; // Ignore dup. + state_mask |= kUpb_HavePayload; + if (state_mask & kUpb_HaveId) { + upb_Decoder_AddMessageSetItem(d, msg, layout, type_id, data, size); + } else { + // Out of order, we must preserve the payload. + preserved.data = data; + preserved.size = size; + } + break; + } + default: + // We do not preserve unexpected fields inside a message set item. + ptr = upb_Decoder_SkipField(d, ptr, tag); + break; + } } - ptr = upb_MtDataEncoder_PutBase92Varint(e, ptr, field_num, _upb_ToBase92(0), - _upb_ToBase92(63)); - in->state.msg_state.oneof_state = kUpb_OneofState_EmittedOneofField; - return ptr; + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); } -char* upb_MtDataEncoder_StartEnum(upb_MtDataEncoder* e, char* ptr) { - upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); - in->state.enum_state.present_values_mask = 0; - in->state.enum_state.last_written_value = 0; +static const upb_MiniTableField* _upb_Decoder_FindField(upb_Decoder* d, + const upb_MiniTable* t, + uint32_t field_number, + int* last_field_index) { + static upb_MiniTableField none = { + 0, 0, 0, 0, kUpb_FakeFieldType_FieldNotFound, 0}; + if (t == NULL) return &none; - return upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_EnumV1); -} + size_t idx = ((size_t)field_number) - 1; // 0 wraps to SIZE_MAX + if (idx < t->dense_below) { + /* Fastest case: index into dense fields. */ + goto found; + } -static char* upb_MtDataEncoder_FlushDenseEnumMask(upb_MtDataEncoder* e, - char* ptr) { - upb_MtDataEncoderInternal* in = (upb_MtDataEncoderInternal*)e->internal; - ptr = upb_MtDataEncoder_Put(e, ptr, in->state.enum_state.present_values_mask); - in->state.enum_state.present_values_mask = 0; - in->state.enum_state.last_written_value += 5; - return ptr; -} + if (t->dense_below < t->field_count) { + /* Linear search non-dense fields. Resume scanning from last_field_index + * since fields are usually in order. */ + size_t last = *last_field_index; + for (idx = last; idx < t->field_count; idx++) { + if (t->fields[idx].number == field_number) { + goto found; + } + } -char* upb_MtDataEncoder_PutEnumValue(upb_MtDataEncoder* e, char* ptr, - uint32_t val) { - // TODO(b/229641772): optimize this encoding. - upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); - UPB_ASSERT(val >= in->state.enum_state.last_written_value); - uint32_t delta = val - in->state.enum_state.last_written_value; - if (delta >= 5 && in->state.enum_state.present_values_mask) { - ptr = upb_MtDataEncoder_FlushDenseEnumMask(e, ptr); - if (!ptr) { - return NULL; + for (idx = t->dense_below; idx < last; idx++) { + if (t->fields[idx].number == field_number) { + goto found; + } } - delta -= 5; } - if (delta >= 5) { - ptr = upb_MtDataEncoder_PutBase92Varint( - e, ptr, delta, kUpb_EncodedValue_MinSkip, kUpb_EncodedValue_MaxSkip); - in->state.enum_state.last_written_value += delta; - delta = 0; + if (d->extreg) { + switch (t->ext) { + case kUpb_ExtMode_Extendable: { + const upb_MiniTableExtension* ext = + upb_ExtensionRegistry_Lookup(d->extreg, t, field_number); + if (ext) return &ext->field; + break; + } + case kUpb_ExtMode_IsMessageSet: + if (field_number == kUpb_MsgSet_Item) { + static upb_MiniTableField item = { + 0, 0, 0, 0, kUpb_FakeFieldType_MessageSetItem, 0}; + return &item; + } + break; + } } - UPB_ASSERT((in->state.enum_state.present_values_mask >> delta) == 0); - in->state.enum_state.present_values_mask |= 1ULL << delta; - return ptr; -} + return &none; /* Unknown field. */ -char* upb_MtDataEncoder_EndEnum(upb_MtDataEncoder* e, char* ptr) { - upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); - if (!in->state.enum_state.present_values_mask) return ptr; - return upb_MtDataEncoder_FlushDenseEnumMask(e, ptr); +found: + UPB_ASSERT(t->fields[idx].number == field_number); + *last_field_index = idx; + return &t->fields[idx]; } +int _upb_Decoder_GetVarintOp(const upb_MiniTableField* field) { + static const int8_t kVarintOps[] = { + [kUpb_FakeFieldType_FieldNotFound] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Double] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Float] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Int64] = kUpb_DecodeOp_Scalar8Byte, + [kUpb_FieldType_UInt64] = kUpb_DecodeOp_Scalar8Byte, + [kUpb_FieldType_Int32] = kUpb_DecodeOp_Scalar4Byte, + [kUpb_FieldType_Fixed64] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Fixed32] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Bool] = kUpb_DecodeOp_Scalar1Byte, + [kUpb_FieldType_String] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Group] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Message] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Bytes] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_UInt32] = kUpb_DecodeOp_Scalar4Byte, + [kUpb_FieldType_Enum] = kUpb_DecodeOp_Enum, + [kUpb_FieldType_SFixed32] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_SFixed64] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_SInt32] = kUpb_DecodeOp_Scalar4Byte, + [kUpb_FieldType_SInt64] = kUpb_DecodeOp_Scalar8Byte, + [kUpb_FakeFieldType_MessageSetItem] = kUpb_DecodeOp_UnknownField, + }; -const char _kUpb_ToBase92[] = { - ' ', '!', '#', '$', '%', '&', '(', ')', '*', '+', ',', '-', '.', '/', - '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', - '>', '?', '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', - 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', - 'Z', '[', ']', '^', '_', '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', - 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', - 'w', 'x', 'y', 'z', '{', '|', '}', '~', -}; - -const int8_t _kUpb_FromBase92[] = { - 0, 1, -1, 2, 3, 4, 5, -1, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, - 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, - 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, - 55, 56, 57, -1, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, - 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, -}; - - - -// Must be last. - -typedef struct { - upb_MdDecoder base; - upb_Arena* arena; - upb_MiniTableEnum* enum_table; - uint32_t enum_value_count; - uint32_t enum_data_count; - uint32_t enum_data_capacity; -} upb_MdEnumDecoder; - -static size_t upb_MiniTableEnum_Size(size_t count) { - return sizeof(upb_MiniTableEnum) + count * sizeof(uint32_t); + return kVarintOps[field->UPB_PRIVATE(descriptortype)]; } -static upb_MiniTableEnum* _upb_MiniTable_AddEnumDataMember(upb_MdEnumDecoder* d, - uint32_t val) { - if (d->enum_data_count == d->enum_data_capacity) { - size_t old_sz = upb_MiniTableEnum_Size(d->enum_data_capacity); - d->enum_data_capacity = UPB_MAX(2, d->enum_data_capacity * 2); - size_t new_sz = upb_MiniTableEnum_Size(d->enum_data_capacity); - d->enum_table = upb_Arena_Realloc(d->arena, d->enum_table, old_sz, new_sz); - upb_MdDecoder_CheckOutOfMemory(&d->base, d->enum_table); +UPB_FORCEINLINE +static void _upb_Decoder_CheckUnlinked(upb_Decoder* d, const upb_MiniTable* mt, + const upb_MiniTableField* field, + int* op) { + // If sub-message is not linked, treat as unknown. + if (field->mode & kUpb_LabelFlags_IsExtension) return; + const upb_MiniTableSub* sub = &mt->subs[field->UPB_PRIVATE(submsg_index)]; + if ((d->options & kUpb_DecodeOption_ExperimentalAllowUnlinked) || + sub->submsg != &_kUpb_MiniTable_Empty) { + return; } - d->enum_table->data[d->enum_data_count++] = val; - return d->enum_table; -} - -static void upb_MiniTableEnum_BuildValue(upb_MdEnumDecoder* d, uint32_t val) { - upb_MiniTableEnum* table = d->enum_table; - d->enum_value_count++; - if (table->value_count || (val > 512 && d->enum_value_count < val / 32)) { - if (table->value_count == 0) { - assert(d->enum_data_count == table->mask_limit / 32); - } - table = _upb_MiniTable_AddEnumDataMember(d, val); - table->value_count++; - } else { - uint32_t new_mask_limit = ((val / 32) + 1) * 32; - while (table->mask_limit < new_mask_limit) { - table = _upb_MiniTable_AddEnumDataMember(d, 0); - table->mask_limit += 32; - } - table->data[val / 32] |= 1ULL << (val % 32); +#ifndef NDEBUG + const upb_MiniTableField* oneof = upb_MiniTable_GetOneof(mt, field); + if (oneof) { + // All other members of the oneof must be message fields that are also + // unlinked. + do { + assert(upb_MiniTableField_CType(oneof) == kUpb_CType_Message); + const upb_MiniTableSub* oneof_sub = + &mt->subs[oneof->UPB_PRIVATE(submsg_index)]; + assert(!oneof_sub); + } while (upb_MiniTable_NextOneofField(mt, &oneof)); } +#endif // NDEBUG + *op = kUpb_DecodeOp_UnknownField; } -static upb_MiniTableEnum* upb_MtDecoder_DoBuildMiniTableEnum( - upb_MdEnumDecoder* d, const char* data, size_t len) { - // If the string is non-empty then it must begin with a version tag. - if (len) { - if (*data != kUpb_EncodedVersion_EnumV1) { - upb_MdDecoder_ErrorJmp(&d->base, "Invalid enum version: %c", *data); - } - data++; - len--; - } - - upb_MdDecoder_CheckOutOfMemory(&d->base, d->enum_table); - - // Guarantee at least 64 bits of mask without checking mask size. - d->enum_table->mask_limit = 64; - d->enum_table = _upb_MiniTable_AddEnumDataMember(d, 0); - d->enum_table = _upb_MiniTable_AddEnumDataMember(d, 0); +int _upb_Decoder_GetDelimitedOp(upb_Decoder* d, const upb_MiniTable* mt, + const upb_MiniTableField* field) { + enum { kRepeatedBase = 19 }; - d->enum_table->value_count = 0; + static const int8_t kDelimitedOps[] = { + /* For non-repeated field type. */ + [kUpb_FakeFieldType_FieldNotFound] = + kUpb_DecodeOp_UnknownField, // Field not found. + [kUpb_FieldType_Double] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Float] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Int64] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_UInt64] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Int32] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Fixed64] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Fixed32] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Bool] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_String] = kUpb_DecodeOp_String, + [kUpb_FieldType_Group] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Message] = kUpb_DecodeOp_SubMessage, + [kUpb_FieldType_Bytes] = kUpb_DecodeOp_Bytes, + [kUpb_FieldType_UInt32] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Enum] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_SFixed32] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_SFixed64] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_SInt32] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_SInt64] = kUpb_DecodeOp_UnknownField, + [kUpb_FakeFieldType_MessageSetItem] = kUpb_DecodeOp_UnknownField, + // For repeated field type. */ + [kRepeatedBase + kUpb_FieldType_Double] = OP_FIXPCK_LG2(3), + [kRepeatedBase + kUpb_FieldType_Float] = OP_FIXPCK_LG2(2), + [kRepeatedBase + kUpb_FieldType_Int64] = OP_VARPCK_LG2(3), + [kRepeatedBase + kUpb_FieldType_UInt64] = OP_VARPCK_LG2(3), + [kRepeatedBase + kUpb_FieldType_Int32] = OP_VARPCK_LG2(2), + [kRepeatedBase + kUpb_FieldType_Fixed64] = OP_FIXPCK_LG2(3), + [kRepeatedBase + kUpb_FieldType_Fixed32] = OP_FIXPCK_LG2(2), + [kRepeatedBase + kUpb_FieldType_Bool] = OP_VARPCK_LG2(0), + [kRepeatedBase + kUpb_FieldType_String] = kUpb_DecodeOp_String, + [kRepeatedBase + kUpb_FieldType_Group] = kUpb_DecodeOp_SubMessage, + [kRepeatedBase + kUpb_FieldType_Message] = kUpb_DecodeOp_SubMessage, + [kRepeatedBase + kUpb_FieldType_Bytes] = kUpb_DecodeOp_Bytes, + [kRepeatedBase + kUpb_FieldType_UInt32] = OP_VARPCK_LG2(2), + [kRepeatedBase + kUpb_FieldType_Enum] = kUpb_DecodeOp_PackedEnum, + [kRepeatedBase + kUpb_FieldType_SFixed32] = OP_FIXPCK_LG2(2), + [kRepeatedBase + kUpb_FieldType_SFixed64] = OP_FIXPCK_LG2(3), + [kRepeatedBase + kUpb_FieldType_SInt32] = OP_VARPCK_LG2(2), + [kRepeatedBase + kUpb_FieldType_SInt64] = OP_VARPCK_LG2(3), + // Omitting kUpb_FakeFieldType_MessageSetItem, because we never emit a + // repeated msgset type + }; - const char* ptr = data; - uint32_t base = 0; + int ndx = field->UPB_PRIVATE(descriptortype); + if (upb_FieldMode_Get(field) == kUpb_FieldMode_Array) ndx += kRepeatedBase; + int op = kDelimitedOps[ndx]; - while (ptr < d->base.end) { - char ch = *ptr++; - if (ch <= kUpb_EncodedValue_MaxEnumMask) { - uint32_t mask = _upb_FromBase92(ch); - for (int i = 0; i < 5; i++, base++, mask >>= 1) { - if (mask & 1) upb_MiniTableEnum_BuildValue(d, base); - } - } else if (kUpb_EncodedValue_MinSkip <= ch && - ch <= kUpb_EncodedValue_MaxSkip) { - uint32_t skip; - ptr = upb_MdDecoder_DecodeBase92Varint(&d->base, ptr, ch, - kUpb_EncodedValue_MinSkip, - kUpb_EncodedValue_MaxSkip, &skip); - base += skip; - } else { - upb_MdDecoder_ErrorJmp(&d->base, "Unexpected character: %c", ch); - } + if (op == kUpb_DecodeOp_SubMessage) { + _upb_Decoder_CheckUnlinked(d, mt, field, &op); } - return d->enum_table; + return op; } -static upb_MiniTableEnum* upb_MtDecoder_BuildMiniTableEnum( - upb_MdEnumDecoder* const decoder, const char* const data, size_t const len) { - if (UPB_SETJMP(decoder->base.err) != 0) return NULL; - return upb_MtDecoder_DoBuildMiniTableEnum(decoder, data, len); -} +UPB_FORCEINLINE +static const char* _upb_Decoder_DecodeWireValue(upb_Decoder* d, const char* ptr, + const upb_MiniTable* mt, + const upb_MiniTableField* field, + int wire_type, wireval* val, + int* op) { + static const unsigned kFixed32OkMask = (1 << kUpb_FieldType_Float) | + (1 << kUpb_FieldType_Fixed32) | + (1 << kUpb_FieldType_SFixed32); -upb_MiniTableEnum* upb_MiniDescriptor_BuildEnum(const char* data, size_t len, - upb_Arena* arena, - upb_Status* status) { - upb_MdEnumDecoder decoder = { - .base = - { - .end = UPB_PTRADD(data, len), - .status = status, - }, - .arena = arena, - .enum_table = upb_Arena_Malloc(arena, upb_MiniTableEnum_Size(2)), - .enum_value_count = 0, - .enum_data_count = 0, - .enum_data_capacity = 1, - }; + static const unsigned kFixed64OkMask = (1 << kUpb_FieldType_Double) | + (1 << kUpb_FieldType_Fixed64) | + (1 << kUpb_FieldType_SFixed64); - return upb_MtDecoder_BuildMiniTableEnum(&decoder, data, len); + switch (wire_type) { + case kUpb_WireType_Varint: + ptr = _upb_Decoder_DecodeVarint(d, ptr, &val->uint64_val); + *op = _upb_Decoder_GetVarintOp(field); + _upb_Decoder_Munge(field->UPB_PRIVATE(descriptortype), val); + return ptr; + case kUpb_WireType_32Bit: + *op = kUpb_DecodeOp_Scalar4Byte; + if (((1 << field->UPB_PRIVATE(descriptortype)) & kFixed32OkMask) == 0) { + *op = kUpb_DecodeOp_UnknownField; + } + return upb_WireReader_ReadFixed32(ptr, &val->uint32_val); + case kUpb_WireType_64Bit: + *op = kUpb_DecodeOp_Scalar8Byte; + if (((1 << field->UPB_PRIVATE(descriptortype)) & kFixed64OkMask) == 0) { + *op = kUpb_DecodeOp_UnknownField; + } + return upb_WireReader_ReadFixed64(ptr, &val->uint64_val); + case kUpb_WireType_Delimited: + ptr = upb_Decoder_DecodeSize(d, ptr, &val->size); + *op = _upb_Decoder_GetDelimitedOp(d, mt, field); + return ptr; + case kUpb_WireType_StartGroup: + val->uint32_val = field->number; + if (field->UPB_PRIVATE(descriptortype) == kUpb_FieldType_Group) { + *op = kUpb_DecodeOp_SubMessage; + _upb_Decoder_CheckUnlinked(d, mt, field, op); + } else if (field->UPB_PRIVATE(descriptortype) == + kUpb_FakeFieldType_MessageSetItem) { + *op = kUpb_DecodeOp_MessageSetItem; + } else { + *op = kUpb_DecodeOp_UnknownField; + } + return ptr; + default: + break; + } + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); } +UPB_FORCEINLINE +static const char* _upb_Decoder_DecodeKnownField( + upb_Decoder* d, const char* ptr, upb_Message* msg, + const upb_MiniTable* layout, const upb_MiniTableField* field, int op, + wireval* val) { + const upb_MiniTableSub* subs = layout->subs; + uint8_t mode = field->mode; -#include -#include - + if (UPB_UNLIKELY(mode & kUpb_LabelFlags_IsExtension)) { + const upb_MiniTableExtension* ext_layout = + (const upb_MiniTableExtension*)field; + upb_Message_Extension* ext = + _upb_Message_GetOrCreateExtension(msg, ext_layout, &d->arena); + if (UPB_UNLIKELY(!ext)) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + } + d->unknown_msg = msg; + msg = &ext->data; + subs = &ext->ext->sub; + } -// Must be last. + switch (mode & kUpb_FieldMode_Mask) { + case kUpb_FieldMode_Array: + return _upb_Decoder_DecodeToArray(d, ptr, msg, subs, field, val, op); + case kUpb_FieldMode_Map: + return _upb_Decoder_DecodeToMap(d, ptr, msg, subs, field, val); + case kUpb_FieldMode_Scalar: + return _upb_Decoder_DecodeToSubMessage(d, ptr, msg, subs, field, val, op); + default: + UPB_UNREACHABLE(); + } +} -// Note: we sort by this number when calculating layout order. -typedef enum { - kUpb_LayoutItemType_OneofCase, // Oneof case. - kUpb_LayoutItemType_OneofField, // Oneof field data. - kUpb_LayoutItemType_Field, // Non-oneof field data. +static const char* _upb_Decoder_ReverseSkipVarint(const char* ptr, + uint32_t val) { + uint32_t seen = 0; + do { + ptr--; + seen <<= 7; + seen |= *ptr & 0x7f; + } while (seen != val); + return ptr; +} - kUpb_LayoutItemType_Max = kUpb_LayoutItemType_Field, -} upb_LayoutItemType; +static const char* _upb_Decoder_DecodeUnknownField(upb_Decoder* d, + const char* ptr, + upb_Message* msg, + int field_number, + int wire_type, wireval val) { + if (field_number == 0) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); -#define kUpb_LayoutItem_IndexSentinel ((uint16_t)-1) + // Since unknown fields are the uncommon case, we do a little extra work here + // to walk backwards through the buffer to find the field start. This frees + // up a register in the fast paths (when the field is known), which leads to + // significant speedups in benchmarks. + const char* start = ptr; -typedef struct { - // Index of the corresponding field. When this is a oneof field, the field's - // offset will be the index of the next field in a linked list. - uint16_t field_index; - uint16_t offset; - upb_FieldRep rep; - upb_LayoutItemType type; -} upb_LayoutItem; + if (wire_type == kUpb_WireType_Delimited) ptr += val.size; + if (msg) { + switch (wire_type) { + case kUpb_WireType_Varint: + case kUpb_WireType_Delimited: + start--; + while (start[-1] & 0x80) start--; + break; + case kUpb_WireType_32Bit: + start -= 4; + break; + case kUpb_WireType_64Bit: + start -= 8; + break; + default: + break; + } -typedef struct { - upb_LayoutItem* data; - size_t size; - size_t capacity; -} upb_LayoutItemVector; + assert(start == d->debug_valstart); + uint32_t tag = ((uint32_t)field_number << 3) | wire_type; + start = _upb_Decoder_ReverseSkipVarint(start, tag); + assert(start == d->debug_tagstart); -typedef struct { - upb_MdDecoder base; - upb_MiniTable* table; - upb_MiniTableField* fields; - upb_MiniTablePlatform platform; - upb_LayoutItemVector vec; - upb_Arena* arena; -} upb_MtDecoder; + if (wire_type == kUpb_WireType_StartGroup) { + d->unknown = start; + d->unknown_msg = msg; + ptr = _upb_Decoder_DecodeUnknownGroup(d, ptr, field_number); + start = d->unknown; + d->unknown = NULL; + } + if (!_upb_Message_AddUnknown(msg, start, ptr - start, &d->arena)) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + } + } else if (wire_type == kUpb_WireType_StartGroup) { + ptr = _upb_Decoder_DecodeUnknownGroup(d, ptr, field_number); + } + return ptr; +} -// In each field's offset, we temporarily store a presence classifier: -enum PresenceClass { - kNoPresence = 0, - kHasbitPresence = 1, - kRequiredPresence = 2, - kOneofBase = 3, - // Negative values refer to a specific oneof with that number. Positive - // values >= kOneofBase indicate that this field is in a oneof, and specify - // the next field in this oneof's linked list. -}; +UPB_NOINLINE +static const char* _upb_Decoder_DecodeMessage(upb_Decoder* d, const char* ptr, + upb_Message* msg, + const upb_MiniTable* layout) { + int last_field_index = 0; -static bool upb_MtDecoder_FieldIsPackable(upb_MiniTableField* field) { - return (field->mode & kUpb_FieldMode_Array) && - upb_FieldType_IsPackable(field->UPB_PRIVATE(descriptortype)); -} +#if UPB_FASTTABLE + // The first time we want to skip fast dispatch, because we may have just been + // invoked by the fast parser to handle a case that it bailed on. + if (!_upb_Decoder_IsDone(d, &ptr)) goto nofast; +#endif -typedef struct { - uint16_t submsg_count; - uint16_t subenum_count; -} upb_SubCounts; + while (!_upb_Decoder_IsDone(d, &ptr)) { + uint32_t tag; + const upb_MiniTableField* field; + int field_number; + int wire_type; + wireval val; + int op; -static void upb_MiniTable_SetTypeAndSub(upb_MiniTableField* field, - upb_FieldType type, - upb_SubCounts* sub_counts, - uint64_t msg_modifiers, - bool is_proto3_enum) { - if (is_proto3_enum) { - UPB_ASSERT(type == kUpb_FieldType_Enum); - type = kUpb_FieldType_Int32; - field->mode |= kUpb_LabelFlags_IsAlternate; - } else if (type == kUpb_FieldType_String && - !(msg_modifiers & kUpb_MessageModifier_ValidateUtf8)) { - type = kUpb_FieldType_Bytes; - field->mode |= kUpb_LabelFlags_IsAlternate; - } + if (_upb_Decoder_TryFastDispatch(d, &ptr, msg, layout)) break; - field->UPB_PRIVATE(descriptortype) = type; +#if UPB_FASTTABLE + nofast: +#endif - if (upb_MtDecoder_FieldIsPackable(field) && - (msg_modifiers & kUpb_MessageModifier_DefaultIsPacked)) { - field->mode |= kUpb_LabelFlags_IsPacked; - } +#ifndef NDEBUG + d->debug_tagstart = ptr; +#endif - if (type == kUpb_FieldType_Message || type == kUpb_FieldType_Group) { - field->UPB_PRIVATE(submsg_index) = sub_counts->submsg_count++; - } else if (type == kUpb_FieldType_Enum) { - // We will need to update this later once we know the total number of - // submsg fields. - field->UPB_PRIVATE(submsg_index) = sub_counts->subenum_count++; - } else { - field->UPB_PRIVATE(submsg_index) = kUpb_NoSub; - } -} + UPB_ASSERT(ptr < d->input.limit_ptr); + ptr = _upb_Decoder_DecodeTag(d, ptr, &tag); + field_number = tag >> 3; + wire_type = tag & 7; -static const char kUpb_EncodedToType[] = { - [kUpb_EncodedType_Double] = kUpb_FieldType_Double, - [kUpb_EncodedType_Float] = kUpb_FieldType_Float, - [kUpb_EncodedType_Int64] = kUpb_FieldType_Int64, - [kUpb_EncodedType_UInt64] = kUpb_FieldType_UInt64, - [kUpb_EncodedType_Int32] = kUpb_FieldType_Int32, - [kUpb_EncodedType_Fixed64] = kUpb_FieldType_Fixed64, - [kUpb_EncodedType_Fixed32] = kUpb_FieldType_Fixed32, - [kUpb_EncodedType_Bool] = kUpb_FieldType_Bool, - [kUpb_EncodedType_String] = kUpb_FieldType_String, - [kUpb_EncodedType_Group] = kUpb_FieldType_Group, - [kUpb_EncodedType_Message] = kUpb_FieldType_Message, - [kUpb_EncodedType_Bytes] = kUpb_FieldType_Bytes, - [kUpb_EncodedType_UInt32] = kUpb_FieldType_UInt32, - [kUpb_EncodedType_OpenEnum] = kUpb_FieldType_Enum, - [kUpb_EncodedType_SFixed32] = kUpb_FieldType_SFixed32, - [kUpb_EncodedType_SFixed64] = kUpb_FieldType_SFixed64, - [kUpb_EncodedType_SInt32] = kUpb_FieldType_SInt32, - [kUpb_EncodedType_SInt64] = kUpb_FieldType_SInt64, - [kUpb_EncodedType_ClosedEnum] = kUpb_FieldType_Enum, -}; +#ifndef NDEBUG + d->debug_valstart = ptr; +#endif -static void upb_MiniTable_SetField(upb_MtDecoder* d, uint8_t ch, - upb_MiniTableField* field, - uint64_t msg_modifiers, - upb_SubCounts* sub_counts) { - static const char kUpb_EncodedToFieldRep[] = { - [kUpb_EncodedType_Double] = kUpb_FieldRep_8Byte, - [kUpb_EncodedType_Float] = kUpb_FieldRep_4Byte, - [kUpb_EncodedType_Int64] = kUpb_FieldRep_8Byte, - [kUpb_EncodedType_UInt64] = kUpb_FieldRep_8Byte, - [kUpb_EncodedType_Int32] = kUpb_FieldRep_4Byte, - [kUpb_EncodedType_Fixed64] = kUpb_FieldRep_8Byte, - [kUpb_EncodedType_Fixed32] = kUpb_FieldRep_4Byte, - [kUpb_EncodedType_Bool] = kUpb_FieldRep_1Byte, - [kUpb_EncodedType_String] = kUpb_FieldRep_StringView, - [kUpb_EncodedType_Bytes] = kUpb_FieldRep_StringView, - [kUpb_EncodedType_UInt32] = kUpb_FieldRep_4Byte, - [kUpb_EncodedType_OpenEnum] = kUpb_FieldRep_4Byte, - [kUpb_EncodedType_SFixed32] = kUpb_FieldRep_4Byte, - [kUpb_EncodedType_SFixed64] = kUpb_FieldRep_8Byte, - [kUpb_EncodedType_SInt32] = kUpb_FieldRep_4Byte, - [kUpb_EncodedType_SInt64] = kUpb_FieldRep_8Byte, - [kUpb_EncodedType_ClosedEnum] = kUpb_FieldRep_4Byte, - }; + if (wire_type == kUpb_WireType_EndGroup) { + d->end_group = field_number; + return ptr; + } - char pointer_rep = d->platform == kUpb_MiniTablePlatform_32Bit - ? kUpb_FieldRep_4Byte - : kUpb_FieldRep_8Byte; + field = _upb_Decoder_FindField(d, layout, field_number, &last_field_index); + ptr = _upb_Decoder_DecodeWireValue(d, ptr, layout, field, wire_type, &val, + &op); - int8_t type = _upb_FromBase92(ch); - if (ch >= _upb_ToBase92(kUpb_EncodedType_RepeatedBase)) { - type -= kUpb_EncodedType_RepeatedBase; - field->mode = kUpb_FieldMode_Array; - field->mode |= pointer_rep << kUpb_FieldRep_Shift; - field->offset = kNoPresence; - } else { - field->mode = kUpb_FieldMode_Scalar; - field->offset = kHasbitPresence; - if (type == kUpb_EncodedType_Group || type == kUpb_EncodedType_Message) { - field->mode |= pointer_rep << kUpb_FieldRep_Shift; - } else if ((unsigned long)type >= sizeof(kUpb_EncodedToFieldRep)) { - upb_MdDecoder_ErrorJmp(&d->base, "Invalid field type: %d", (int)type); + if (op >= 0) { + ptr = _upb_Decoder_DecodeKnownField(d, ptr, msg, layout, field, op, &val); } else { - field->mode |= kUpb_EncodedToFieldRep[type] << kUpb_FieldRep_Shift; + switch (op) { + case kUpb_DecodeOp_UnknownField: + ptr = _upb_Decoder_DecodeUnknownField(d, ptr, msg, field_number, + wire_type, val); + break; + case kUpb_DecodeOp_MessageSetItem: + ptr = upb_Decoder_DecodeMessageSetItem(d, ptr, msg, layout); + break; + } } } - if ((unsigned long)type >= sizeof(kUpb_EncodedToType)) { - upb_MdDecoder_ErrorJmp(&d->base, "Invalid field type: %d", (int)type); - } - upb_MiniTable_SetTypeAndSub(field, kUpb_EncodedToType[type], sub_counts, - msg_modifiers, type == kUpb_EncodedType_OpenEnum); -} -static void upb_MtDecoder_ModifyField(upb_MtDecoder* d, - uint32_t message_modifiers, - uint32_t field_modifiers, - upb_MiniTableField* field) { - if (field_modifiers & kUpb_EncodedFieldModifier_FlipPacked) { - if (!upb_MtDecoder_FieldIsPackable(field)) { - upb_MdDecoder_ErrorJmp(&d->base, - "Cannot flip packed on unpackable field %" PRIu32, - field->number); - } - field->mode ^= kUpb_LabelFlags_IsPacked; - } + return UPB_UNLIKELY(layout && layout->required_count) + ? _upb_Decoder_CheckRequired(d, ptr, msg, layout) + : ptr; +} - bool singular = field_modifiers & kUpb_EncodedFieldModifier_IsProto3Singular; - bool required = field_modifiers & kUpb_EncodedFieldModifier_IsRequired; +const char* _upb_FastDecoder_DecodeGeneric(struct upb_Decoder* d, + const char* ptr, upb_Message* msg, + intptr_t table, uint64_t hasbits, + uint64_t data) { + (void)data; + *(uint32_t*)msg |= hasbits; + return _upb_Decoder_DecodeMessage(d, ptr, msg, decode_totablep(table)); +} - // Validate. - if ((singular || required) && field->offset != kHasbitPresence) { - upb_MdDecoder_ErrorJmp(&d->base, - "Invalid modifier(s) for repeated field %" PRIu32, - field->number); - } - if (singular && required) { - upb_MdDecoder_ErrorJmp( - &d->base, "Field %" PRIu32 " cannot be both singular and required", - field->number); +static upb_DecodeStatus _upb_Decoder_DecodeTop(struct upb_Decoder* d, + const char* buf, void* msg, + const upb_MiniTable* l) { + if (!_upb_Decoder_TryFastDispatch(d, &buf, msg, l)) { + _upb_Decoder_DecodeMessage(d, buf, msg, l); } + if (d->end_group != DECODE_NOGROUP) return kUpb_DecodeStatus_Malformed; + if (d->missing_required) return kUpb_DecodeStatus_MissingRequired; + return kUpb_DecodeStatus_Ok; +} - if (singular) field->offset = kNoPresence; - if (required) { - field->offset = kRequiredPresence; - } +UPB_NOINLINE +const char* _upb_Decoder_IsDoneFallback(upb_EpsCopyInputStream* e, + const char* ptr, int overrun) { + return _upb_EpsCopyInputStream_IsDoneFallbackInline( + e, ptr, overrun, _upb_Decoder_BufferFlipCallback); } -static void upb_MtDecoder_PushItem(upb_MtDecoder* d, upb_LayoutItem item) { - if (d->vec.size == d->vec.capacity) { - size_t new_cap = UPB_MAX(8, d->vec.size * 2); - d->vec.data = realloc(d->vec.data, new_cap * sizeof(*d->vec.data)); - upb_MdDecoder_CheckOutOfMemory(&d->base, d->vec.data); - d->vec.capacity = new_cap; +static upb_DecodeStatus upb_Decoder_Decode(upb_Decoder* const decoder, + const char* const buf, + void* const msg, + const upb_MiniTable* const l, + upb_Arena* const arena) { + if (UPB_SETJMP(decoder->err) == 0) { + decoder->status = _upb_Decoder_DecodeTop(decoder, buf, msg, l); + } else { + UPB_ASSERT(decoder->status != kUpb_DecodeStatus_Ok); } - d->vec.data[d->vec.size++] = item; -} -static void upb_MtDecoder_PushOneof(upb_MtDecoder* d, upb_LayoutItem item) { - if (item.field_index == kUpb_LayoutItem_IndexSentinel) { - upb_MdDecoder_ErrorJmp(&d->base, "Empty oneof"); - } - item.field_index -= kOneofBase; + _upb_MemBlock* blocks = + upb_Atomic_Load(&decoder->arena.blocks, memory_order_relaxed); + arena->head = decoder->arena.head; + upb_Atomic_Store(&arena->blocks, blocks, memory_order_relaxed); + return decoder->status; +} - // Push oneof data. - item.type = kUpb_LayoutItemType_OneofField; - upb_MtDecoder_PushItem(d, item); +upb_DecodeStatus upb_Decode(const char* buf, size_t size, void* msg, + const upb_MiniTable* l, + const upb_ExtensionRegistry* extreg, int options, + upb_Arena* arena) { + upb_Decoder decoder; + unsigned depth = (unsigned)options >> 16; - // Push oneof case. - item.rep = kUpb_FieldRep_4Byte; // Field Number. - item.type = kUpb_LayoutItemType_OneofCase; - upb_MtDecoder_PushItem(d, item); -} + upb_EpsCopyInputStream_Init(&decoder.input, &buf, size, + options & kUpb_DecodeOption_AliasString); -size_t upb_MtDecoder_SizeOfRep(upb_FieldRep rep, - upb_MiniTablePlatform platform) { - static const uint8_t kRepToSize32[] = { - [kUpb_FieldRep_1Byte] = 1, - [kUpb_FieldRep_4Byte] = 4, - [kUpb_FieldRep_StringView] = 8, - [kUpb_FieldRep_8Byte] = 8, - }; - static const uint8_t kRepToSize64[] = { - [kUpb_FieldRep_1Byte] = 1, - [kUpb_FieldRep_4Byte] = 4, - [kUpb_FieldRep_StringView] = 16, - [kUpb_FieldRep_8Byte] = 8, - }; - UPB_ASSERT(sizeof(upb_StringView) == - UPB_SIZE(kRepToSize32, kRepToSize64)[kUpb_FieldRep_StringView]); - return platform == kUpb_MiniTablePlatform_32Bit ? kRepToSize32[rep] - : kRepToSize64[rep]; -} + decoder.extreg = extreg; + decoder.unknown = NULL; + decoder.depth = depth ? depth : kUpb_WireFormat_DefaultDepthLimit; + decoder.end_group = DECODE_NOGROUP; + decoder.options = (uint16_t)options; + decoder.missing_required = false; + decoder.status = kUpb_DecodeStatus_Ok; -size_t upb_MtDecoder_AlignOfRep(upb_FieldRep rep, - upb_MiniTablePlatform platform) { - static const uint8_t kRepToAlign32[] = { - [kUpb_FieldRep_1Byte] = 1, - [kUpb_FieldRep_4Byte] = 4, - [kUpb_FieldRep_StringView] = 4, - [kUpb_FieldRep_8Byte] = 8, - }; - static const uint8_t kRepToAlign64[] = { - [kUpb_FieldRep_1Byte] = 1, - [kUpb_FieldRep_4Byte] = 4, - [kUpb_FieldRep_StringView] = 8, - [kUpb_FieldRep_8Byte] = 8, - }; - UPB_ASSERT(UPB_ALIGN_OF(upb_StringView) == - UPB_SIZE(kRepToAlign32, kRepToAlign64)[kUpb_FieldRep_StringView]); - return platform == kUpb_MiniTablePlatform_32Bit ? kRepToAlign32[rep] - : kRepToAlign64[rep]; + // Violating the encapsulation of the arena for performance reasons. + // This is a temporary arena that we swap into and swap out of when we are + // done. The temporary arena only needs to be able to handle allocation, + // not fuse or free, so it does not need many of the members to be initialized + // (particularly parent_or_count). + _upb_MemBlock* blocks = upb_Atomic_Load(&arena->blocks, memory_order_relaxed); + decoder.arena.head = arena->head; + decoder.arena.block_alloc = arena->block_alloc; + upb_Atomic_Init(&decoder.arena.blocks, blocks); + + return upb_Decoder_Decode(&decoder, buf, msg, l, arena); } -static const char* upb_MtDecoder_DecodeOneofField(upb_MtDecoder* d, - const char* ptr, - char first_ch, - upb_LayoutItem* item) { - uint32_t field_num; - ptr = upb_MdDecoder_DecodeBase92Varint( - &d->base, ptr, first_ch, kUpb_EncodedValue_MinOneofField, - kUpb_EncodedValue_MaxOneofField, &field_num); - upb_MiniTableField* f = - (void*)upb_MiniTable_FindFieldByNumber(d->table, field_num); +#undef OP_FIXPCK_LG2 +#undef OP_VARPCK_LG2 - if (!f) { - upb_MdDecoder_ErrorJmp(&d->base, - "Couldn't add field number %" PRIu32 - " to oneof, no such field number.", - field_num); +// Fast decoder: ~3x the speed of decode.c, but requires x86-64/ARM64. +// Also the table size grows by 2x. +// +// Could potentially be ported to other 64-bit archs that pass at least six +// arguments in registers and have 8 unused high bits in pointers. +// +// The overall design is to create specialized functions for every possible +// field type (eg. oneof boolean field with a 1 byte tag) and then dispatch +// to the specialized function as quickly as possible. + + + +// Must be last. + +#if UPB_FASTTABLE + +// The standard set of arguments passed to each parsing function. +// Thanks to x86-64 calling conventions, these will stay in registers. +#define UPB_PARSE_PARAMS \ + upb_Decoder *d, const char *ptr, upb_Message *msg, intptr_t table, \ + uint64_t hasbits, uint64_t data + +#define UPB_PARSE_ARGS d, ptr, msg, table, hasbits, data + +#define RETURN_GENERIC(m) \ + /* Uncomment either of these for debugging purposes. */ \ + /* fprintf(stderr, m); */ \ + /*__builtin_trap(); */ \ + return _upb_FastDecoder_DecodeGeneric(d, ptr, msg, table, hasbits, 0); + +typedef enum { + CARD_s = 0, /* Singular (optional, non-repeated) */ + CARD_o = 1, /* Oneof */ + CARD_r = 2, /* Repeated */ + CARD_p = 3 /* Packed Repeated */ +} upb_card; + +UPB_NOINLINE +static const char* fastdecode_isdonefallback(UPB_PARSE_PARAMS) { + int overrun = data; + ptr = _upb_EpsCopyInputStream_IsDoneFallbackInline( + &d->input, ptr, overrun, _upb_Decoder_BufferFlipCallback); + data = _upb_FastDecoder_LoadTag(ptr); + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); +} + +UPB_FORCEINLINE +static const char* fastdecode_dispatch(UPB_PARSE_PARAMS) { + int overrun; + switch (upb_EpsCopyInputStream_IsDoneStatus(&d->input, ptr, &overrun)) { + case kUpb_IsDoneStatus_Done: + *(uint32_t*)msg |= hasbits; // Sync hasbits. + const upb_MiniTable* l = decode_totablep(table); + return UPB_UNLIKELY(l->required_count) + ? _upb_Decoder_CheckRequired(d, ptr, msg, l) + : ptr; + case kUpb_IsDoneStatus_NotDone: + break; + case kUpb_IsDoneStatus_NeedFallback: + data = overrun; + UPB_MUSTTAIL return fastdecode_isdonefallback(UPB_PARSE_ARGS); } - if (f->offset != kHasbitPresence) { - upb_MdDecoder_ErrorJmp( - &d->base, - "Cannot add repeated, required, or singular field %" PRIu32 - " to oneof.", - field_num); + + // Read two bytes of tag data (for a one-byte tag, the high byte is junk). + data = _upb_FastDecoder_LoadTag(ptr); + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); +} + +UPB_FORCEINLINE +static bool fastdecode_checktag(uint16_t data, int tagbytes) { + if (tagbytes == 1) { + return (data & 0xff) == 0; + } else { + return data == 0; } +} - // Oneof storage must be large enough to accommodate the largest member. - int rep = f->mode >> kUpb_FieldRep_Shift; - if (upb_MtDecoder_SizeOfRep(rep, d->platform) > - upb_MtDecoder_SizeOfRep(item->rep, d->platform)) { - item->rep = rep; +UPB_FORCEINLINE +static const char* fastdecode_longsize(const char* ptr, int* size) { + int i; + UPB_ASSERT(*size & 0x80); + *size &= 0xff; + for (i = 0; i < 3; i++) { + ptr++; + size_t byte = (uint8_t)ptr[-1]; + *size += (byte - 1) << (7 + 7 * i); + if (UPB_LIKELY((byte & 0x80) == 0)) return ptr; } - // Prepend this field to the linked list. - f->offset = item->field_index; - item->field_index = (f - d->fields) + kOneofBase; + ptr++; + size_t byte = (uint8_t)ptr[-1]; + // len is limited by 2gb not 4gb, hence 8 and not 16 as normally expected + // for a 32 bit varint. + if (UPB_UNLIKELY(byte >= 8)) return NULL; + *size += (byte - 1) << 28; return ptr; } -static const char* upb_MtDecoder_DecodeOneofs(upb_MtDecoder* d, - const char* ptr) { - upb_LayoutItem item = {.rep = 0, - .field_index = kUpb_LayoutItem_IndexSentinel}; - while (ptr < d->base.end) { - char ch = *ptr++; - if (ch == kUpb_EncodedValue_FieldSeparator) { - // Field separator, no action needed. - } else if (ch == kUpb_EncodedValue_OneofSeparator) { - // End of oneof. - upb_MtDecoder_PushOneof(d, item); - item.field_index = kUpb_LayoutItem_IndexSentinel; // Move to next oneof. - } else { - ptr = upb_MtDecoder_DecodeOneofField(d, ptr, ch, &item); +UPB_FORCEINLINE +static const char* fastdecode_delimited( + upb_Decoder* d, const char* ptr, + upb_EpsCopyInputStream_ParseDelimitedFunc* func, void* ctx) { + ptr++; + + // Sign-extend so varint greater than one byte becomes negative, causing + // fast delimited parse to fail. + int len = (int8_t)ptr[-1]; + + if (!upb_EpsCopyInputStream_TryParseDelimitedFast(&d->input, &ptr, len, func, + ctx)) { + // Slow case: Sub-message is >=128 bytes and/or exceeds the current buffer. + // If it exceeds the buffer limit, limit/limit_ptr will change during + // sub-message parsing, so we need to preserve delta, not limit. + if (UPB_UNLIKELY(len & 0x80)) { + // Size varint >1 byte (length >= 128). + ptr = fastdecode_longsize(ptr, &len); + if (!ptr) { + // Corrupt wire format: size exceeded INT_MAX. + return NULL; + } + } + if (!upb_EpsCopyInputStream_CheckSize(&d->input, ptr, len)) { + // Corrupt wire format: invalid limit. + return NULL; } + int delta = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, len); + ptr = func(&d->input, ptr, ctx); + upb_EpsCopyInputStream_PopLimit(&d->input, ptr, delta); } - - // Push final oneof. - upb_MtDecoder_PushOneof(d, item); return ptr; } -static const char* upb_MtDecoder_ParseModifier(upb_MtDecoder* d, - const char* ptr, char first_ch, - upb_MiniTableField* last_field, - uint64_t* msg_modifiers) { - uint32_t mod; - ptr = upb_MdDecoder_DecodeBase92Varint(&d->base, ptr, first_ch, - kUpb_EncodedValue_MinModifier, - kUpb_EncodedValue_MaxModifier, &mod); - if (last_field) { - upb_MtDecoder_ModifyField(d, *msg_modifiers, mod, last_field); +/* singular, oneof, repeated field handling ***********************************/ + +typedef struct { + upb_Array* arr; + void* end; +} fastdecode_arr; + +typedef enum { + FD_NEXT_ATLIMIT, + FD_NEXT_SAMEFIELD, + FD_NEXT_OTHERFIELD +} fastdecode_next; + +typedef struct { + void* dst; + fastdecode_next next; + uint32_t tag; +} fastdecode_nextret; + +UPB_FORCEINLINE +static void* fastdecode_resizearr(upb_Decoder* d, void* dst, + fastdecode_arr* farr, int valbytes) { + if (UPB_UNLIKELY(dst == farr->end)) { + size_t old_size = farr->arr->capacity; + size_t old_bytes = old_size * valbytes; + size_t new_size = old_size * 2; + size_t new_bytes = new_size * valbytes; + char* old_ptr = _upb_array_ptr(farr->arr); + char* new_ptr = upb_Arena_Realloc(&d->arena, old_ptr, old_bytes, new_bytes); + uint8_t elem_size_lg2 = __builtin_ctz(valbytes); + farr->arr->capacity = new_size; + farr->arr->data = _upb_array_tagptr(new_ptr, elem_size_lg2); + dst = (void*)(new_ptr + (old_size * valbytes)); + farr->end = (void*)(new_ptr + (new_size * valbytes)); + } + return dst; +} + +UPB_FORCEINLINE +static bool fastdecode_tagmatch(uint32_t tag, uint64_t data, int tagbytes) { + if (tagbytes == 1) { + return (uint8_t)tag == (uint8_t)data; + } else { + return (uint16_t)tag == (uint16_t)data; + } +} + +UPB_FORCEINLINE +static void fastdecode_commitarr(void* dst, fastdecode_arr* farr, + int valbytes) { + farr->arr->size = + (size_t)((char*)dst - (char*)_upb_array_ptr(farr->arr)) / valbytes; +} + +UPB_FORCEINLINE +static fastdecode_nextret fastdecode_nextrepeated(upb_Decoder* d, void* dst, + const char** ptr, + fastdecode_arr* farr, + uint64_t data, int tagbytes, + int valbytes) { + fastdecode_nextret ret; + dst = (char*)dst + valbytes; + + if (UPB_LIKELY(!_upb_Decoder_IsDone(d, ptr))) { + ret.tag = _upb_FastDecoder_LoadTag(*ptr); + if (fastdecode_tagmatch(ret.tag, data, tagbytes)) { + ret.next = FD_NEXT_SAMEFIELD; + } else { + fastdecode_commitarr(dst, farr, valbytes); + ret.next = FD_NEXT_OTHERFIELD; + } + } else { + fastdecode_commitarr(dst, farr, valbytes); + ret.next = FD_NEXT_ATLIMIT; + } + + ret.dst = dst; + return ret; +} + +UPB_FORCEINLINE +static void* fastdecode_fieldmem(upb_Message* msg, uint64_t data) { + size_t ofs = data >> 48; + return (char*)msg + ofs; +} + +UPB_FORCEINLINE +static void* fastdecode_getfield(upb_Decoder* d, const char* ptr, + upb_Message* msg, uint64_t* data, + uint64_t* hasbits, fastdecode_arr* farr, + int valbytes, upb_card card) { + switch (card) { + case CARD_s: { + uint8_t hasbit_index = *data >> 24; + // Set hasbit and return pointer to scalar field. + *hasbits |= 1ull << hasbit_index; + return fastdecode_fieldmem(msg, *data); + } + case CARD_o: { + uint16_t case_ofs = *data >> 32; + uint32_t* oneof_case = UPB_PTR_AT(msg, case_ofs, uint32_t); + uint8_t field_number = *data >> 24; + *oneof_case = field_number; + return fastdecode_fieldmem(msg, *data); + } + case CARD_r: { + // Get pointer to upb_Array and allocate/expand if necessary. + uint8_t elem_size_lg2 = __builtin_ctz(valbytes); + upb_Array** arr_p = fastdecode_fieldmem(msg, *data); + char* begin; + *(uint32_t*)msg |= *hasbits; + *hasbits = 0; + if (UPB_LIKELY(!*arr_p)) { + farr->arr = _upb_Array_New(&d->arena, 8, elem_size_lg2); + *arr_p = farr->arr; + } else { + farr->arr = *arr_p; + } + begin = _upb_array_ptr(farr->arr); + farr->end = begin + (farr->arr->capacity * valbytes); + *data = _upb_FastDecoder_LoadTag(ptr); + return begin + (farr->arr->size * valbytes); + } + default: + UPB_UNREACHABLE(); + } +} + +UPB_FORCEINLINE +static bool fastdecode_flippacked(uint64_t* data, int tagbytes) { + *data ^= (0x2 ^ 0x0); // Patch data to match packed wiretype. + return fastdecode_checktag(*data, tagbytes); +} + +#define FASTDECODE_CHECKPACKED(tagbytes, card, func) \ + if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ + if (card == CARD_r && fastdecode_flippacked(&data, tagbytes)) { \ + UPB_MUSTTAIL return func(UPB_PARSE_ARGS); \ + } \ + RETURN_GENERIC("packed check tag mismatch\n"); \ + } + +/* varint fields **************************************************************/ + +UPB_FORCEINLINE +static uint64_t fastdecode_munge(uint64_t val, int valbytes, bool zigzag) { + if (valbytes == 1) { + return val != 0; + } else if (zigzag) { + if (valbytes == 4) { + uint32_t n = val; + return (n >> 1) ^ -(int32_t)(n & 1); + } else if (valbytes == 8) { + return (val >> 1) ^ -(int64_t)(val & 1); + } + UPB_UNREACHABLE(); + } + return val; +} + +UPB_FORCEINLINE +static const char* fastdecode_varint64(const char* ptr, uint64_t* val) { + ptr++; + *val = (uint8_t)ptr[-1]; + if (UPB_UNLIKELY(*val & 0x80)) { + int i; + for (i = 0; i < 8; i++) { + ptr++; + uint64_t byte = (uint8_t)ptr[-1]; + *val += (byte - 1) << (7 + 7 * i); + if (UPB_LIKELY((byte & 0x80) == 0)) goto done; + } + ptr++; + uint64_t byte = (uint8_t)ptr[-1]; + if (byte > 1) { + return NULL; + } + *val += (byte - 1) << 63; + } +done: + UPB_ASSUME(ptr != NULL); + return ptr; +} + +#define FASTDECODE_UNPACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, zigzag, packed) \ + uint64_t val; \ + void* dst; \ + fastdecode_arr farr; \ + \ + FASTDECODE_CHECKPACKED(tagbytes, card, packed); \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, \ + card); \ + if (card == CARD_r) { \ + if (UPB_UNLIKELY(!dst)) { \ + RETURN_GENERIC("need array resize\n"); \ + } \ + } \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, valbytes); \ + } \ + \ + ptr += tagbytes; \ + ptr = fastdecode_varint64(ptr, &val); \ + if (ptr == NULL) _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ + val = fastdecode_munge(val, valbytes, zigzag); \ + memcpy(dst, &val, valbytes); \ + \ + if (card == CARD_r) { \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, valbytes); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + data = ret.tag; \ + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + return ptr; \ + } \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); + +typedef struct { + uint8_t valbytes; + bool zigzag; + void* dst; + fastdecode_arr farr; +} fastdecode_varintdata; + +UPB_FORCEINLINE +static const char* fastdecode_topackedvarint(upb_EpsCopyInputStream* e, + const char* ptr, void* ctx) { + upb_Decoder* d = (upb_Decoder*)e; + fastdecode_varintdata* data = ctx; + void* dst = data->dst; + uint64_t val; + + while (!_upb_Decoder_IsDone(d, &ptr)) { + dst = fastdecode_resizearr(d, dst, &data->farr, data->valbytes); + ptr = fastdecode_varint64(ptr, &val); + if (ptr == NULL) return NULL; + val = fastdecode_munge(val, data->valbytes, data->zigzag); + memcpy(dst, &val, data->valbytes); + dst = (char*)dst + data->valbytes; + } + + fastdecode_commitarr(dst, &data->farr, data->valbytes); + return ptr; +} + +#define FASTDECODE_PACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, zigzag, unpacked) \ + fastdecode_varintdata ctx = {valbytes, zigzag}; \ + \ + FASTDECODE_CHECKPACKED(tagbytes, CARD_r, unpacked); \ + \ + ctx.dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &ctx.farr, \ + valbytes, CARD_r); \ + if (UPB_UNLIKELY(!ctx.dst)) { \ + RETURN_GENERIC("need array resize\n"); \ + } \ + \ + ptr += tagbytes; \ + ptr = fastdecode_delimited(d, ptr, &fastdecode_topackedvarint, &ctx); \ + \ + if (UPB_UNLIKELY(ptr == NULL)) { \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(d, ptr, msg, table, hasbits, 0); + +#define FASTDECODE_VARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, zigzag, unpacked, packed) \ + if (card == CARD_p) { \ + FASTDECODE_PACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, zigzag, unpacked); \ + } else { \ + FASTDECODE_UNPACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, zigzag, packed); \ + } + +#define z_ZZ true +#define b_ZZ false +#define v_ZZ false + +/* Generate all combinations: + * {s,o,r,p} x {b1,v4,z4,v8,z8} x {1bt,2bt} */ + +#define F(card, type, valbytes, tagbytes) \ + UPB_NOINLINE \ + const char* upb_p##card##type##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ + FASTDECODE_VARINT(d, ptr, msg, table, hasbits, data, tagbytes, valbytes, \ + CARD_##card, type##_ZZ, \ + upb_pr##type##valbytes##_##tagbytes##bt, \ + upb_pp##type##valbytes##_##tagbytes##bt); \ + } + +#define TYPES(card, tagbytes) \ + F(card, b, 1, tagbytes) \ + F(card, v, 4, tagbytes) \ + F(card, v, 8, tagbytes) \ + F(card, z, 4, tagbytes) \ + F(card, z, 8, tagbytes) + +#define TAGBYTES(card) \ + TYPES(card, 1) \ + TYPES(card, 2) + +TAGBYTES(s) +TAGBYTES(o) +TAGBYTES(r) +TAGBYTES(p) + +#undef z_ZZ +#undef b_ZZ +#undef v_ZZ +#undef o_ONEOF +#undef s_ONEOF +#undef r_ONEOF +#undef F +#undef TYPES +#undef TAGBYTES +#undef FASTDECODE_UNPACKEDVARINT +#undef FASTDECODE_PACKEDVARINT +#undef FASTDECODE_VARINT + +/* fixed fields ***************************************************************/ + +#define FASTDECODE_UNPACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, packed) \ + void* dst; \ + fastdecode_arr farr; \ + \ + FASTDECODE_CHECKPACKED(tagbytes, card, packed) \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, \ + card); \ + if (card == CARD_r) { \ + if (UPB_UNLIKELY(!dst)) { \ + RETURN_GENERIC("couldn't allocate array in arena\n"); \ + } \ + } \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, valbytes); \ + } \ + \ + ptr += tagbytes; \ + memcpy(dst, ptr, valbytes); \ + ptr += valbytes; \ + \ + if (card == CARD_r) { \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, valbytes); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + data = ret.tag; \ + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + return ptr; \ + } \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); + +#define FASTDECODE_PACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, unpacked) \ + FASTDECODE_CHECKPACKED(tagbytes, CARD_r, unpacked) \ + \ + ptr += tagbytes; \ + int size = (uint8_t)ptr[0]; \ + ptr++; \ + if (size & 0x80) { \ + ptr = fastdecode_longsize(ptr, &size); \ + } \ + \ + if (UPB_UNLIKELY(!upb_EpsCopyInputStream_CheckDataSizeAvailable( \ + &d->input, ptr, size) || \ + (size % valbytes) != 0)) { \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ + } \ + \ + upb_Array** arr_p = fastdecode_fieldmem(msg, data); \ + upb_Array* arr = *arr_p; \ + uint8_t elem_size_lg2 = __builtin_ctz(valbytes); \ + int elems = size / valbytes; \ + \ + if (UPB_LIKELY(!arr)) { \ + *arr_p = arr = _upb_Array_New(&d->arena, elems, elem_size_lg2); \ + if (!arr) { \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ + } \ + } else { \ + _upb_Array_ResizeUninitialized(arr, elems, &d->arena); \ + } \ + \ + char* dst = _upb_array_ptr(arr); \ + memcpy(dst, ptr, size); \ + arr->size = elems; \ + \ + ptr += size; \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); + +#define FASTDECODE_FIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, unpacked, packed) \ + if (card == CARD_p) { \ + FASTDECODE_PACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, unpacked); \ + } else { \ + FASTDECODE_UNPACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, packed); \ + } + +/* Generate all combinations: + * {s,o,r,p} x {f4,f8} x {1bt,2bt} */ + +#define F(card, valbytes, tagbytes) \ + UPB_NOINLINE \ + const char* upb_p##card##f##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ + FASTDECODE_FIXED(d, ptr, msg, table, hasbits, data, tagbytes, valbytes, \ + CARD_##card, upb_ppf##valbytes##_##tagbytes##bt, \ + upb_prf##valbytes##_##tagbytes##bt); \ + } + +#define TYPES(card, tagbytes) \ + F(card, 4, tagbytes) \ + F(card, 8, tagbytes) + +#define TAGBYTES(card) \ + TYPES(card, 1) \ + TYPES(card, 2) + +TAGBYTES(s) +TAGBYTES(o) +TAGBYTES(r) +TAGBYTES(p) + +#undef F +#undef TYPES +#undef TAGBYTES +#undef FASTDECODE_UNPACKEDFIXED +#undef FASTDECODE_PACKEDFIXED + +/* string fields **************************************************************/ + +typedef const char* fastdecode_copystr_func(struct upb_Decoder* d, + const char* ptr, upb_Message* msg, + const upb_MiniTable* table, + uint64_t hasbits, + upb_StringView* dst); + +UPB_NOINLINE +static const char* fastdecode_verifyutf8(upb_Decoder* d, const char* ptr, + upb_Message* msg, intptr_t table, + uint64_t hasbits, uint64_t data) { + upb_StringView* dst = (upb_StringView*)data; + if (!_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) { + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); + } + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); +} + +#define FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, validate_utf8) \ + int size = (uint8_t)ptr[0]; /* Could plumb through hasbits. */ \ + ptr++; \ + if (size & 0x80) { \ + ptr = fastdecode_longsize(ptr, &size); \ + } \ + \ + if (UPB_UNLIKELY(!upb_EpsCopyInputStream_CheckSize(&d->input, ptr, size))) { \ + dst->size = 0; \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ + } \ + \ + const char* s_ptr = ptr; \ + ptr = upb_EpsCopyInputStream_ReadString(&d->input, &s_ptr, size, &d->arena); \ + if (!ptr) _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); \ + dst->data = s_ptr; \ + dst->size = size; \ + \ + if (validate_utf8) { \ + data = (uint64_t)dst; \ + UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ + } else { \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \ + } + +UPB_NOINLINE +static const char* fastdecode_longstring_utf8(struct upb_Decoder* d, + const char* ptr, upb_Message* msg, + intptr_t table, uint64_t hasbits, + uint64_t data) { + upb_StringView* dst = (upb_StringView*)data; + FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, true); +} + +UPB_NOINLINE +static const char* fastdecode_longstring_noutf8( + struct upb_Decoder* d, const char* ptr, upb_Message* msg, intptr_t table, + uint64_t hasbits, uint64_t data) { + upb_StringView* dst = (upb_StringView*)data; + FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, false); +} + +UPB_FORCEINLINE +static void fastdecode_docopy(upb_Decoder* d, const char* ptr, uint32_t size, + int copy, char* data, upb_StringView* dst) { + d->arena.head.ptr += copy; + dst->data = data; + UPB_UNPOISON_MEMORY_REGION(data, copy); + memcpy(data, ptr, copy); + UPB_POISON_MEMORY_REGION(data + size, copy - size); +} + +#define FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes, \ + card, validate_utf8) \ + upb_StringView* dst; \ + fastdecode_arr farr; \ + int64_t size; \ + size_t arena_has; \ + size_t common_has; \ + char* buf; \ + \ + UPB_ASSERT(!upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, 0)); \ + UPB_ASSERT(fastdecode_checktag(data, tagbytes)); \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ + sizeof(upb_StringView), card); \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_StringView)); \ + } \ + \ + size = (uint8_t)ptr[tagbytes]; \ + ptr += tagbytes + 1; \ + dst->size = size; \ + \ + buf = d->arena.head.ptr; \ + arena_has = _upb_ArenaHas(&d->arena); \ + common_has = UPB_MIN(arena_has, \ + upb_EpsCopyInputStream_BytesAvailable(&d->input, ptr)); \ + \ + if (UPB_LIKELY(size <= 15 - tagbytes)) { \ + if (arena_has < 16) goto longstr; \ + d->arena.head.ptr += 16; \ + memcpy(buf, ptr - tagbytes - 1, 16); \ + dst->data = buf + tagbytes + 1; \ + } else if (UPB_LIKELY(size <= 32)) { \ + if (UPB_UNLIKELY(common_has < 32)) goto longstr; \ + fastdecode_docopy(d, ptr, size, 32, buf, dst); \ + } else if (UPB_LIKELY(size <= 64)) { \ + if (UPB_UNLIKELY(common_has < 64)) goto longstr; \ + fastdecode_docopy(d, ptr, size, 64, buf, dst); \ + } else if (UPB_LIKELY(size < 128)) { \ + if (UPB_UNLIKELY(common_has < 128)) goto longstr; \ + fastdecode_docopy(d, ptr, size, 128, buf, dst); \ + } else { \ + goto longstr; \ + } \ + \ + ptr += size; \ + \ + if (card == CARD_r) { \ + if (validate_utf8 && \ + !_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) { \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); \ + } \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_StringView)); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + data = ret.tag; \ + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + return ptr; \ + } \ + } \ + \ + if (card != CARD_r && validate_utf8) { \ + data = (uint64_t)dst; \ + UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \ + \ + longstr: \ + if (card == CARD_r) { \ + fastdecode_commitarr(dst + 1, &farr, sizeof(upb_StringView)); \ + } \ + ptr--; \ + if (validate_utf8) { \ + UPB_MUSTTAIL return fastdecode_longstring_utf8(d, ptr, msg, table, \ + hasbits, (uint64_t)dst); \ + } else { \ + UPB_MUSTTAIL return fastdecode_longstring_noutf8(d, ptr, msg, table, \ + hasbits, (uint64_t)dst); \ + } + +#define FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, card, \ + copyfunc, validate_utf8) \ + upb_StringView* dst; \ + fastdecode_arr farr; \ + int64_t size; \ + \ + if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ + RETURN_GENERIC("string field tag mismatch\n"); \ + } \ + \ + if (UPB_UNLIKELY( \ + !upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, 0))) { \ + UPB_MUSTTAIL return copyfunc(UPB_PARSE_ARGS); \ + } \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ + sizeof(upb_StringView), card); \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_StringView)); \ + } \ + \ + size = (int8_t)ptr[tagbytes]; \ + ptr += tagbytes + 1; \ + \ + if (UPB_UNLIKELY( \ + !upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, size))) { \ + ptr--; \ + if (validate_utf8) { \ + return fastdecode_longstring_utf8(d, ptr, msg, table, hasbits, \ + (uint64_t)dst); \ + } else { \ + return fastdecode_longstring_noutf8(d, ptr, msg, table, hasbits, \ + (uint64_t)dst); \ + } \ + } \ + \ + dst->data = ptr; \ + dst->size = size; \ + ptr = upb_EpsCopyInputStream_ReadStringAliased(&d->input, &dst->data, \ + dst->size); \ + \ + if (card == CARD_r) { \ + if (validate_utf8 && \ + !_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) { \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); \ + } \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_StringView)); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + data = ret.tag; \ + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + return ptr; \ + } \ + } \ + \ + if (card != CARD_r && validate_utf8) { \ + data = (uint64_t)dst; \ + UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); + +/* Generate all combinations: + * {p,c} x {s,o,r} x {s, b} x {1bt,2bt} */ + +#define s_VALIDATE true +#define b_VALIDATE false + +#define F(card, tagbytes, type) \ + UPB_NOINLINE \ + const char* upb_c##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ + FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes, \ + CARD_##card, type##_VALIDATE); \ + } \ + const char* upb_p##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ + FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, \ + CARD_##card, upb_c##card##type##_##tagbytes##bt, \ + type##_VALIDATE); \ + } + +#define UTF8(card, tagbytes) \ + F(card, tagbytes, s) \ + F(card, tagbytes, b) + +#define TAGBYTES(card) \ + UTF8(card, 1) \ + UTF8(card, 2) + +TAGBYTES(s) +TAGBYTES(o) +TAGBYTES(r) + +#undef s_VALIDATE +#undef b_VALIDATE +#undef F +#undef TAGBYTES +#undef FASTDECODE_LONGSTRING +#undef FASTDECODE_COPYSTRING +#undef FASTDECODE_STRING + +/* message fields *************************************************************/ + +UPB_INLINE +upb_Message* decode_newmsg_ceil(upb_Decoder* d, const upb_MiniTable* l, + int msg_ceil_bytes) { + size_t size = l->size + sizeof(upb_Message_Internal); + char* msg_data; + if (UPB_LIKELY(msg_ceil_bytes > 0 && + _upb_ArenaHas(&d->arena) >= msg_ceil_bytes)) { + UPB_ASSERT(size <= (size_t)msg_ceil_bytes); + msg_data = d->arena.head.ptr; + d->arena.head.ptr += size; + UPB_UNPOISON_MEMORY_REGION(msg_data, msg_ceil_bytes); + memset(msg_data, 0, msg_ceil_bytes); + UPB_POISON_MEMORY_REGION(msg_data + size, msg_ceil_bytes - size); } else { - if (!d->table) { - upb_MdDecoder_ErrorJmp(&d->base, - "Extensions cannot have message modifiers"); - } - *msg_modifiers = mod; + msg_data = (char*)upb_Arena_Malloc(&d->arena, size); + memset(msg_data, 0, size); } + return msg_data + sizeof(upb_Message_Internal); +} + +typedef struct { + intptr_t table; + upb_Message* msg; +} fastdecode_submsgdata; +UPB_FORCEINLINE +static const char* fastdecode_tosubmsg(upb_EpsCopyInputStream* e, + const char* ptr, void* ctx) { + upb_Decoder* d = (upb_Decoder*)e; + fastdecode_submsgdata* submsg = ctx; + ptr = fastdecode_dispatch(d, ptr, submsg->msg, submsg->table, 0, 0); + UPB_ASSUME(ptr != NULL); return ptr; } -static void upb_MtDecoder_AllocateSubs(upb_MtDecoder* d, - upb_SubCounts sub_counts) { - uint32_t total_count = sub_counts.submsg_count + sub_counts.subenum_count; - size_t subs_bytes = sizeof(*d->table->subs) * total_count; - upb_MiniTableSub* subs = upb_Arena_Malloc(d->arena, subs_bytes); - upb_MdDecoder_CheckOutOfMemory(&d->base, subs); - uint32_t i = 0; - for (; i < sub_counts.submsg_count; i++) { - subs[i].submsg = &_kUpb_MiniTable_Empty; - } - if (sub_counts.subenum_count) { - upb_MiniTableField* f = d->fields; - upb_MiniTableField* end_f = f + d->table->field_count; - for (; f < end_f; f++) { - if (f->UPB_PRIVATE(descriptortype) == kUpb_FieldType_Enum) { - f->UPB_PRIVATE(submsg_index) += sub_counts.submsg_count; - } - } - for (; i < sub_counts.submsg_count + sub_counts.subenum_count; i++) { - subs[i].subenum = NULL; - } +#define FASTDECODE_SUBMSG(d, ptr, msg, table, hasbits, data, tagbytes, \ + msg_ceil_bytes, card) \ + \ + if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ + RETURN_GENERIC("submessage field tag mismatch\n"); \ + } \ + \ + if (--d->depth == 0) { \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_MaxDepthExceeded); \ + } \ + \ + upb_Message** dst; \ + uint32_t submsg_idx = (data >> 16) & 0xff; \ + const upb_MiniTable* tablep = decode_totablep(table); \ + const upb_MiniTable* subtablep = tablep->subs[submsg_idx].submsg; \ + fastdecode_submsgdata submsg = {decode_totable(subtablep)}; \ + fastdecode_arr farr; \ + \ + if (subtablep->table_mask == (uint8_t)-1) { \ + RETURN_GENERIC("submessage doesn't have fast tables."); \ + } \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ + sizeof(upb_Message*), card); \ + \ + if (card == CARD_s) { \ + *(uint32_t*)msg |= hasbits; \ + hasbits = 0; \ + } \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_Message*)); \ + } \ + \ + submsg.msg = *dst; \ + \ + if (card == CARD_r || UPB_LIKELY(!submsg.msg)) { \ + *dst = submsg.msg = decode_newmsg_ceil(d, subtablep, msg_ceil_bytes); \ + } \ + \ + ptr += tagbytes; \ + ptr = fastdecode_delimited(d, ptr, fastdecode_tosubmsg, &submsg); \ + \ + if (UPB_UNLIKELY(ptr == NULL || d->end_group != DECODE_NOGROUP)) { \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ + } \ + \ + if (card == CARD_r) { \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_Message*)); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + d->depth++; \ + data = ret.tag; \ + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + d->depth++; \ + return ptr; \ + } \ + } \ + \ + d->depth++; \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); + +#define F(card, tagbytes, size_ceil, ceil_arg) \ + const char* upb_p##card##m_##tagbytes##bt_max##size_ceil##b( \ + UPB_PARSE_PARAMS) { \ + FASTDECODE_SUBMSG(d, ptr, msg, table, hasbits, data, tagbytes, ceil_arg, \ + CARD_##card); \ } - d->table->subs = subs; -} -static const char* upb_MtDecoder_Parse(upb_MtDecoder* d, const char* ptr, - size_t len, void* fields, - size_t field_size, uint16_t* field_count, - upb_SubCounts* sub_counts) { - uint64_t msg_modifiers = 0; - uint32_t last_field_number = 0; - upb_MiniTableField* last_field = NULL; - bool need_dense_below = d->table != NULL; +#define SIZES(card, tagbytes) \ + F(card, tagbytes, 64, 64) \ + F(card, tagbytes, 128, 128) \ + F(card, tagbytes, 192, 192) \ + F(card, tagbytes, 256, 256) \ + F(card, tagbytes, max, -1) - d->base.end = UPB_PTRADD(ptr, len); +#define TAGBYTES(card) \ + SIZES(card, 1) \ + SIZES(card, 2) - while (ptr < d->base.end) { - char ch = *ptr++; - if (ch <= kUpb_EncodedValue_MaxField) { - if (!d->table && last_field) { - // For extensions, consume only a single field and then return. - return --ptr; - } - upb_MiniTableField* field = fields; - *field_count += 1; - fields = (char*)fields + field_size; - field->number = ++last_field_number; - last_field = field; - upb_MiniTable_SetField(d, ch, field, msg_modifiers, sub_counts); - } else if (kUpb_EncodedValue_MinModifier <= ch && - ch <= kUpb_EncodedValue_MaxModifier) { - ptr = upb_MtDecoder_ParseModifier(d, ptr, ch, last_field, &msg_modifiers); - if (msg_modifiers & kUpb_MessageModifier_IsExtendable) { - d->table->ext |= kUpb_ExtMode_Extendable; - } - } else if (ch == kUpb_EncodedValue_End) { - if (!d->table) { - upb_MdDecoder_ErrorJmp(&d->base, "Extensions cannot have oneofs."); - } - ptr = upb_MtDecoder_DecodeOneofs(d, ptr); - } else if (kUpb_EncodedValue_MinSkip <= ch && - ch <= kUpb_EncodedValue_MaxSkip) { - if (need_dense_below) { - d->table->dense_below = d->table->field_count; - need_dense_below = false; - } - uint32_t skip; - ptr = upb_MdDecoder_DecodeBase92Varint(&d->base, ptr, ch, - kUpb_EncodedValue_MinSkip, - kUpb_EncodedValue_MaxSkip, &skip); - last_field_number += skip; - last_field_number--; // Next field seen will increment. - } else { - upb_MdDecoder_ErrorJmp(&d->base, "Invalid char: %c", ch); - } - } +TAGBYTES(s) +TAGBYTES(o) +TAGBYTES(r) - if (need_dense_below) { - d->table->dense_below = d->table->field_count; - } +#undef TAGBYTES +#undef SIZES +#undef F +#undef FASTDECODE_SUBMSG - return ptr; -} +#endif /* UPB_FASTTABLE */ -static void upb_MtDecoder_ParseMessage(upb_MtDecoder* d, const char* data, - size_t len) { - // Buffer length is an upper bound on the number of fields. We will return - // what we don't use. - d->fields = upb_Arena_Malloc(d->arena, sizeof(*d->fields) * len); - upb_MdDecoder_CheckOutOfMemory(&d->base, d->fields); +// We encode backwards, to avoid pre-computing lengths (one-pass encode). - upb_SubCounts sub_counts = {0, 0}; - d->table->field_count = 0; - d->table->fields = d->fields; - upb_MtDecoder_Parse(d, data, len, d->fields, sizeof(*d->fields), - &d->table->field_count, &sub_counts); - upb_Arena_ShrinkLast(d->arena, d->fields, sizeof(*d->fields) * len, - sizeof(*d->fields) * d->table->field_count); - d->table->fields = d->fields; - upb_MtDecoder_AllocateSubs(d, sub_counts); -} +#include -int upb_MtDecoder_CompareFields(const void* _a, const void* _b) { - const upb_LayoutItem* a = _a; - const upb_LayoutItem* b = _b; - // Currently we just sort by: - // 1. rep (smallest fields first) - // 2. type (oneof cases first) - // 2. field_index (smallest numbers first) - // The main goal of this is to reduce space lost to padding. - // Later we may have more subtle reasons to prefer a different ordering. - const int rep_bits = upb_Log2Ceiling(kUpb_FieldRep_Max); - const int type_bits = upb_Log2Ceiling(kUpb_LayoutItemType_Max); - const int idx_bits = (sizeof(a->field_index) * 8); - UPB_ASSERT(idx_bits + rep_bits + type_bits < 32); -#define UPB_COMBINE(rep, ty, idx) (((rep << type_bits) | ty) << idx_bits) | idx - uint32_t a_packed = UPB_COMBINE(a->rep, a->type, a->field_index); - uint32_t b_packed = UPB_COMBINE(b->rep, b->type, b->field_index); - assert(a_packed != b_packed); -#undef UPB_COMBINE - return a_packed < b_packed ? -1 : 1; -} -static bool upb_MtDecoder_SortLayoutItems(upb_MtDecoder* d) { - // Add items for all non-oneof fields (oneofs were already added). - int n = d->table->field_count; - for (int i = 0; i < n; i++) { - upb_MiniTableField* f = &d->fields[i]; - if (f->offset >= kOneofBase) continue; - upb_LayoutItem item = {.field_index = i, - .rep = f->mode >> kUpb_FieldRep_Shift, - .type = kUpb_LayoutItemType_Field}; - upb_MtDecoder_PushItem(d, item); - } +// Must be last. + +#define UPB_PB_VARINT_MAX_LEN 10 + +UPB_NOINLINE +static size_t encode_varint64(uint64_t val, char* buf) { + size_t i = 0; + do { + uint8_t byte = val & 0x7fU; + val >>= 7; + if (val) byte |= 0x80U; + buf[i++] = byte; + } while (val); + return i; +} - if (d->vec.size) { - qsort(d->vec.data, d->vec.size, sizeof(*d->vec.data), - upb_MtDecoder_CompareFields); - } +static uint32_t encode_zz32(int32_t n) { + return ((uint32_t)n << 1) ^ (n >> 31); +} +static uint64_t encode_zz64(int64_t n) { + return ((uint64_t)n << 1) ^ (n >> 63); +} - return true; +typedef struct { + upb_EncodeStatus status; + jmp_buf err; + upb_Arena* arena; + char *buf, *ptr, *limit; + int options; + int depth; + _upb_mapsorter sorter; +} upb_encstate; + +static size_t upb_roundup_pow2(size_t bytes) { + size_t ret = 128; + while (ret < bytes) { + ret *= 2; + } + return ret; } -static size_t upb_MiniTable_DivideRoundUp(size_t n, size_t d) { - return (n + d - 1) / d; +UPB_NORETURN static void encode_err(upb_encstate* e, upb_EncodeStatus s) { + UPB_ASSERT(s != kUpb_EncodeStatus_Ok); + e->status = s; + UPB_LONGJMP(e->err, 1); } -static void upb_MtDecoder_AssignHasbits(upb_MtDecoder* d) { - upb_MiniTable* ret = d->table; - int n = ret->field_count; - int last_hasbit = 0; // 0 cannot be used. +UPB_NOINLINE +static void encode_growbuffer(upb_encstate* e, size_t bytes) { + size_t old_size = e->limit - e->buf; + size_t new_size = upb_roundup_pow2(bytes + (e->limit - e->ptr)); + char* new_buf = upb_Arena_Realloc(e->arena, e->buf, old_size, new_size); - // First assign required fields, which must have the lowest hasbits. - for (int i = 0; i < n; i++) { - upb_MiniTableField* field = (upb_MiniTableField*)&ret->fields[i]; - if (field->offset == kRequiredPresence) { - field->presence = ++last_hasbit; - } else if (field->offset == kNoPresence) { - field->presence = 0; - } - } - ret->required_count = last_hasbit; + if (!new_buf) encode_err(e, kUpb_EncodeStatus_OutOfMemory); - if (ret->required_count > 63) { - upb_MdDecoder_ErrorJmp(&d->base, "Too many required fields"); + // We want previous data at the end, realloc() put it at the beginning. + // TODO(salo): This is somewhat inefficient since we are copying twice. + // Maybe create a realloc() that copies to the end of the new buffer? + if (old_size > 0) { + memmove(new_buf + new_size - old_size, e->buf, old_size); } - // Next assign non-required hasbit fields. - for (int i = 0; i < n; i++) { - upb_MiniTableField* field = (upb_MiniTableField*)&ret->fields[i]; - if (field->offset == kHasbitPresence) { - field->presence = ++last_hasbit; - } - } + e->ptr = new_buf + new_size - (e->limit - e->ptr); + e->limit = new_buf + new_size; + e->buf = new_buf; - ret->size = last_hasbit ? upb_MiniTable_DivideRoundUp(last_hasbit + 1, 8) : 0; + e->ptr -= bytes; } -size_t upb_MtDecoder_Place(upb_MtDecoder* d, upb_FieldRep rep) { - size_t size = upb_MtDecoder_SizeOfRep(rep, d->platform); - size_t align = upb_MtDecoder_AlignOfRep(rep, d->platform); - size_t ret = UPB_ALIGN_UP(d->table->size, align); - static const size_t max = UINT16_MAX; - size_t new_size = ret + size; - if (new_size > max) { - upb_MdDecoder_ErrorJmp( - &d->base, "Message size exceeded maximum size of %zu bytes", max); +/* Call to ensure that at least "bytes" bytes are available for writing at + * e->ptr. Returns false if the bytes could not be allocated. */ +UPB_FORCEINLINE +static void encode_reserve(upb_encstate* e, size_t bytes) { + if ((size_t)(e->ptr - e->buf) < bytes) { + encode_growbuffer(e, bytes); + return; } - d->table->size = new_size; - return ret; -} - -static void upb_MtDecoder_AssignOffsets(upb_MtDecoder* d) { - upb_LayoutItem* end = UPB_PTRADD(d->vec.data, d->vec.size); - // Compute offsets. - for (upb_LayoutItem* item = d->vec.data; item < end; item++) { - item->offset = upb_MtDecoder_Place(d, item->rep); - } + e->ptr -= bytes; +} - // Assign oneof case offsets. We must do these first, since assigning - // actual offsets will overwrite the links of the linked list. - for (upb_LayoutItem* item = d->vec.data; item < end; item++) { - if (item->type != kUpb_LayoutItemType_OneofCase) continue; - upb_MiniTableField* f = &d->fields[item->field_index]; - while (true) { - f->presence = ~item->offset; - if (f->offset == kUpb_LayoutItem_IndexSentinel) break; - UPB_ASSERT(f->offset - kOneofBase < d->table->field_count); - f = &d->fields[f->offset - kOneofBase]; - } - } +/* Writes the given bytes to the buffer, handling reserve/advance. */ +static void encode_bytes(upb_encstate* e, const void* data, size_t len) { + if (len == 0) return; /* memcpy() with zero size is UB */ + encode_reserve(e, len); + memcpy(e->ptr, data, len); +} - // Assign offsets. - for (upb_LayoutItem* item = d->vec.data; item < end; item++) { - upb_MiniTableField* f = &d->fields[item->field_index]; - switch (item->type) { - case kUpb_LayoutItemType_OneofField: - while (true) { - uint16_t next_offset = f->offset; - f->offset = item->offset; - if (next_offset == kUpb_LayoutItem_IndexSentinel) break; - f = &d->fields[next_offset - kOneofBase]; - } - break; - case kUpb_LayoutItemType_Field: - f->offset = item->offset; - break; - default: - break; - } - } +static void encode_fixed64(upb_encstate* e, uint64_t val) { + val = _upb_BigEndian_Swap64(val); + encode_bytes(e, &val, sizeof(uint64_t)); +} - // The fasttable parser (supported on 64-bit only) depends on this being a - // multiple of 8 in order to satisfy UPB_MALLOC_ALIGN, which is also 8. - // - // On 32-bit we could potentially make this smaller, but there is no - // compelling reason to optimize this right now. - d->table->size = UPB_ALIGN_UP(d->table->size, 8); +static void encode_fixed32(upb_encstate* e, uint32_t val) { + val = _upb_BigEndian_Swap32(val); + encode_bytes(e, &val, sizeof(uint32_t)); } -static void upb_MtDecoder_ValidateEntryField(upb_MtDecoder* d, - const upb_MiniTableField* f, - uint32_t expected_num) { - const char* name = expected_num == 1 ? "key" : "val"; - if (f->number != expected_num) { - upb_MdDecoder_ErrorJmp(&d->base, - "map %s did not have expected number (%d vs %d)", - name, expected_num, (int)f->number); - } +UPB_NOINLINE +static void encode_longvarint(upb_encstate* e, uint64_t val) { + size_t len; + char* start; - if (upb_IsRepeatedOrMap(f)) { - upb_MdDecoder_ErrorJmp( - &d->base, "map %s cannot be repeated or map, or be in oneof", name); - } + encode_reserve(e, UPB_PB_VARINT_MAX_LEN); + len = encode_varint64(val, e->ptr); + start = e->ptr + UPB_PB_VARINT_MAX_LEN - len; + memmove(start, e->ptr, len); + e->ptr = start; +} - uint32_t not_ok_types; - if (expected_num == 1) { - not_ok_types = (1 << kUpb_FieldType_Float) | (1 << kUpb_FieldType_Double) | - (1 << kUpb_FieldType_Message) | (1 << kUpb_FieldType_Group) | - (1 << kUpb_FieldType_Bytes) | (1 << kUpb_FieldType_Enum); +UPB_FORCEINLINE +static void encode_varint(upb_encstate* e, uint64_t val) { + if (val < 128 && e->ptr != e->buf) { + --e->ptr; + *e->ptr = val; } else { - not_ok_types = 1 << kUpb_FieldType_Group; - } - - if ((1 << upb_MiniTableField_Type(f)) & not_ok_types) { - upb_MdDecoder_ErrorJmp(&d->base, "map %s cannot have type %d", name, - (int)f->UPB_PRIVATE(descriptortype)); + encode_longvarint(e, val); } } -static void upb_MtDecoder_ParseMap(upb_MtDecoder* d, const char* data, - size_t len) { - upb_MtDecoder_ParseMessage(d, data, len); - upb_MtDecoder_AssignHasbits(d); +static void encode_double(upb_encstate* e, double d) { + uint64_t u64; + UPB_ASSERT(sizeof(double) == sizeof(uint64_t)); + memcpy(&u64, &d, sizeof(uint64_t)); + encode_fixed64(e, u64); +} - if (UPB_UNLIKELY(d->table->field_count != 2)) { - upb_MdDecoder_ErrorJmp(&d->base, "%hu fields in map", - d->table->field_count); - UPB_UNREACHABLE(); - } +static void encode_float(upb_encstate* e, float d) { + uint32_t u32; + UPB_ASSERT(sizeof(float) == sizeof(uint32_t)); + memcpy(&u32, &d, sizeof(uint32_t)); + encode_fixed32(e, u32); +} - upb_LayoutItem* end = UPB_PTRADD(d->vec.data, d->vec.size); - for (upb_LayoutItem* item = d->vec.data; item < end; item++) { - if (item->type == kUpb_LayoutItemType_OneofCase) { - upb_MdDecoder_ErrorJmp(&d->base, "Map entry cannot have oneof"); - } - } +static void encode_tag(upb_encstate* e, uint32_t field_number, + uint8_t wire_type) { + encode_varint(e, (field_number << 3) | wire_type); +} - upb_MtDecoder_ValidateEntryField(d, &d->table->fields[0], 1); - upb_MtDecoder_ValidateEntryField(d, &d->table->fields[1], 2); +static void encode_fixedarray(upb_encstate* e, const upb_Array* arr, + size_t elem_size, uint32_t tag) { + size_t bytes = arr->size * elem_size; + const char* data = _upb_array_constptr(arr); + const char* ptr = data + bytes - elem_size; - // Map entries have a pre-determined layout, regardless of types. - // NOTE: sync with mini_table/message_internal.h. - const size_t kv_size = d->platform == kUpb_MiniTablePlatform_32Bit ? 8 : 16; - const size_t hasbit_size = 8; - d->fields[0].offset = hasbit_size; - d->fields[1].offset = hasbit_size + kv_size; - d->table->size = UPB_ALIGN_UP(hasbit_size + kv_size + kv_size, 8); + if (tag || !_upb_IsLittleEndian()) { + while (true) { + if (elem_size == 4) { + uint32_t val; + memcpy(&val, ptr, sizeof(val)); + val = _upb_BigEndian_Swap32(val); + encode_bytes(e, &val, elem_size); + } else { + UPB_ASSERT(elem_size == 8); + uint64_t val; + memcpy(&val, ptr, sizeof(val)); + val = _upb_BigEndian_Swap64(val); + encode_bytes(e, &val, elem_size); + } - // Map entries have a special bit set to signal it's a map entry, used in - // upb_MiniTable_SetSubMessage() below. - d->table->ext |= kUpb_ExtMode_IsMapEntry; + if (tag) encode_varint(e, tag); + if (ptr == data) break; + ptr -= elem_size; + } + } else { + encode_bytes(e, data, bytes); + } } -static void upb_MtDecoder_ParseMessageSet(upb_MtDecoder* d, const char* data, - size_t len) { - if (len > 0) { - upb_MdDecoder_ErrorJmp(&d->base, "Invalid message set encode length: %zu", - len); - } +static void encode_message(upb_encstate* e, const upb_Message* msg, + const upb_MiniTable* m, size_t* size); - upb_MiniTable* ret = d->table; - ret->size = 0; - ret->field_count = 0; - ret->ext = kUpb_ExtMode_IsMessageSet; - ret->dense_below = 0; - ret->table_mask = -1; - ret->required_count = 0; +static void encode_TaggedMessagePtr(upb_encstate* e, + upb_TaggedMessagePtr tagged, + const upb_MiniTable* m, size_t* size) { + if (upb_TaggedMessagePtr_IsEmpty(tagged)) { + m = &_kUpb_MiniTable_Empty; + } + encode_message(e, _upb_TaggedMessagePtr_GetMessage(tagged), m, size); } -static upb_MiniTable* upb_MtDecoder_DoBuildMiniTableWithBuf( - upb_MtDecoder* decoder, const char* data, size_t len, void** buf, - size_t* buf_size) { - upb_MdDecoder_CheckOutOfMemory(&decoder->base, decoder->table); - - decoder->table->size = 0; - decoder->table->field_count = 0; - decoder->table->ext = kUpb_ExtMode_NonExtendable; - decoder->table->dense_below = 0; - decoder->table->table_mask = -1; - decoder->table->required_count = 0; +static void encode_scalar(upb_encstate* e, const void* _field_mem, + const upb_MiniTableSub* subs, + const upb_MiniTableField* f) { + const char* field_mem = _field_mem; + int wire_type; - // Strip off and verify the version tag. - if (!len--) goto done; - const char vers = *data++; +#define CASE(ctype, type, wtype, encodeval) \ + { \ + ctype val = *(ctype*)field_mem; \ + encode_##type(e, encodeval); \ + wire_type = wtype; \ + break; \ + } - switch (vers) { - case kUpb_EncodedVersion_MapV1: - upb_MtDecoder_ParseMap(decoder, data, len); + switch (f->UPB_PRIVATE(descriptortype)) { + case kUpb_FieldType_Double: + CASE(double, double, kUpb_WireType_64Bit, val); + case kUpb_FieldType_Float: + CASE(float, float, kUpb_WireType_32Bit, val); + case kUpb_FieldType_Int64: + case kUpb_FieldType_UInt64: + CASE(uint64_t, varint, kUpb_WireType_Varint, val); + case kUpb_FieldType_UInt32: + CASE(uint32_t, varint, kUpb_WireType_Varint, val); + case kUpb_FieldType_Int32: + case kUpb_FieldType_Enum: + CASE(int32_t, varint, kUpb_WireType_Varint, (int64_t)val); + case kUpb_FieldType_SFixed64: + case kUpb_FieldType_Fixed64: + CASE(uint64_t, fixed64, kUpb_WireType_64Bit, val); + case kUpb_FieldType_Fixed32: + case kUpb_FieldType_SFixed32: + CASE(uint32_t, fixed32, kUpb_WireType_32Bit, val); + case kUpb_FieldType_Bool: + CASE(bool, varint, kUpb_WireType_Varint, val); + case kUpb_FieldType_SInt32: + CASE(int32_t, varint, kUpb_WireType_Varint, encode_zz32(val)); + case kUpb_FieldType_SInt64: + CASE(int64_t, varint, kUpb_WireType_Varint, encode_zz64(val)); + case kUpb_FieldType_String: + case kUpb_FieldType_Bytes: { + upb_StringView view = *(upb_StringView*)field_mem; + encode_bytes(e, view.data, view.size); + encode_varint(e, view.size); + wire_type = kUpb_WireType_Delimited; break; - - case kUpb_EncodedVersion_MessageV1: - upb_MtDecoder_ParseMessage(decoder, data, len); - upb_MtDecoder_AssignHasbits(decoder); - upb_MtDecoder_SortLayoutItems(decoder); - upb_MtDecoder_AssignOffsets(decoder); + } + case kUpb_FieldType_Group: { + size_t size; + upb_TaggedMessagePtr submsg = *(upb_TaggedMessagePtr*)field_mem; + const upb_MiniTable* subm = subs[f->UPB_PRIVATE(submsg_index)].submsg; + if (submsg == 0) { + return; + } + if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded); + encode_tag(e, f->number, kUpb_WireType_EndGroup); + encode_TaggedMessagePtr(e, submsg, subm, &size); + wire_type = kUpb_WireType_StartGroup; + e->depth++; break; - - case kUpb_EncodedVersion_MessageSetV1: - upb_MtDecoder_ParseMessageSet(decoder, data, len); + } + case kUpb_FieldType_Message: { + size_t size; + upb_TaggedMessagePtr submsg = *(upb_TaggedMessagePtr*)field_mem; + const upb_MiniTable* subm = subs[f->UPB_PRIVATE(submsg_index)].submsg; + if (submsg == 0) { + return; + } + if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded); + encode_TaggedMessagePtr(e, submsg, subm, &size); + encode_varint(e, size); + wire_type = kUpb_WireType_Delimited; + e->depth++; break; - + } default: - upb_MdDecoder_ErrorJmp(&decoder->base, "Invalid message version: %c", - vers); + UPB_UNREACHABLE(); } +#undef CASE -done: - *buf = decoder->vec.data; - *buf_size = decoder->vec.capacity * sizeof(*decoder->vec.data); - return decoder->table; + encode_tag(e, f->number, wire_type); } -static upb_MiniTable* upb_MtDecoder_BuildMiniTableWithBuf( - upb_MtDecoder* const decoder, const char* const data, const size_t len, - void** const buf, size_t* const buf_size) { - if (UPB_SETJMP(decoder->base.err) != 0) { - *buf = decoder->vec.data; - *buf_size = decoder->vec.capacity * sizeof(*decoder->vec.data); - return NULL; - } +static void encode_array(upb_encstate* e, const upb_Message* msg, + const upb_MiniTableSub* subs, + const upb_MiniTableField* f) { + const upb_Array* arr = *UPB_PTR_AT(msg, f->offset, upb_Array*); + bool packed = f->mode & kUpb_LabelFlags_IsPacked; + size_t pre_len = e->limit - e->ptr; - return upb_MtDecoder_DoBuildMiniTableWithBuf(decoder, data, len, buf, - buf_size); -} + if (arr == NULL || arr->size == 0) { + return; + } -upb_MiniTable* upb_MiniTable_BuildWithBuf(const char* data, size_t len, - upb_MiniTablePlatform platform, - upb_Arena* arena, void** buf, - size_t* buf_size, - upb_Status* status) { - upb_MtDecoder decoder = { - .base = {.status = status}, - .platform = platform, - .vec = - { - .data = *buf, - .capacity = *buf_size / sizeof(*decoder.vec.data), - .size = 0, - }, - .arena = arena, - .table = upb_Arena_Malloc(arena, sizeof(*decoder.table)), - }; +#define VARINT_CASE(ctype, encode) \ + { \ + const ctype* start = _upb_array_constptr(arr); \ + const ctype* ptr = start + arr->size; \ + uint32_t tag = packed ? 0 : (f->number << 3) | kUpb_WireType_Varint; \ + do { \ + ptr--; \ + encode_varint(e, encode); \ + if (tag) encode_varint(e, tag); \ + } while (ptr != start); \ + } \ + break; - return upb_MtDecoder_BuildMiniTableWithBuf(&decoder, data, len, buf, - buf_size); -} +#define TAG(wire_type) (packed ? 0 : (f->number << 3 | wire_type)) -static const char* upb_MtDecoder_DoBuildMiniTableExtension( - upb_MtDecoder* decoder, const char* data, size_t len, - upb_MiniTableExtension* ext, const upb_MiniTable* extendee, - upb_MiniTableSub sub) { - // If the string is non-empty then it must begin with a version tag. - if (len) { - if (*data != kUpb_EncodedVersion_ExtensionV1) { - upb_MdDecoder_ErrorJmp(&decoder->base, "Invalid ext version: %c", *data); + switch (f->UPB_PRIVATE(descriptortype)) { + case kUpb_FieldType_Double: + encode_fixedarray(e, arr, sizeof(double), TAG(kUpb_WireType_64Bit)); + break; + case kUpb_FieldType_Float: + encode_fixedarray(e, arr, sizeof(float), TAG(kUpb_WireType_32Bit)); + break; + case kUpb_FieldType_SFixed64: + case kUpb_FieldType_Fixed64: + encode_fixedarray(e, arr, sizeof(uint64_t), TAG(kUpb_WireType_64Bit)); + break; + case kUpb_FieldType_Fixed32: + case kUpb_FieldType_SFixed32: + encode_fixedarray(e, arr, sizeof(uint32_t), TAG(kUpb_WireType_32Bit)); + break; + case kUpb_FieldType_Int64: + case kUpb_FieldType_UInt64: + VARINT_CASE(uint64_t, *ptr); + case kUpb_FieldType_UInt32: + VARINT_CASE(uint32_t, *ptr); + case kUpb_FieldType_Int32: + case kUpb_FieldType_Enum: + VARINT_CASE(int32_t, (int64_t)*ptr); + case kUpb_FieldType_Bool: + VARINT_CASE(bool, *ptr); + case kUpb_FieldType_SInt32: + VARINT_CASE(int32_t, encode_zz32(*ptr)); + case kUpb_FieldType_SInt64: + VARINT_CASE(int64_t, encode_zz64(*ptr)); + case kUpb_FieldType_String: + case kUpb_FieldType_Bytes: { + const upb_StringView* start = _upb_array_constptr(arr); + const upb_StringView* ptr = start + arr->size; + do { + ptr--; + encode_bytes(e, ptr->data, ptr->size); + encode_varint(e, ptr->size); + encode_tag(e, f->number, kUpb_WireType_Delimited); + } while (ptr != start); + return; + } + case kUpb_FieldType_Group: { + const upb_TaggedMessagePtr* start = _upb_array_constptr(arr); + const upb_TaggedMessagePtr* ptr = start + arr->size; + const upb_MiniTable* subm = subs[f->UPB_PRIVATE(submsg_index)].submsg; + if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded); + do { + size_t size; + ptr--; + encode_tag(e, f->number, kUpb_WireType_EndGroup); + encode_TaggedMessagePtr(e, *ptr, subm, &size); + encode_tag(e, f->number, kUpb_WireType_StartGroup); + } while (ptr != start); + e->depth++; + return; + } + case kUpb_FieldType_Message: { + const upb_TaggedMessagePtr* start = _upb_array_constptr(arr); + const upb_TaggedMessagePtr* ptr = start + arr->size; + const upb_MiniTable* subm = subs[f->UPB_PRIVATE(submsg_index)].submsg; + if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded); + do { + size_t size; + ptr--; + encode_TaggedMessagePtr(e, *ptr, subm, &size); + encode_varint(e, size); + encode_tag(e, f->number, kUpb_WireType_Delimited); + } while (ptr != start); + e->depth++; + return; } - data++; - len--; } +#undef VARINT_CASE - uint16_t count = 0; - upb_SubCounts sub_counts = {0, 0}; - const char* ret = upb_MtDecoder_Parse(decoder, data, len, ext, sizeof(*ext), - &count, &sub_counts); - if (!ret || count != 1) return NULL; - - upb_MiniTableField* f = &ext->field; - - f->mode |= kUpb_LabelFlags_IsExtension; - f->offset = 0; - f->presence = 0; - - if (extendee->ext & kUpb_ExtMode_IsMessageSet) { - // Extensions of MessageSet must be messages. - if (!upb_IsSubMessage(f)) return NULL; - - // Extensions of MessageSet must be non-repeating. - if ((f->mode & kUpb_FieldMode_Mask) == kUpb_FieldMode_Array) return NULL; + if (packed) { + encode_varint(e, e->limit - e->ptr - pre_len); + encode_tag(e, f->number, kUpb_WireType_Delimited); } - - ext->extendee = extendee; - ext->sub = sub; - - return ret; -} - -static const char* upb_MtDecoder_BuildMiniTableExtension( - upb_MtDecoder* const decoder, const char* const data, const size_t len, - upb_MiniTableExtension* const ext, const upb_MiniTable* const extendee, - const upb_MiniTableSub sub) { - if (UPB_SETJMP(decoder->base.err) != 0) return NULL; - return upb_MtDecoder_DoBuildMiniTableExtension(decoder, data, len, ext, - extendee, sub); } -const char* _upb_MiniTableExtension_Init(const char* data, size_t len, - upb_MiniTableExtension* ext, - const upb_MiniTable* extendee, - upb_MiniTableSub sub, - upb_MiniTablePlatform platform, - upb_Status* status) { - upb_MtDecoder decoder = { - .base = {.status = status}, - .arena = NULL, - .table = NULL, - .platform = platform, - }; - - return upb_MtDecoder_BuildMiniTableExtension(&decoder, data, len, ext, - extendee, sub); +static void encode_mapentry(upb_encstate* e, uint32_t number, + const upb_MiniTable* layout, + const upb_MapEntry* ent) { + const upb_MiniTableField* key_field = &layout->fields[0]; + const upb_MiniTableField* val_field = &layout->fields[1]; + size_t pre_len = e->limit - e->ptr; + size_t size; + encode_scalar(e, &ent->data.v, layout->subs, val_field); + encode_scalar(e, &ent->data.k, layout->subs, key_field); + size = (e->limit - e->ptr) - pre_len; + encode_varint(e, size); + encode_tag(e, number, kUpb_WireType_Delimited); } -upb_MiniTableExtension* _upb_MiniTableExtension_Build( - const char* data, size_t len, const upb_MiniTable* extendee, - upb_MiniTableSub sub, upb_MiniTablePlatform platform, upb_Arena* arena, - upb_Status* status) { - upb_MiniTableExtension* ext = - upb_Arena_Malloc(arena, sizeof(upb_MiniTableExtension)); - if (UPB_UNLIKELY(!ext)) return NULL; +static void encode_map(upb_encstate* e, const upb_Message* msg, + const upb_MiniTableSub* subs, + const upb_MiniTableField* f) { + const upb_Map* map = *UPB_PTR_AT(msg, f->offset, const upb_Map*); + const upb_MiniTable* layout = subs[f->UPB_PRIVATE(submsg_index)].submsg; + UPB_ASSERT(layout->field_count == 2); - const char* ptr = _upb_MiniTableExtension_Init(data, len, ext, extendee, sub, - platform, status); - if (UPB_UNLIKELY(!ptr)) return NULL; + if (map == NULL) return; - return ext; + if (e->options & kUpb_EncodeOption_Deterministic) { + _upb_sortedmap sorted; + _upb_mapsorter_pushmap(&e->sorter, + layout->fields[0].UPB_PRIVATE(descriptortype), map, + &sorted); + upb_MapEntry ent; + while (_upb_sortedmap_next(&e->sorter, map, &sorted, &ent)) { + encode_mapentry(e, f->number, layout, &ent); + } + _upb_mapsorter_popmap(&e->sorter, &sorted); + } else { + intptr_t iter = UPB_STRTABLE_BEGIN; + upb_StringView key; + upb_value val; + while (upb_strtable_next2(&map->table, &key, &val, &iter)) { + upb_MapEntry ent; + _upb_map_fromkey(key, &ent.data.k, map->key_size); + _upb_map_fromvalue(val, &ent.data.v, map->val_size); + encode_mapentry(e, f->number, layout, &ent); + } + } } -upb_MiniTable* _upb_MiniTable_Build(const char* data, size_t len, - upb_MiniTablePlatform platform, - upb_Arena* arena, upb_Status* status) { - void* buf = NULL; - size_t size = 0; - upb_MiniTable* ret = upb_MiniTable_BuildWithBuf(data, len, platform, arena, - &buf, &size, status); - free(buf); - return ret; +static bool encode_shouldencode(upb_encstate* e, const upb_Message* msg, + const upb_MiniTableSub* subs, + const upb_MiniTableField* f) { + if (f->presence == 0) { + /* Proto3 presence or map/array. */ + const void* mem = UPB_PTR_AT(msg, f->offset, void); + switch (_upb_MiniTableField_GetRep(f)) { + case kUpb_FieldRep_1Byte: { + char ch; + memcpy(&ch, mem, 1); + return ch != 0; + } + case kUpb_FieldRep_4Byte: { + uint32_t u32; + memcpy(&u32, mem, 4); + return u32 != 0; + } + case kUpb_FieldRep_8Byte: { + uint64_t u64; + memcpy(&u64, mem, 8); + return u64 != 0; + } + case kUpb_FieldRep_StringView: { + const upb_StringView* str = (const upb_StringView*)mem; + return str->size != 0; + } + default: + UPB_UNREACHABLE(); + } + } else if (f->presence > 0) { + /* Proto2 presence: hasbit. */ + return _upb_hasbit_field(msg, f); + } else { + /* Field is in a oneof. */ + return _upb_getoneofcase_field(msg, f) == f->number; + } } - -// Must be last. - -bool upb_MiniTable_SetSubMessage(upb_MiniTable* table, - upb_MiniTableField* field, - const upb_MiniTable* sub) { - UPB_ASSERT((uintptr_t)table->fields <= (uintptr_t)field && - (uintptr_t)field < - (uintptr_t)(table->fields + table->field_count)); - UPB_ASSERT(sub); - - const bool sub_is_map = sub->ext & kUpb_ExtMode_IsMapEntry; - - switch (field->UPB_PRIVATE(descriptortype)) { - case kUpb_FieldType_Message: - if (sub_is_map) { - const bool table_is_map = table->ext & kUpb_ExtMode_IsMapEntry; - if (UPB_UNLIKELY(table_is_map)) return false; - - field->mode = (field->mode & ~kUpb_FieldMode_Mask) | kUpb_FieldMode_Map; - } +static void encode_field(upb_encstate* e, const upb_Message* msg, + const upb_MiniTableSub* subs, + const upb_MiniTableField* field) { + switch (upb_FieldMode_Get(field)) { + case kUpb_FieldMode_Array: + encode_array(e, msg, subs, field); break; - - case kUpb_FieldType_Group: - if (UPB_UNLIKELY(sub_is_map)) return false; + case kUpb_FieldMode_Map: + encode_map(e, msg, subs, field); + break; + case kUpb_FieldMode_Scalar: + encode_scalar(e, UPB_PTR_AT(msg, field->offset, void), subs, field); break; - default: - return false; + UPB_UNREACHABLE(); } - - upb_MiniTableSub* table_sub = - (void*)&table->subs[field->UPB_PRIVATE(submsg_index)]; - // TODO(haberman): Add this assert back once YouTube is updated to not call - // this function repeatedly. - // UPB_ASSERT(table_sub->submsg == &_kUpb_MiniTable_Empty); - table_sub->submsg = sub; - return true; } -bool upb_MiniTable_SetSubEnum(upb_MiniTable* table, upb_MiniTableField* field, - const upb_MiniTableEnum* sub) { - UPB_ASSERT((uintptr_t)table->fields <= (uintptr_t)field && - (uintptr_t)field < - (uintptr_t)(table->fields + table->field_count)); - UPB_ASSERT(sub); - - upb_MiniTableSub* table_sub = - (void*)&table->subs[field->UPB_PRIVATE(submsg_index)]; - table_sub->subenum = sub; - return true; +static void encode_msgset_item(upb_encstate* e, + const upb_Message_Extension* ext) { + size_t size; + encode_tag(e, kUpb_MsgSet_Item, kUpb_WireType_EndGroup); + encode_message(e, ext->data.ptr, ext->ext->sub.submsg, &size); + encode_varint(e, size); + encode_tag(e, kUpb_MsgSet_Message, kUpb_WireType_Delimited); + encode_varint(e, ext->ext->field.number); + encode_tag(e, kUpb_MsgSet_TypeId, kUpb_WireType_Varint); + encode_tag(e, kUpb_MsgSet_Item, kUpb_WireType_StartGroup); } -uint32_t upb_MiniTable_GetSubList(const upb_MiniTable* mt, - const upb_MiniTableField** subs) { - uint32_t msg_count = 0; - uint32_t enum_count = 0; - - for (int i = 0; i < mt->field_count; i++) { - const upb_MiniTableField* f = &mt->fields[i]; - if (upb_MiniTableField_CType(f) == kUpb_CType_Message) { - *subs = f; - ++subs; - msg_count++; - } +static void encode_ext(upb_encstate* e, const upb_Message_Extension* ext, + bool is_message_set) { + if (UPB_UNLIKELY(is_message_set)) { + encode_msgset_item(e, ext); + } else { + encode_field(e, &ext->data, &ext->ext->sub, &ext->ext->field); } +} - for (int i = 0; i < mt->field_count; i++) { - const upb_MiniTableField* f = &mt->fields[i]; - if (upb_MiniTableField_CType(f) == kUpb_CType_Enum) { - *subs = f; - ++subs; - enum_count++; +static void encode_message(upb_encstate* e, const upb_Message* msg, + const upb_MiniTable* m, size_t* size) { + size_t pre_len = e->limit - e->ptr; + + if ((e->options & kUpb_EncodeOption_CheckRequired) && m->required_count) { + uint64_t msg_head; + memcpy(&msg_head, msg, 8); + msg_head = _upb_BigEndian_Swap64(msg_head); + if (upb_MiniTable_requiredmask(m) & ~msg_head) { + encode_err(e, kUpb_EncodeStatus_MissingRequired); } } - return (msg_count << 16) | enum_count; -} + if ((e->options & kUpb_EncodeOption_SkipUnknown) == 0) { + size_t unknown_size; + const char* unknown = upb_Message_GetUnknown(msg, &unknown_size); -// The list of sub_tables and sub_enums must exactly match the number and order -// of sub-message fields and sub-enum fields given by upb_MiniTable_GetSubList() -// above. -bool upb_MiniTable_Link(upb_MiniTable* mt, const upb_MiniTable** sub_tables, - size_t sub_table_count, - const upb_MiniTableEnum** sub_enums, - size_t sub_enum_count) { - uint32_t msg_count = 0; - uint32_t enum_count = 0; + if (unknown) { + encode_bytes(e, unknown, unknown_size); + } + } - for (int i = 0; i < mt->field_count; i++) { - upb_MiniTableField* f = (upb_MiniTableField*)&mt->fields[i]; - if (upb_MiniTableField_CType(f) == kUpb_CType_Message) { - const upb_MiniTable* sub = sub_tables[msg_count++]; - if (msg_count > sub_table_count) return false; - if (sub != NULL) { - if (!upb_MiniTable_SetSubMessage(mt, f, sub)) return false; + if (m->ext != kUpb_ExtMode_NonExtendable) { + /* Encode all extensions together. Unlike C++, we do not attempt to keep + * these in field number order relative to normal fields or even to each + * other. */ + size_t ext_count; + const upb_Message_Extension* ext = _upb_Message_Getexts(msg, &ext_count); + if (ext_count) { + if (e->options & kUpb_EncodeOption_Deterministic) { + _upb_sortedmap sorted; + _upb_mapsorter_pushexts(&e->sorter, ext, ext_count, &sorted); + while (_upb_sortedmap_nextext(&e->sorter, &sorted, &ext)) { + encode_ext(e, ext, m->ext == kUpb_ExtMode_IsMessageSet); + } + _upb_mapsorter_popmap(&e->sorter, &sorted); + } else { + const upb_Message_Extension* end = ext + ext_count; + for (; ext != end; ext++) { + encode_ext(e, ext, m->ext == kUpb_ExtMode_IsMessageSet); + } } } } - for (int i = 0; i < mt->field_count; i++) { - upb_MiniTableField* f = (upb_MiniTableField*)&mt->fields[i]; - if (upb_MiniTableField_IsClosedEnum(f)) { - const upb_MiniTableEnum* sub = sub_enums[enum_count++]; - if (enum_count > sub_enum_count) return false; - if (sub != NULL) { - if (!upb_MiniTable_SetSubEnum(mt, f, sub)) return false; + if (m->field_count) { + const upb_MiniTableField* f = &m->fields[m->field_count]; + const upb_MiniTableField* first = &m->fields[0]; + while (f != first) { + f--; + if (encode_shouldencode(e, msg, m->subs, f)) { + encode_field(e, msg, m->subs, f); } } } - return true; -} - - -const struct upb_MiniTable _kUpb_MiniTable_Empty = { - .subs = NULL, - .fields = NULL, - .size = 0, - .field_count = 0, - .ext = kUpb_ExtMode_NonExtendable, - .dense_below = 0, - .table_mask = -1, - .required_count = 0, -}; - - - -// Must be last. - -#define EXTREG_KEY_SIZE (sizeof(upb_MiniTable*) + sizeof(uint32_t)) - -struct upb_ExtensionRegistry { - upb_Arena* arena; - upb_strtable exts; // Key is upb_MiniTable* concatenated with fieldnum. -}; - -static void extreg_key(char* buf, const upb_MiniTable* l, uint32_t fieldnum) { - memcpy(buf, &l, sizeof(l)); - memcpy(buf + sizeof(l), &fieldnum, sizeof(fieldnum)); + *size = (e->limit - e->ptr) - pre_len; } -upb_ExtensionRegistry* upb_ExtensionRegistry_New(upb_Arena* arena) { - upb_ExtensionRegistry* r = upb_Arena_Malloc(arena, sizeof(*r)); - if (!r) return NULL; - r->arena = arena; - if (!upb_strtable_init(&r->exts, 8, arena)) return NULL; - return r; -} +static upb_EncodeStatus upb_Encoder_Encode(upb_encstate* const encoder, + const void* const msg, + const upb_MiniTable* const l, + char** const buf, + size_t* const size) { + // Unfortunately we must continue to perform hackery here because there are + // code paths which blindly copy the returned pointer without bothering to + // check for errors until much later (b/235839510). So we still set *buf to + // NULL on error and we still set it to non-NULL on a successful empty result. + if (UPB_SETJMP(encoder->err) == 0) { + encode_message(encoder, msg, l, size); + *size = encoder->limit - encoder->ptr; + if (*size == 0) { + static char ch; + *buf = &ch; + } else { + UPB_ASSERT(encoder->ptr); + *buf = encoder->ptr; + } + } else { + UPB_ASSERT(encoder->status != kUpb_EncodeStatus_Ok); + *buf = NULL; + *size = 0; + } -UPB_API bool upb_ExtensionRegistry_Add(upb_ExtensionRegistry* r, - const upb_MiniTableExtension* e) { - char buf[EXTREG_KEY_SIZE]; - extreg_key(buf, e->extendee, e->field.number); - if (upb_strtable_lookup2(&r->exts, buf, EXTREG_KEY_SIZE, NULL)) return false; - return upb_strtable_insert(&r->exts, buf, EXTREG_KEY_SIZE, - upb_value_constptr(e), r->arena); + _upb_mapsorter_destroy(&encoder->sorter); + return encoder->status; } -bool upb_ExtensionRegistry_AddArray(upb_ExtensionRegistry* r, - const upb_MiniTableExtension** e, - size_t count) { - const upb_MiniTableExtension** start = e; - const upb_MiniTableExtension** end = UPB_PTRADD(e, count); - for (; e < end; e++) { - if (!upb_ExtensionRegistry_Add(r, *e)) goto failure; - } - return true; +upb_EncodeStatus upb_Encode(const void* msg, const upb_MiniTable* l, + int options, upb_Arena* arena, char** buf, + size_t* size) { + upb_encstate e; + unsigned depth = (unsigned)options >> 16; -failure: - // Back out the entries previously added. - for (end = e, e = start; e < end; e++) { - const upb_MiniTableExtension* ext = *e; - char buf[EXTREG_KEY_SIZE]; - extreg_key(buf, ext->extendee, ext->field.number); - upb_strtable_remove2(&r->exts, buf, EXTREG_KEY_SIZE, NULL); - } - return false; -} + e.status = kUpb_EncodeStatus_Ok; + e.arena = arena; + e.buf = NULL; + e.limit = NULL; + e.ptr = NULL; + e.depth = depth ? depth : kUpb_WireFormat_DefaultDepthLimit; + e.options = options; + _upb_mapsorter_init(&e.sorter); -const upb_MiniTableExtension* upb_ExtensionRegistry_Lookup( - const upb_ExtensionRegistry* r, const upb_MiniTable* t, uint32_t num) { - char buf[EXTREG_KEY_SIZE]; - upb_value v; - extreg_key(buf, t, num); - if (upb_strtable_lookup2(&r->exts, buf, EXTREG_KEY_SIZE, &v)) { - return upb_value_getconstptr(v); - } else { - return NULL; - } + return upb_Encoder_Encode(&e, msg, l, buf, size); } -#include - // Must be last. -const upb_MiniTableField* upb_MiniTable_FindFieldByNumber( - const upb_MiniTable* t, uint32_t number) { - const size_t i = ((size_t)number) - 1; // 0 wraps to SIZE_MAX - - // Ideal case: index into dense fields - if (i < t->dense_below) { - UPB_ASSERT(t->fields[i].number == number); - return &t->fields[i]; - } - - // Slow case: binary search - int lo = t->dense_below; - int hi = t->field_count - 1; - while (lo <= hi) { - int mid = (lo + hi) / 2; - uint32_t num = t->fields[mid].number; - if (num < number) { - lo = mid + 1; - continue; - } - if (num > number) { - hi = mid - 1; - continue; - } - return &t->fields[mid]; - } - return NULL; -} - -static bool upb_MiniTable_Is_Oneof(const upb_MiniTableField* f) { - return f->presence < 0; -} - -const upb_MiniTableField* upb_MiniTable_GetOneof(const upb_MiniTable* m, - const upb_MiniTableField* f) { - if (UPB_UNLIKELY(!upb_MiniTable_Is_Oneof(f))) { - return NULL; - } - const upb_MiniTableField* ptr = &m->fields[0]; - const upb_MiniTableField* end = &m->fields[m->field_count]; - while (++ptr < end) { - if (ptr->presence == (*f).presence) { - return ptr; +UPB_NOINLINE _upb_WireReader_ReadLongVarintRet +_upb_WireReader_ReadLongVarint(const char* ptr, uint64_t val) { + _upb_WireReader_ReadLongVarintRet ret = {NULL, 0}; + uint64_t byte; + int i; + for (i = 1; i < 10; i++) { + byte = (uint8_t)ptr[i]; + val += (byte - 1) << (i * 7); + if (!(byte & 0x80)) { + ret.ptr = ptr + i + 1; + ret.val = val; + return ret; } } - return NULL; + return ret; } -bool upb_MiniTable_NextOneofField(const upb_MiniTable* m, - const upb_MiniTableField** f) { - const upb_MiniTableField* ptr = *f; - const upb_MiniTableField* end = &m->fields[m->field_count]; - while (++ptr < end) { - if (ptr->presence == (*f)->presence) { - *f = ptr; - return true; - } +const char* _upb_WireReader_SkipGroup(const char* ptr, uint32_t tag, + int depth_limit, + upb_EpsCopyInputStream* stream) { + if (--depth_limit == 0) return NULL; + uint32_t end_group_tag = (tag & ~7ULL) | kUpb_WireType_EndGroup; + while (!upb_EpsCopyInputStream_IsDone(stream, &ptr)) { + uint32_t tag; + ptr = upb_WireReader_ReadTag(ptr, &tag); + if (!ptr) return NULL; + if (tag == end_group_tag) return ptr; + ptr = _upb_WireReader_SkipValue(ptr, tag, depth_limit, stream); + if (!ptr) return NULL; } - return false; + return ptr; } // This should #undef all macros #defined in def.inc diff --git a/php/ext/google/protobuf/php-upb.h b/php/ext/google/protobuf/php-upb.h index d56d7f714f..b646e10cb3 100644 --- a/php/ext/google/protobuf/php-upb.h +++ b/php/ext/google/protobuf/php-upb.h @@ -357,8 +357,8 @@ void upb_Status_VAppendErrorFormat(upb_Status* status, const char* fmt, #endif /* UPB_BASE_STATUS_H_ */ -#ifndef UPB_INTERNAL_ARRAY_INTERNAL_H_ -#define UPB_INTERNAL_ARRAY_INTERNAL_H_ +#ifndef UPB_COLLECTIONS_INTERNAL_ARRAY_H_ +#define UPB_COLLECTIONS_INTERNAL_ARRAY_H_ #include @@ -1364,7 +1364,7 @@ UPB_INLINE void _upb_array_detach(const void* msg, size_t ofs) { #endif -#endif /* UPB_INTERNAL_ARRAY_INTERNAL_H_ */ +#endif /* UPB_COLLECTIONS_INTERNAL_ARRAY_H_ */ #ifndef UPB_COLLECTIONS_MAP_H_ #define UPB_COLLECTIONS_MAP_H_ @@ -1477,8 +1477,8 @@ UPB_API upb_MessageValue upb_MapIterator_Value(const upb_Map* map, size_t iter); // EVERYTHING BELOW THIS LINE IS INTERNAL - DO NOT USE ///////////////////////// -#ifndef UPB_COLLECTIONS_MAP_INTERNAL_H_ -#define UPB_COLLECTIONS_MAP_INTERNAL_H_ +#ifndef UPB_COLLECTIONS_INTERNAL_MAP_H_ +#define UPB_COLLECTIONS_INTERNAL_MAP_H_ #ifndef UPB_HASH_STR_TABLE_H_ @@ -1914,47 +1914,18 @@ upb_Map* _upb_Map_New(upb_Arena* a, size_t key_size, size_t value_size); #endif -#endif /* UPB_COLLECTIONS_MAP_INTERNAL_H_ */ - -#ifndef UPB_BASE_LOG2_H_ -#define UPB_BASE_LOG2_H_ - -// Must be last. - -#ifdef __cplusplus -extern "C" { -#endif - -UPB_INLINE int upb_Log2Ceiling(int x) { - if (x <= 1) return 0; -#ifdef __GNUC__ - return 32 - __builtin_clz(x - 1); -#else - int lg2 = 0; - while ((1 << lg2) < x) lg2++; - return lg2; -#endif -} - -UPB_INLINE int upb_Log2CeilingSize(int x) { return 1 << upb_Log2Ceiling(x); } - -#ifdef __cplusplus -} /* extern "C" */ -#endif - - -#endif /* UPB_BASE_LOG2_H_ */ +#endif /* UPB_COLLECTIONS_INTERNAL_MAP_H_ */ // EVERYTHING BELOW THIS LINE IS INTERNAL - DO NOT USE ///////////////////////// -#ifndef UPB_COLLECTIONS_MAP_SORTER_INTERNAL_H_ -#define UPB_COLLECTIONS_MAP_SORTER_INTERNAL_H_ +#ifndef UPB_COLLECTIONS_INTERNAL_MAP_SORTER_H_ +#define UPB_COLLECTIONS_INTERNAL_MAP_SORTER_H_ #include -#ifndef UPB_MESSAGE_EXTENSION_INTERNAL_H_ -#define UPB_MESSAGE_EXTENSION_INTERNAL_H_ +#ifndef UPB_MESSAGE_INTERNAL_EXTENSION_H_ +#define UPB_MESSAGE_INTERNAL_EXTENSION_H_ // Public APIs for message operations that do not depend on the schema. @@ -2068,7 +2039,7 @@ const upb_Message_Extension* _upb_Message_Getext( #endif -#endif /* UPB_MESSAGE_EXTENSION_INTERNAL_H_ */ +#endif /* UPB_MESSAGE_INTERNAL_EXTENSION_H_ */ #ifndef UPB_MINI_TABLE_INTERNAL_MAP_ENTRY_DATA_H_ #define UPB_MINI_TABLE_INTERNAL_MAP_ENTRY_DATA_H_ @@ -2108,7 +2079,7 @@ typedef struct { // require 8-byte alignment. double d; }; - // LINT.ThenChange(//depot/google3/third_party/upb/upb/message/internal.h:internal_layout) + // LINT.ThenChange(//depot/google3/third_party/upb/upb/message/internal/message.h:internal_layout) upb_MapEntryData data; } upb_MapEntry; @@ -2182,7 +2153,36 @@ bool _upb_mapsorter_pushexts(_upb_mapsorter* s, #endif -#endif /* UPB_COLLECTIONS_MAP_SORTER_INTERNAL_H_ */ +#endif /* UPB_COLLECTIONS_INTERNAL_MAP_SORTER_H_ */ + +#ifndef UPB_BASE_LOG2_H_ +#define UPB_BASE_LOG2_H_ + +// Must be last. + +#ifdef __cplusplus +extern "C" { +#endif + +UPB_INLINE int upb_Log2Ceiling(int x) { + if (x <= 1) return 0; +#ifdef __GNUC__ + return 32 - __builtin_clz(x - 1); +#else + int lg2 = 0; + while ((1 << lg2) < x) lg2++; + return lg2; +#endif +} + +UPB_INLINE int upb_Log2CeilingSize(int x) { return 1 << upb_Log2Ceiling(x); } + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* UPB_BASE_LOG2_H_ */ #ifndef UPB_REFLECTION_DEF_H_ #define UPB_REFLECTION_DEF_H_ @@ -2280,6 +2280,10 @@ UPB_INLINE void _upb_msg_map_set_value(void* msg, const void* val, #define UPB_MESSAGE_ACCESSORS_H_ +#ifndef UPB_MESSAGE_INTERNAL_ACCESSORS_H_ +#define UPB_MESSAGE_INTERNAL_ACCESSORS_H_ + + /* ** Our memory representation for parsing tables and messages themselves. ** Functions in this file are used by generated code and possibly reflection. @@ -2459,10 +2463,6 @@ bool _upb_Message_AddUnknown(upb_Message* msg, const char* data, size_t len, #endif /* UPB_MESSAGE_INTERNAL_H_ */ -#ifndef UPB_MESSAGE_INTERNAL_ACCESSORS_H_ -#define UPB_MESSAGE_INTERNAL_ACCESSORS_H_ - - // Must be last. #if defined(__GNUC__) && !defined(__clang__) @@ -11952,123 +11952,53 @@ UPB_INLINE const char* upb_WireReader_SkipValue( #endif // UPB_WIRE_READER_H_ -#ifndef UPB_REFLECTION_DEF_BUILDER_INTERNAL_H_ -#define UPB_REFLECTION_DEF_BUILDER_INTERNAL_H_ +#ifndef UPB_MINI_DESCRIPTOR_INTERNAL_DECODER_H_ +#define UPB_MINI_DESCRIPTOR_INTERNAL_DECODER_H_ -// Must be last. +#ifndef UPB_MINI_DESCRIPTOR_INTERNAL_BASE92_H_ +#define UPB_MINI_DESCRIPTOR_INTERNAL_BASE92_H_ -// We want to copy the options verbatim into the destination options proto. -// We use serialize+parse as our deep copy. -#define UPB_DEF_SET_OPTIONS(target, desc_type, options_type, proto) \ - if (UPB_DESC(desc_type##_has_options)(proto)) { \ - size_t size; \ - char* pb = UPB_DESC(options_type##_serialize)( \ - UPB_DESC(desc_type##_options)(proto), ctx->tmp_arena, &size); \ - if (!pb) _upb_DefBuilder_OomErr(ctx); \ - target = \ - UPB_DESC(options_type##_parse)(pb, size, _upb_DefBuilder_Arena(ctx)); \ - if (!target) _upb_DefBuilder_OomErr(ctx); \ - } else { \ - target = (const UPB_DESC(options_type)*)kUpbDefOptDefault; \ - } + +// Must be last. #ifdef __cplusplus extern "C" { #endif -struct upb_DefBuilder { - upb_DefPool* symtab; - upb_FileDef* file; // File we are building. - upb_Arena* arena; // Allocate defs here. - upb_Arena* tmp_arena; // For temporary allocations. - upb_Status* status; // Record errors here. - const upb_MiniTableFile* layout; // NULL if we should build layouts. - upb_MiniTablePlatform platform; // Platform we are targeting. - int enum_count; // Count of enums built so far. - int msg_count; // Count of messages built so far. - int ext_count; // Count of extensions built so far. - jmp_buf err; // longjmp() on error. -}; - -extern const char* kUpbDefOptDefault; - -// ctx->status has already been set elsewhere so just fail/longjmp() -UPB_NORETURN void _upb_DefBuilder_FailJmp(upb_DefBuilder* ctx); - -UPB_NORETURN void _upb_DefBuilder_Errf(upb_DefBuilder* ctx, const char* fmt, - ...) UPB_PRINTF(2, 3); -UPB_NORETURN void _upb_DefBuilder_OomErr(upb_DefBuilder* ctx); - -const char* _upb_DefBuilder_MakeFullName(upb_DefBuilder* ctx, - const char* prefix, - upb_StringView name); - -// Given a symbol and the base symbol inside which it is defined, -// find the symbol's definition. -const void* _upb_DefBuilder_ResolveAny(upb_DefBuilder* ctx, - const char* from_name_dbg, - const char* base, upb_StringView sym, - upb_deftype_t* type); - -const void* _upb_DefBuilder_Resolve(upb_DefBuilder* ctx, - const char* from_name_dbg, const char* base, - upb_StringView sym, upb_deftype_t type); - -char _upb_DefBuilder_ParseEscape(upb_DefBuilder* ctx, const upb_FieldDef* f, - const char** src, const char* end); - -const char* _upb_DefBuilder_FullToShort(const char* fullname); - -UPB_INLINE void* _upb_DefBuilder_Alloc(upb_DefBuilder* ctx, size_t bytes) { - if (bytes == 0) return NULL; - void* ret = upb_Arena_Malloc(ctx->arena, bytes); - if (!ret) _upb_DefBuilder_OomErr(ctx); - return ret; -} - -// Adds a symbol |v| to the symtab, which must be a def pointer previously -// packed with pack_def(). The def's pointer to upb_FileDef* must be set before -// adding, so we know which entries to remove if building this file fails. -UPB_INLINE void _upb_DefBuilder_Add(upb_DefBuilder* ctx, const char* name, - upb_value v) { - upb_StringView sym = {.data = name, .size = strlen(name)}; - bool ok = _upb_DefPool_InsertSym(ctx->symtab, sym, v, ctx->status); - if (!ok) _upb_DefBuilder_FailJmp(ctx); -} - -UPB_INLINE upb_Arena* _upb_DefBuilder_Arena(const upb_DefBuilder* ctx) { - return ctx->arena; +UPB_INLINE char _upb_ToBase92(int8_t ch) { + extern const char _kUpb_ToBase92[]; + UPB_ASSERT(0 <= ch && ch < 92); + return _kUpb_ToBase92[ch]; } -UPB_INLINE upb_FileDef* _upb_DefBuilder_File(const upb_DefBuilder* ctx) { - return ctx->file; +UPB_INLINE char _upb_FromBase92(uint8_t ch) { + extern const int8_t _kUpb_FromBase92[]; + if (' ' > ch || ch > '~') return -1; + return _kUpb_FromBase92[ch - ' ']; } -// This version of CheckIdent() is only called by other, faster versions after -// they detect a parsing error. -void _upb_DefBuilder_CheckIdentSlow(upb_DefBuilder* ctx, upb_StringView name, - bool full); - -// Verify a full identifier string. This is slightly more complicated than -// verifying a relative identifier string because we must track '.' chars. -UPB_INLINE void _upb_DefBuilder_CheckIdentFull(upb_DefBuilder* ctx, - upb_StringView name) { - bool good = name.size > 0; - bool start = true; - - for (size_t i = 0; i < name.size; i++) { - const char c = name.data[i]; - const char d = c | 0x20; // force lowercase - const bool is_alpha = (('a' <= d) & (d <= 'z')) | (c == '_'); - const bool is_numer = ('0' <= c) & (c <= '9') & !start; - const bool is_dot = (c == '.') & !start; - - good &= is_alpha | is_numer | is_dot; - start = is_dot; +UPB_INLINE const char* _upb_Base92_DecodeVarint(const char* ptr, + const char* end, char first_ch, + uint8_t min, uint8_t max, + uint32_t* out_val) { + uint32_t val = 0; + uint32_t shift = 0; + const int bits_per_char = + upb_Log2Ceiling(_upb_FromBase92(max) - _upb_FromBase92(min)); + char ch = first_ch; + while (1) { + uint32_t bits = _upb_FromBase92(ch) - _upb_FromBase92(min); + val |= bits << shift; + if (ptr == end || *ptr < min || max < *ptr) { + *out_val = val; + UPB_ASSUME(ptr != NULL); + return ptr; + } + ch = *ptr++; + shift += bits_per_char; + if (shift >= 32) return NULL; } - - if (!good) _upb_DefBuilder_CheckIdentSlow(ctx, name, true); } #ifdef __cplusplus @@ -12076,34 +12006,364 @@ UPB_INLINE void _upb_DefBuilder_CheckIdentFull(upb_DefBuilder* ctx, #endif -#endif /* UPB_REFLECTION_DEF_BUILDER_INTERNAL_H_ */ - -#ifndef UPB_REFLECTION_ENUM_DEF_INTERNAL_H_ -#define UPB_REFLECTION_ENUM_DEF_INTERNAL_H_ - +#endif // UPB_MINI_DESCRIPTOR_INTERNAL_BASE92_H_ // Must be last. -#ifdef __cplusplus -extern "C" { -#endif +// upb_MdDecoder: used internally for decoding MiniDescriptors for messages, +// extensions, and enums. +typedef struct { + const char* end; + upb_Status* status; + jmp_buf err; +} upb_MdDecoder; -upb_EnumDef* _upb_EnumDef_At(const upb_EnumDef* e, int i); -bool _upb_EnumDef_Insert(upb_EnumDef* e, upb_EnumValueDef* v, upb_Arena* a); -const upb_MiniTableEnum* _upb_EnumDef_MiniTable(const upb_EnumDef* e); +UPB_PRINTF(2, 3) +UPB_NORETURN UPB_INLINE void upb_MdDecoder_ErrorJmp(upb_MdDecoder* d, + const char* fmt, ...) { + if (d->status) { + va_list argp; + upb_Status_SetErrorMessage(d->status, "Error building mini table: "); + va_start(argp, fmt); + upb_Status_VAppendErrorFormat(d->status, fmt, argp); + va_end(argp); + } + UPB_LONGJMP(d->err, 1); +} -// Allocate and initialize an array of |n| enum defs. -upb_EnumDef* _upb_EnumDefs_New( - upb_DefBuilder* ctx, int n, - const UPB_DESC(EnumDescriptorProto) * const* protos, - const upb_MessageDef* containing_type); +UPB_INLINE void upb_MdDecoder_CheckOutOfMemory(upb_MdDecoder* d, + const void* ptr) { + if (!ptr) upb_MdDecoder_ErrorJmp(d, "Out of memory"); +} -#ifdef __cplusplus -} /* extern "C" */ -#endif +UPB_INLINE const char* upb_MdDecoder_DecodeBase92Varint( + upb_MdDecoder* d, const char* ptr, char first_ch, uint8_t min, uint8_t max, + uint32_t* out_val) { + ptr = _upb_Base92_DecodeVarint(ptr, d->end, first_ch, min, max, out_val); + if (!ptr) upb_MdDecoder_ErrorJmp(d, "Overlong varint"); + return ptr; +} -#endif /* UPB_REFLECTION_ENUM_DEF_INTERNAL_H_ */ +#endif // UPB_MINI_DESCRIPTOR_INTERNAL_DECODER_H_ + +#ifndef UPB_MINI_DESCRIPTOR_INTERNAL_WIRE_CONSTANTS_H_ +#define UPB_MINI_DESCRIPTOR_INTERNAL_WIRE_CONSTANTS_H_ + + +// Must be last. + +typedef enum { + kUpb_EncodedType_Double = 0, + kUpb_EncodedType_Float = 1, + kUpb_EncodedType_Fixed32 = 2, + kUpb_EncodedType_Fixed64 = 3, + kUpb_EncodedType_SFixed32 = 4, + kUpb_EncodedType_SFixed64 = 5, + kUpb_EncodedType_Int32 = 6, + kUpb_EncodedType_UInt32 = 7, + kUpb_EncodedType_SInt32 = 8, + kUpb_EncodedType_Int64 = 9, + kUpb_EncodedType_UInt64 = 10, + kUpb_EncodedType_SInt64 = 11, + kUpb_EncodedType_OpenEnum = 12, + kUpb_EncodedType_Bool = 13, + kUpb_EncodedType_Bytes = 14, + kUpb_EncodedType_String = 15, + kUpb_EncodedType_Group = 16, + kUpb_EncodedType_Message = 17, + kUpb_EncodedType_ClosedEnum = 18, + + kUpb_EncodedType_RepeatedBase = 20, +} upb_EncodedType; + +typedef enum { + kUpb_EncodedFieldModifier_FlipPacked = 1 << 0, + kUpb_EncodedFieldModifier_IsRequired = 1 << 1, + kUpb_EncodedFieldModifier_IsProto3Singular = 1 << 2, +} upb_EncodedFieldModifier; + +enum { + kUpb_EncodedValue_MinField = ' ', + kUpb_EncodedValue_MaxField = 'I', + kUpb_EncodedValue_MinModifier = 'L', + kUpb_EncodedValue_MaxModifier = '[', + kUpb_EncodedValue_End = '^', + kUpb_EncodedValue_MinSkip = '_', + kUpb_EncodedValue_MaxSkip = '~', + kUpb_EncodedValue_OneofSeparator = '~', + kUpb_EncodedValue_FieldSeparator = '|', + kUpb_EncodedValue_MinOneofField = ' ', + kUpb_EncodedValue_MaxOneofField = 'b', + kUpb_EncodedValue_MaxEnumMask = 'A', +}; + +enum { + kUpb_EncodedVersion_EnumV1 = '!', + kUpb_EncodedVersion_ExtensionV1 = '#', + kUpb_EncodedVersion_MapV1 = '%', + kUpb_EncodedVersion_MessageV1 = '$', + kUpb_EncodedVersion_MessageSetV1 = '&', +}; + + +#endif // UPB_MINI_DESCRIPTOR_INTERNAL_WIRE_CONSTANTS_H_ + +#ifndef UPB_MINI_DESCRIPTOR_INTERNAL_MODIFIERS_H_ +#define UPB_MINI_DESCRIPTOR_INTERNAL_MODIFIERS_H_ + +// Must be last. + +typedef enum { + kUpb_FieldModifier_IsRepeated = 1 << 0, + kUpb_FieldModifier_IsPacked = 1 << 1, + kUpb_FieldModifier_IsClosedEnum = 1 << 2, + kUpb_FieldModifier_IsProto3Singular = 1 << 3, + kUpb_FieldModifier_IsRequired = 1 << 4, +} kUpb_FieldModifier; + +typedef enum { + kUpb_MessageModifier_ValidateUtf8 = 1 << 0, + kUpb_MessageModifier_DefaultIsPacked = 1 << 1, + kUpb_MessageModifier_IsExtendable = 1 << 2, +} kUpb_MessageModifier; + + +#endif // UPB_MINI_DESCRIPTOR_INTERNAL_MODIFIERS_H_ + +#ifndef UPB_MINI_DESCRIPTOR_INTERNAL_ENCODE_H_ +#define UPB_MINI_DESCRIPTOR_INTERNAL_ENCODE_H_ + + +// Must be last. + +// If the input buffer has at least this many bytes available, the encoder call +// is guaranteed to succeed (as long as field number order is maintained). +#define kUpb_MtDataEncoder_MinSize 16 + +typedef struct { + char* end; // Limit of the buffer passed as a parameter. + // Aliased to internal-only members in .cc. + char internal[32]; +} upb_MtDataEncoder; + +#ifdef __cplusplus +extern "C" { +#endif + +// Encodes field/oneof information for a given message. The sequence of calls +// should look like: +// +// upb_MtDataEncoder e; +// char buf[256]; +// char* ptr = buf; +// e.end = ptr + sizeof(buf); +// unit64_t msg_mod = ...; // bitwise & of kUpb_MessageModifiers or zero +// ptr = upb_MtDataEncoder_StartMessage(&e, ptr, msg_mod); +// // Fields *must* be in field number order. +// ptr = upb_MtDataEncoder_PutField(&e, ptr, ...); +// ptr = upb_MtDataEncoder_PutField(&e, ptr, ...); +// ptr = upb_MtDataEncoder_PutField(&e, ptr, ...); +// +// // If oneofs are present. Oneofs must be encoded after regular fields. +// ptr = upb_MiniTable_StartOneof(&e, ptr) +// ptr = upb_MiniTable_PutOneofField(&e, ptr, ...); +// ptr = upb_MiniTable_PutOneofField(&e, ptr, ...); +// +// ptr = upb_MiniTable_StartOneof(&e, ptr); +// ptr = upb_MiniTable_PutOneofField(&e, ptr, ...); +// ptr = upb_MiniTable_PutOneofField(&e, ptr, ...); +// +// Oneofs must be encoded after all regular fields. +char* upb_MtDataEncoder_StartMessage(upb_MtDataEncoder* e, char* ptr, + uint64_t msg_mod); +char* upb_MtDataEncoder_PutField(upb_MtDataEncoder* e, char* ptr, + upb_FieldType type, uint32_t field_num, + uint64_t field_mod); +char* upb_MtDataEncoder_StartOneof(upb_MtDataEncoder* e, char* ptr); +char* upb_MtDataEncoder_PutOneofField(upb_MtDataEncoder* e, char* ptr, + uint32_t field_num); + +// Encodes the set of values for a given enum. The values must be given in +// order (after casting to uint32_t), and repeats are not allowed. +char* upb_MtDataEncoder_StartEnum(upb_MtDataEncoder* e, char* ptr); +char* upb_MtDataEncoder_PutEnumValue(upb_MtDataEncoder* e, char* ptr, + uint32_t val); +char* upb_MtDataEncoder_EndEnum(upb_MtDataEncoder* e, char* ptr); + +// Encodes an entire mini descriptor for an extension. +char* upb_MtDataEncoder_EncodeExtension(upb_MtDataEncoder* e, char* ptr, + upb_FieldType type, uint32_t field_num, + uint64_t field_mod); + +// Encodes an entire mini descriptor for a map. +char* upb_MtDataEncoder_EncodeMap(upb_MtDataEncoder* e, char* ptr, + upb_FieldType key_type, + upb_FieldType value_type, uint64_t key_mod, + uint64_t value_mod); + +// Encodes an entire mini descriptor for a message set. +char* upb_MtDataEncoder_EncodeMessageSet(upb_MtDataEncoder* e, char* ptr); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* UPB_MINI_DESCRIPTOR_INTERNAL_ENCODE_H_ */ + +#ifndef UPB_REFLECTION_DEF_BUILDER_INTERNAL_H_ +#define UPB_REFLECTION_DEF_BUILDER_INTERNAL_H_ + + +// Must be last. + +// We want to copy the options verbatim into the destination options proto. +// We use serialize+parse as our deep copy. +#define UPB_DEF_SET_OPTIONS(target, desc_type, options_type, proto) \ + if (UPB_DESC(desc_type##_has_options)(proto)) { \ + size_t size; \ + char* pb = UPB_DESC(options_type##_serialize)( \ + UPB_DESC(desc_type##_options)(proto), ctx->tmp_arena, &size); \ + if (!pb) _upb_DefBuilder_OomErr(ctx); \ + target = \ + UPB_DESC(options_type##_parse)(pb, size, _upb_DefBuilder_Arena(ctx)); \ + if (!target) _upb_DefBuilder_OomErr(ctx); \ + } else { \ + target = (const UPB_DESC(options_type)*)kUpbDefOptDefault; \ + } + +#ifdef __cplusplus +extern "C" { +#endif + +struct upb_DefBuilder { + upb_DefPool* symtab; + upb_FileDef* file; // File we are building. + upb_Arena* arena; // Allocate defs here. + upb_Arena* tmp_arena; // For temporary allocations. + upb_Status* status; // Record errors here. + const upb_MiniTableFile* layout; // NULL if we should build layouts. + upb_MiniTablePlatform platform; // Platform we are targeting. + int enum_count; // Count of enums built so far. + int msg_count; // Count of messages built so far. + int ext_count; // Count of extensions built so far. + jmp_buf err; // longjmp() on error. +}; + +extern const char* kUpbDefOptDefault; + +// ctx->status has already been set elsewhere so just fail/longjmp() +UPB_NORETURN void _upb_DefBuilder_FailJmp(upb_DefBuilder* ctx); + +UPB_NORETURN void _upb_DefBuilder_Errf(upb_DefBuilder* ctx, const char* fmt, + ...) UPB_PRINTF(2, 3); +UPB_NORETURN void _upb_DefBuilder_OomErr(upb_DefBuilder* ctx); + +const char* _upb_DefBuilder_MakeFullName(upb_DefBuilder* ctx, + const char* prefix, + upb_StringView name); + +// Given a symbol and the base symbol inside which it is defined, +// find the symbol's definition. +const void* _upb_DefBuilder_ResolveAny(upb_DefBuilder* ctx, + const char* from_name_dbg, + const char* base, upb_StringView sym, + upb_deftype_t* type); + +const void* _upb_DefBuilder_Resolve(upb_DefBuilder* ctx, + const char* from_name_dbg, const char* base, + upb_StringView sym, upb_deftype_t type); + +char _upb_DefBuilder_ParseEscape(upb_DefBuilder* ctx, const upb_FieldDef* f, + const char** src, const char* end); + +const char* _upb_DefBuilder_FullToShort(const char* fullname); + +UPB_INLINE void* _upb_DefBuilder_Alloc(upb_DefBuilder* ctx, size_t bytes) { + if (bytes == 0) return NULL; + void* ret = upb_Arena_Malloc(ctx->arena, bytes); + if (!ret) _upb_DefBuilder_OomErr(ctx); + return ret; +} + +// Adds a symbol |v| to the symtab, which must be a def pointer previously +// packed with pack_def(). The def's pointer to upb_FileDef* must be set before +// adding, so we know which entries to remove if building this file fails. +UPB_INLINE void _upb_DefBuilder_Add(upb_DefBuilder* ctx, const char* name, + upb_value v) { + upb_StringView sym = {.data = name, .size = strlen(name)}; + bool ok = _upb_DefPool_InsertSym(ctx->symtab, sym, v, ctx->status); + if (!ok) _upb_DefBuilder_FailJmp(ctx); +} + +UPB_INLINE upb_Arena* _upb_DefBuilder_Arena(const upb_DefBuilder* ctx) { + return ctx->arena; +} + +UPB_INLINE upb_FileDef* _upb_DefBuilder_File(const upb_DefBuilder* ctx) { + return ctx->file; +} + +// This version of CheckIdent() is only called by other, faster versions after +// they detect a parsing error. +void _upb_DefBuilder_CheckIdentSlow(upb_DefBuilder* ctx, upb_StringView name, + bool full); + +// Verify a full identifier string. This is slightly more complicated than +// verifying a relative identifier string because we must track '.' chars. +UPB_INLINE void _upb_DefBuilder_CheckIdentFull(upb_DefBuilder* ctx, + upb_StringView name) { + bool good = name.size > 0; + bool start = true; + + for (size_t i = 0; i < name.size; i++) { + const char c = name.data[i]; + const char d = c | 0x20; // force lowercase + const bool is_alpha = (('a' <= d) & (d <= 'z')) | (c == '_'); + const bool is_numer = ('0' <= c) & (c <= '9') & !start; + const bool is_dot = (c == '.') & !start; + + good &= is_alpha | is_numer | is_dot; + start = is_dot; + } + + if (!good) _upb_DefBuilder_CheckIdentSlow(ctx, name, true); +} + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* UPB_REFLECTION_DEF_BUILDER_INTERNAL_H_ */ + +#ifndef UPB_REFLECTION_ENUM_DEF_INTERNAL_H_ +#define UPB_REFLECTION_ENUM_DEF_INTERNAL_H_ + + +// Must be last. + +#ifdef __cplusplus +extern "C" { +#endif + +upb_EnumDef* _upb_EnumDef_At(const upb_EnumDef* e, int i); +bool _upb_EnumDef_Insert(upb_EnumDef* e, upb_EnumValueDef* v, upb_Arena* a); +const upb_MiniTableEnum* _upb_EnumDef_MiniTable(const upb_EnumDef* e); + +// Allocate and initialize an array of |n| enum defs. +upb_EnumDef* _upb_EnumDefs_New( + upb_DefBuilder* ctx, int n, + const UPB_DESC(EnumDescriptorProto) * const* protos, + const upb_MessageDef* containing_type); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* UPB_REFLECTION_ENUM_DEF_INTERNAL_H_ */ #ifndef UPB_REFLECTION_ENUM_VALUE_DEF_INTERNAL_H_ #define UPB_REFLECTION_ENUM_VALUE_DEF_INTERNAL_H_ @@ -12240,116 +12500,35 @@ upb_MessageDef* _upb_MessageDefs_New( #endif -#endif /* UPB_REFLECTION_MESSAGE_DEF_INTERNAL_H_ */ - -#ifndef UPB_REFLECTION_SERVICE_DEF_INTERNAL_H_ -#define UPB_REFLECTION_SERVICE_DEF_INTERNAL_H_ - - -// Must be last. - -#ifdef __cplusplus -extern "C" { -#endif - -upb_ServiceDef* _upb_ServiceDef_At(const upb_ServiceDef* s, int i); - -// Allocate and initialize an array of |n| service defs. -upb_ServiceDef* _upb_ServiceDefs_New( - upb_DefBuilder* ctx, int n, - const UPB_DESC(ServiceDescriptorProto) * const* protos); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - - -#endif /* UPB_REFLECTION_SERVICE_DEF_INTERNAL_H_ */ - -#ifndef UPB_REFLECTION_DESC_STATE_INTERNAL_H_ -#define UPB_REFLECTION_DESC_STATE_INTERNAL_H_ - - -#ifndef UPB_MINI_DESCRIPTOR_INTERNAL_ENCODE_H_ -#define UPB_MINI_DESCRIPTOR_INTERNAL_ENCODE_H_ - - -// Must be last. +#endif /* UPB_REFLECTION_MESSAGE_DEF_INTERNAL_H_ */ -// If the input buffer has at least this many bytes available, the encoder call -// is guaranteed to succeed (as long as field number order is maintained). -#define kUpb_MtDataEncoder_MinSize 16 +#ifndef UPB_REFLECTION_SERVICE_DEF_INTERNAL_H_ +#define UPB_REFLECTION_SERVICE_DEF_INTERNAL_H_ -typedef struct { - char* end; // Limit of the buffer passed as a parameter. - // Aliased to internal-only members in .cc. - char internal[32]; -} upb_MtDataEncoder; + +// Must be last. #ifdef __cplusplus extern "C" { #endif -// Encodes field/oneof information for a given message. The sequence of calls -// should look like: -// -// upb_MtDataEncoder e; -// char buf[256]; -// char* ptr = buf; -// e.end = ptr + sizeof(buf); -// unit64_t msg_mod = ...; // bitwise & of kUpb_MessageModifiers or zero -// ptr = upb_MtDataEncoder_StartMessage(&e, ptr, msg_mod); -// // Fields *must* be in field number order. -// ptr = upb_MtDataEncoder_PutField(&e, ptr, ...); -// ptr = upb_MtDataEncoder_PutField(&e, ptr, ...); -// ptr = upb_MtDataEncoder_PutField(&e, ptr, ...); -// -// // If oneofs are present. Oneofs must be encoded after regular fields. -// ptr = upb_MiniTable_StartOneof(&e, ptr) -// ptr = upb_MiniTable_PutOneofField(&e, ptr, ...); -// ptr = upb_MiniTable_PutOneofField(&e, ptr, ...); -// -// ptr = upb_MiniTable_StartOneof(&e, ptr); -// ptr = upb_MiniTable_PutOneofField(&e, ptr, ...); -// ptr = upb_MiniTable_PutOneofField(&e, ptr, ...); -// -// Oneofs must be encoded after all regular fields. -char* upb_MtDataEncoder_StartMessage(upb_MtDataEncoder* e, char* ptr, - uint64_t msg_mod); -char* upb_MtDataEncoder_PutField(upb_MtDataEncoder* e, char* ptr, - upb_FieldType type, uint32_t field_num, - uint64_t field_mod); -char* upb_MtDataEncoder_StartOneof(upb_MtDataEncoder* e, char* ptr); -char* upb_MtDataEncoder_PutOneofField(upb_MtDataEncoder* e, char* ptr, - uint32_t field_num); - -// Encodes the set of values for a given enum. The values must be given in -// order (after casting to uint32_t), and repeats are not allowed. -char* upb_MtDataEncoder_StartEnum(upb_MtDataEncoder* e, char* ptr); -char* upb_MtDataEncoder_PutEnumValue(upb_MtDataEncoder* e, char* ptr, - uint32_t val); -char* upb_MtDataEncoder_EndEnum(upb_MtDataEncoder* e, char* ptr); - -// Encodes an entire mini descriptor for an extension. -char* upb_MtDataEncoder_EncodeExtension(upb_MtDataEncoder* e, char* ptr, - upb_FieldType type, uint32_t field_num, - uint64_t field_mod); - -// Encodes an entire mini descriptor for a map. -char* upb_MtDataEncoder_EncodeMap(upb_MtDataEncoder* e, char* ptr, - upb_FieldType key_type, - upb_FieldType value_type, uint64_t key_mod, - uint64_t value_mod); +upb_ServiceDef* _upb_ServiceDef_At(const upb_ServiceDef* s, int i); -// Encodes an entire mini descriptor for a message set. -char* upb_MtDataEncoder_EncodeMessageSet(upb_MtDataEncoder* e, char* ptr); +// Allocate and initialize an array of |n| service defs. +upb_ServiceDef* _upb_ServiceDefs_New( + upb_DefBuilder* ctx, int n, + const UPB_DESC(ServiceDescriptorProto) * const* protos); #ifdef __cplusplus } /* extern "C" */ #endif -#endif /* UPB_MINI_DESCRIPTOR_INTERNAL_ENCODE_H_ */ +#endif /* UPB_REFLECTION_SERVICE_DEF_INTERNAL_H_ */ + +#ifndef UPB_REFLECTION_DESC_STATE_INTERNAL_H_ +#define UPB_REFLECTION_DESC_STATE_INTERNAL_H_ + // Must be last. @@ -12454,28 +12633,6 @@ upb_ExtensionRange* _upb_ExtensionRanges_New( #endif /* UPB_REFLECTION_EXTENSION_RANGE_INTERNAL_H_ */ -#ifndef UPB_MINI_DESCRIPTOR_INTERNAL_MODIFIERS_H_ -#define UPB_MINI_DESCRIPTOR_INTERNAL_MODIFIERS_H_ - -// Must be last. - -typedef enum { - kUpb_FieldModifier_IsRepeated = 1 << 0, - kUpb_FieldModifier_IsPacked = 1 << 1, - kUpb_FieldModifier_IsClosedEnum = 1 << 2, - kUpb_FieldModifier_IsProto3Singular = 1 << 3, - kUpb_FieldModifier_IsRequired = 1 << 4, -} kUpb_FieldModifier; - -typedef enum { - kUpb_MessageModifier_ValidateUtf8 = 1 << 0, - kUpb_MessageModifier_DefaultIsPacked = 1 << 1, - kUpb_MessageModifier_IsExtendable = 1 << 2, -} kUpb_MessageModifier; - - -#endif // UPB_MINI_DESCRIPTOR_INTERNAL_MODIFIERS_H_ - #ifndef UPB_REFLECTION_ONEOF_DEF_INTERNAL_H_ #define UPB_REFLECTION_ONEOF_DEF_INTERNAL_H_ @@ -12729,163 +12886,6 @@ UPB_INLINE uint32_t _upb_FastDecoder_LoadTag(const char* ptr) { #endif /* UPB_WIRE_INTERNAL_DECODE_H_ */ -#ifndef UPB_MINI_DESCRIPTOR_INTERNAL_BASE92_H_ -#define UPB_MINI_DESCRIPTOR_INTERNAL_BASE92_H_ - - -// Must be last. - -#ifdef __cplusplus -extern "C" { -#endif - -UPB_INLINE char _upb_ToBase92(int8_t ch) { - extern const char _kUpb_ToBase92[]; - UPB_ASSERT(0 <= ch && ch < 92); - return _kUpb_ToBase92[ch]; -} - -UPB_INLINE char _upb_FromBase92(uint8_t ch) { - extern const int8_t _kUpb_FromBase92[]; - if (' ' > ch || ch > '~') return -1; - return _kUpb_FromBase92[ch - ' ']; -} - -UPB_INLINE const char* _upb_Base92_DecodeVarint(const char* ptr, - const char* end, char first_ch, - uint8_t min, uint8_t max, - uint32_t* out_val) { - uint32_t val = 0; - uint32_t shift = 0; - const int bits_per_char = - upb_Log2Ceiling(_upb_FromBase92(max) - _upb_FromBase92(min)); - char ch = first_ch; - while (1) { - uint32_t bits = _upb_FromBase92(ch) - _upb_FromBase92(min); - val |= bits << shift; - if (ptr == end || *ptr < min || max < *ptr) { - *out_val = val; - UPB_ASSUME(ptr != NULL); - return ptr; - } - ch = *ptr++; - shift += bits_per_char; - if (shift >= 32) return NULL; - } -} - -#ifdef __cplusplus -} /* extern "C" */ -#endif - - -#endif // UPB_MINI_DESCRIPTOR_INTERNAL_BASE92_H_ - -#ifndef UPB_MINI_DESCRIPTOR_INTERNAL_WIRE_CONSTANTS_H_ -#define UPB_MINI_DESCRIPTOR_INTERNAL_WIRE_CONSTANTS_H_ - - -// Must be last. - -typedef enum { - kUpb_EncodedType_Double = 0, - kUpb_EncodedType_Float = 1, - kUpb_EncodedType_Fixed32 = 2, - kUpb_EncodedType_Fixed64 = 3, - kUpb_EncodedType_SFixed32 = 4, - kUpb_EncodedType_SFixed64 = 5, - kUpb_EncodedType_Int32 = 6, - kUpb_EncodedType_UInt32 = 7, - kUpb_EncodedType_SInt32 = 8, - kUpb_EncodedType_Int64 = 9, - kUpb_EncodedType_UInt64 = 10, - kUpb_EncodedType_SInt64 = 11, - kUpb_EncodedType_OpenEnum = 12, - kUpb_EncodedType_Bool = 13, - kUpb_EncodedType_Bytes = 14, - kUpb_EncodedType_String = 15, - kUpb_EncodedType_Group = 16, - kUpb_EncodedType_Message = 17, - kUpb_EncodedType_ClosedEnum = 18, - - kUpb_EncodedType_RepeatedBase = 20, -} upb_EncodedType; - -typedef enum { - kUpb_EncodedFieldModifier_FlipPacked = 1 << 0, - kUpb_EncodedFieldModifier_IsRequired = 1 << 1, - kUpb_EncodedFieldModifier_IsProto3Singular = 1 << 2, -} upb_EncodedFieldModifier; - -enum { - kUpb_EncodedValue_MinField = ' ', - kUpb_EncodedValue_MaxField = 'I', - kUpb_EncodedValue_MinModifier = 'L', - kUpb_EncodedValue_MaxModifier = '[', - kUpb_EncodedValue_End = '^', - kUpb_EncodedValue_MinSkip = '_', - kUpb_EncodedValue_MaxSkip = '~', - kUpb_EncodedValue_OneofSeparator = '~', - kUpb_EncodedValue_FieldSeparator = '|', - kUpb_EncodedValue_MinOneofField = ' ', - kUpb_EncodedValue_MaxOneofField = 'b', - kUpb_EncodedValue_MaxEnumMask = 'A', -}; - -enum { - kUpb_EncodedVersion_EnumV1 = '!', - kUpb_EncodedVersion_ExtensionV1 = '#', - kUpb_EncodedVersion_MapV1 = '%', - kUpb_EncodedVersion_MessageV1 = '$', - kUpb_EncodedVersion_MessageSetV1 = '&', -}; - - -#endif // UPB_MINI_DESCRIPTOR_INTERNAL_WIRE_CONSTANTS_H_ - -#ifndef UPB_MINI_DESCRIPTOR_INTERNAL_DECODER_H_ -#define UPB_MINI_DESCRIPTOR_INTERNAL_DECODER_H_ - - -// Must be last. - -// upb_MdDecoder: used internally for decoding MiniDescriptors for messages, -// extensions, and enums. -typedef struct { - const char* end; - upb_Status* status; - jmp_buf err; -} upb_MdDecoder; - -UPB_PRINTF(2, 3) -UPB_NORETURN UPB_INLINE void upb_MdDecoder_ErrorJmp(upb_MdDecoder* d, - const char* fmt, ...) { - if (d->status) { - va_list argp; - upb_Status_SetErrorMessage(d->status, "Error building mini table: "); - va_start(argp, fmt); - upb_Status_VAppendErrorFormat(d->status, fmt, argp); - va_end(argp); - } - UPB_LONGJMP(d->err, 1); -} - -UPB_INLINE void upb_MdDecoder_CheckOutOfMemory(upb_MdDecoder* d, - const void* ptr) { - if (!ptr) upb_MdDecoder_ErrorJmp(d, "Out of memory"); -} - -UPB_INLINE const char* upb_MdDecoder_DecodeBase92Varint( - upb_MdDecoder* d, const char* ptr, char first_ch, uint8_t min, uint8_t max, - uint32_t* out_val) { - ptr = _upb_Base92_DecodeVarint(ptr, d->end, first_ch, min, max, out_val); - if (!ptr) upb_MdDecoder_ErrorJmp(d, "Overlong varint"); - return ptr; -} - - -#endif // UPB_MINI_DESCRIPTOR_INTERNAL_DECODER_H_ - // This should #undef all macros #defined in def.inc #undef UPB_SIZE diff --git a/protobuf_deps.bzl b/protobuf_deps.bzl index d100b9a19e..1c622f88e9 100644 --- a/protobuf_deps.bzl +++ b/protobuf_deps.bzl @@ -150,7 +150,6 @@ def protobuf_deps(): _github_archive( name = "upb", repo = "https://github.com/protocolbuffers/upb", - commit = "58877b55e0796bcca743e9bd4d2be42092562f30", - sha256 = "b3a3279cffb91e22fd38fb316a39ded87211bf89ff39337c7f151d88ed3b71fd", - patches = ["@com_google_protobuf//build_defs:upb.patch"], + commit = "cd176a0cd1913ea8c643680d206c8ca278815c2c", + sha256 = "661b2d63dff82c6868cd1dea5e7be2ca1a6467746a146c293834f18aaa709077", ) diff --git a/ruby/ext/google/protobuf_c/ruby-upb.c b/ruby/ext/google/protobuf_c/ruby-upb.c index a2dd07e35e..1ea861b97c 100644 --- a/ruby/ext/google/protobuf_c/ruby-upb.c +++ b/ruby/ext/google/protobuf_c/ruby-upb.c @@ -373,8 +373,8 @@ void upb_Status_VAppendErrorFormat(upb_Status* status, const char* fmt, status->msg[_kUpb_Status_MaxMessage - 1] = '\0'; } -#include +#include // Must be last. @@ -609,6 +609,7 @@ upb_Map* _upb_Map_New(upb_Arena* a, size_t key_size, size_t value_size) { } + // Must be last. static void _upb_mapsorter_getkeys(const void* _a, const void* _b, void* a_key, @@ -5871,6164 +5872,5381 @@ size_t upb_Message_ExtensionCount(const upb_Message* msg) { return count; } -#include // Must be last. -/* The upb core does not generally have a concept of default instances. However - * for descriptor options we make an exception since the max size is known and - * modest (<200 bytes). All types can share a default instance since it is - * initialized to zeroes. - * - * We have to allocate an extra pointer for upb's internal metadata. */ -static const char opt_default_buf[_UPB_MAXOPT_SIZE + sizeof(void*)] = {0}; -const char* kUpbDefOptDefault = &opt_default_buf[sizeof(void*)]; - -const char* _upb_DefBuilder_FullToShort(const char* fullname) { - const char* p; - - if (fullname == NULL) { - return NULL; - } else if ((p = strrchr(fullname, '.')) == NULL) { - /* No '.' in the name, return the full string. */ - return fullname; - } else { - /* Return one past the last '.'. */ - return p + 1; - } -} - -void _upb_DefBuilder_FailJmp(upb_DefBuilder* ctx) { UPB_LONGJMP(ctx->err, 1); } - -void _upb_DefBuilder_Errf(upb_DefBuilder* ctx, const char* fmt, ...) { - va_list argp; - va_start(argp, fmt); - upb_Status_VSetErrorFormat(ctx->status, fmt, argp); - va_end(argp); - _upb_DefBuilder_FailJmp(ctx); -} +typedef struct { + upb_MdDecoder base; + upb_Arena* arena; + upb_MiniTableEnum* enum_table; + uint32_t enum_value_count; + uint32_t enum_data_count; + uint32_t enum_data_capacity; +} upb_MdEnumDecoder; -void _upb_DefBuilder_OomErr(upb_DefBuilder* ctx) { - upb_Status_SetErrorMessage(ctx->status, "out of memory"); - _upb_DefBuilder_FailJmp(ctx); +static size_t upb_MiniTableEnum_Size(size_t count) { + return sizeof(upb_MiniTableEnum) + count * sizeof(uint32_t); } -// Verify a relative identifier string. The loop is branchless for speed. -static void _upb_DefBuilder_CheckIdentNotFull(upb_DefBuilder* ctx, - upb_StringView name) { - bool good = name.size > 0; - - for (size_t i = 0; i < name.size; i++) { - const char c = name.data[i]; - const char d = c | 0x20; // force lowercase - const bool is_alpha = (('a' <= d) & (d <= 'z')) | (c == '_'); - const bool is_numer = ('0' <= c) & (c <= '9') & (i != 0); - - good &= is_alpha | is_numer; +static upb_MiniTableEnum* _upb_MiniTable_AddEnumDataMember(upb_MdEnumDecoder* d, + uint32_t val) { + if (d->enum_data_count == d->enum_data_capacity) { + size_t old_sz = upb_MiniTableEnum_Size(d->enum_data_capacity); + d->enum_data_capacity = UPB_MAX(2, d->enum_data_capacity * 2); + size_t new_sz = upb_MiniTableEnum_Size(d->enum_data_capacity); + d->enum_table = upb_Arena_Realloc(d->arena, d->enum_table, old_sz, new_sz); + upb_MdDecoder_CheckOutOfMemory(&d->base, d->enum_table); } - - if (!good) _upb_DefBuilder_CheckIdentSlow(ctx, name, false); + d->enum_table->data[d->enum_data_count++] = val; + return d->enum_table; } -const char* _upb_DefBuilder_MakeFullName(upb_DefBuilder* ctx, - const char* prefix, - upb_StringView name) { - _upb_DefBuilder_CheckIdentNotFull(ctx, name); - if (prefix) { - // ret = prefix + '.' + name; - size_t n = strlen(prefix); - char* ret = _upb_DefBuilder_Alloc(ctx, n + name.size + 2); - strcpy(ret, prefix); - ret[n] = '.'; - memcpy(&ret[n + 1], name.data, name.size); - ret[n + 1 + name.size] = '\0'; - return ret; +static void upb_MiniTableEnum_BuildValue(upb_MdEnumDecoder* d, uint32_t val) { + upb_MiniTableEnum* table = d->enum_table; + d->enum_value_count++; + if (table->value_count || (val > 512 && d->enum_value_count < val / 32)) { + if (table->value_count == 0) { + assert(d->enum_data_count == table->mask_limit / 32); + } + table = _upb_MiniTable_AddEnumDataMember(d, val); + table->value_count++; } else { - char* ret = upb_strdup2(name.data, name.size, ctx->arena); - if (!ret) _upb_DefBuilder_OomErr(ctx); - return ret; + uint32_t new_mask_limit = ((val / 32) + 1) * 32; + while (table->mask_limit < new_mask_limit) { + table = _upb_MiniTable_AddEnumDataMember(d, 0); + table->mask_limit += 32; + } + table->data[val / 32] |= 1ULL << (val % 32); } } -static bool remove_component(char* base, size_t* len) { - if (*len == 0) return false; - - for (size_t i = *len - 1; i > 0; i--) { - if (base[i] == '.') { - *len = i; - return true; +static upb_MiniTableEnum* upb_MtDecoder_DoBuildMiniTableEnum( + upb_MdEnumDecoder* d, const char* data, size_t len) { + // If the string is non-empty then it must begin with a version tag. + if (len) { + if (*data != kUpb_EncodedVersion_EnumV1) { + upb_MdDecoder_ErrorJmp(&d->base, "Invalid enum version: %c", *data); } + data++; + len--; } - *len = 0; - return true; -} + upb_MdDecoder_CheckOutOfMemory(&d->base, d->enum_table); -const void* _upb_DefBuilder_ResolveAny(upb_DefBuilder* ctx, - const char* from_name_dbg, - const char* base, upb_StringView sym, - upb_deftype_t* type) { - if (sym.size == 0) goto notfound; - upb_value v; - if (sym.data[0] == '.') { - /* Symbols starting with '.' are absolute, so we do a single lookup. - * Slice to omit the leading '.' */ - if (!_upb_DefPool_LookupSym(ctx->symtab, sym.data + 1, sym.size - 1, &v)) { - goto notfound; - } - } else { - /* Remove components from base until we find an entry or run out. */ - size_t baselen = base ? strlen(base) : 0; - char* tmp = malloc(sym.size + baselen + 1); - while (1) { - char* p = tmp; - if (baselen) { - memcpy(p, base, baselen); - p[baselen] = '.'; - p += baselen + 1; - } - memcpy(p, sym.data, sym.size); - p += sym.size; - if (_upb_DefPool_LookupSym(ctx->symtab, tmp, p - tmp, &v)) { - break; - } - if (!remove_component(tmp, &baselen)) { - free(tmp); - goto notfound; + // Guarantee at least 64 bits of mask without checking mask size. + d->enum_table->mask_limit = 64; + d->enum_table = _upb_MiniTable_AddEnumDataMember(d, 0); + d->enum_table = _upb_MiniTable_AddEnumDataMember(d, 0); + + d->enum_table->value_count = 0; + + const char* ptr = data; + uint32_t base = 0; + + while (ptr < d->base.end) { + char ch = *ptr++; + if (ch <= kUpb_EncodedValue_MaxEnumMask) { + uint32_t mask = _upb_FromBase92(ch); + for (int i = 0; i < 5; i++, base++, mask >>= 1) { + if (mask & 1) upb_MiniTableEnum_BuildValue(d, base); } + } else if (kUpb_EncodedValue_MinSkip <= ch && + ch <= kUpb_EncodedValue_MaxSkip) { + uint32_t skip; + ptr = upb_MdDecoder_DecodeBase92Varint(&d->base, ptr, ch, + kUpb_EncodedValue_MinSkip, + kUpb_EncodedValue_MaxSkip, &skip); + base += skip; + } else { + upb_MdDecoder_ErrorJmp(&d->base, "Unexpected character: %c", ch); } - free(tmp); } - *type = _upb_DefType_Type(v); - return _upb_DefType_Unpack(v, *type); - -notfound: - _upb_DefBuilder_Errf(ctx, "couldn't resolve name '" UPB_STRINGVIEW_FORMAT "'", - UPB_STRINGVIEW_ARGS(sym)); + return d->enum_table; } -const void* _upb_DefBuilder_Resolve(upb_DefBuilder* ctx, - const char* from_name_dbg, const char* base, - upb_StringView sym, upb_deftype_t type) { - upb_deftype_t found_type; - const void* ret = - _upb_DefBuilder_ResolveAny(ctx, from_name_dbg, base, sym, &found_type); - if (ret && found_type != type) { - _upb_DefBuilder_Errf(ctx, - "type mismatch when resolving %s: couldn't find " - "name " UPB_STRINGVIEW_FORMAT " with type=%d", - from_name_dbg, UPB_STRINGVIEW_ARGS(sym), (int)type); - } - return ret; +static upb_MiniTableEnum* upb_MtDecoder_BuildMiniTableEnum( + upb_MdEnumDecoder* const decoder, const char* const data, size_t const len) { + if (UPB_SETJMP(decoder->base.err) != 0) return NULL; + return upb_MtDecoder_DoBuildMiniTableEnum(decoder, data, len); } -// Per ASCII this will lower-case a letter. If the result is a letter, the -// input was definitely a letter. If the output is not a letter, this may -// have transformed the character unpredictably. -static char upb_ascii_lower(char ch) { return ch | 0x20; } +upb_MiniTableEnum* upb_MiniDescriptor_BuildEnum(const char* data, size_t len, + upb_Arena* arena, + upb_Status* status) { + upb_MdEnumDecoder decoder = { + .base = + { + .end = UPB_PTRADD(data, len), + .status = status, + }, + .arena = arena, + .enum_table = upb_Arena_Malloc(arena, upb_MiniTableEnum_Size(2)), + .enum_value_count = 0, + .enum_data_count = 0, + .enum_data_capacity = 1, + }; -// isalpha() etc. from are locale-dependent, which we don't want. -static bool upb_isbetween(uint8_t c, uint8_t low, uint8_t high) { - return low <= c && c <= high; + return upb_MtDecoder_BuildMiniTableEnum(&decoder, data, len); } -static bool upb_isletter(char c) { - char lower = upb_ascii_lower(c); - return upb_isbetween(lower, 'a', 'z') || c == '_'; -} -static bool upb_isalphanum(char c) { - return upb_isletter(c) || upb_isbetween(c, '0', '9'); -} +#include +#include -static bool TryGetChar(const char** src, const char* end, char* ch) { - if (*src == end) return false; - *ch = **src; - *src += 1; - return true; -} -static int TryGetHexDigit(const char** src, const char* end) { - char ch; - if (!TryGetChar(src, end, &ch)) return -1; - if ('0' <= ch && ch <= '9') { - return ch - '0'; - } - ch = upb_ascii_lower(ch); - if ('a' <= ch && ch <= 'f') { - return ch - 'a' + 0xa; - } - *src -= 1; // Char wasn't actually a hex digit. - return -1; -} +// Must be last. -static char upb_DefBuilder_ParseHexEscape(upb_DefBuilder* ctx, - const upb_FieldDef* f, - const char** src, const char* end) { - int hex_digit = TryGetHexDigit(src, end); - if (hex_digit < 0) { - _upb_DefBuilder_Errf( - ctx, "\\x must be followed by at least one hex digit (field='%s')", - upb_FieldDef_FullName(f)); - return 0; - } - unsigned int ret = hex_digit; - while ((hex_digit = TryGetHexDigit(src, end)) >= 0) { - ret = (ret << 4) | hex_digit; - } - if (ret > 0xff) { - _upb_DefBuilder_Errf(ctx, "Value of hex escape in field %s exceeds 8 bits", - upb_FieldDef_FullName(f)); - return 0; - } - return ret; -} - -static char TryGetOctalDigit(const char** src, const char* end) { - char ch; - if (!TryGetChar(src, end, &ch)) return -1; - if ('0' <= ch && ch <= '7') { - return ch - '0'; - } - *src -= 1; // Char wasn't actually an octal digit. - return -1; -} - -static char upb_DefBuilder_ParseOctalEscape(upb_DefBuilder* ctx, - const upb_FieldDef* f, - const char** src, const char* end) { - char ch = 0; - for (int i = 0; i < 3; i++) { - char digit; - if ((digit = TryGetOctalDigit(src, end)) >= 0) { - ch = (ch << 3) | digit; - } - } - return ch; -} - -char _upb_DefBuilder_ParseEscape(upb_DefBuilder* ctx, const upb_FieldDef* f, - const char** src, const char* end) { - char ch; - if (!TryGetChar(src, end, &ch)) { - _upb_DefBuilder_Errf(ctx, "unterminated escape sequence in field %s", - upb_FieldDef_FullName(f)); - return 0; - } - switch (ch) { - case 'a': - return '\a'; - case 'b': - return '\b'; - case 'f': - return '\f'; - case 'n': - return '\n'; - case 'r': - return '\r'; - case 't': - return '\t'; - case 'v': - return '\v'; - case '\\': - return '\\'; - case '\'': - return '\''; - case '\"': - return '\"'; - case '?': - return '\?'; - case 'x': - case 'X': - return upb_DefBuilder_ParseHexEscape(ctx, f, src, end); - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - *src -= 1; - return upb_DefBuilder_ParseOctalEscape(ctx, f, src, end); - } - _upb_DefBuilder_Errf(ctx, "Unknown escape sequence: \\%c", ch); -} +// Note: we sort by this number when calculating layout order. +typedef enum { + kUpb_LayoutItemType_OneofCase, // Oneof case. + kUpb_LayoutItemType_OneofField, // Oneof field data. + kUpb_LayoutItemType_Field, // Non-oneof field data. -void _upb_DefBuilder_CheckIdentSlow(upb_DefBuilder* ctx, upb_StringView name, - bool full) { - const char* str = name.data; - const size_t len = name.size; - bool start = true; - for (size_t i = 0; i < len; i++) { - const char c = str[i]; - if (c == '.') { - if (start || !full) { - _upb_DefBuilder_Errf( - ctx, "invalid name: unexpected '.' (" UPB_STRINGVIEW_FORMAT ")", - UPB_STRINGVIEW_ARGS(name)); - } - start = true; - } else if (start) { - if (!upb_isletter(c)) { - _upb_DefBuilder_Errf(ctx, - "invalid name: path components must start with a " - "letter (" UPB_STRINGVIEW_FORMAT ")", - UPB_STRINGVIEW_ARGS(name)); - } - start = false; - } else if (!upb_isalphanum(c)) { - _upb_DefBuilder_Errf( - ctx, - "invalid name: non-alphanumeric character (" UPB_STRINGVIEW_FORMAT - ")", - UPB_STRINGVIEW_ARGS(name)); - } - } - if (start) { - _upb_DefBuilder_Errf(ctx, - "invalid name: empty part (" UPB_STRINGVIEW_FORMAT ")", - UPB_STRINGVIEW_ARGS(name)); - } + kUpb_LayoutItemType_Max = kUpb_LayoutItemType_Field, +} upb_LayoutItemType; - // We should never reach this point. - UPB_ASSERT(false); -} +#define kUpb_LayoutItem_IndexSentinel ((uint16_t)-1) +typedef struct { + // Index of the corresponding field. When this is a oneof field, the field's + // offset will be the index of the next field in a linked list. + uint16_t field_index; + uint16_t offset; + upb_FieldRep rep; + upb_LayoutItemType type; +} upb_LayoutItem; -// Must be last. +typedef struct { + upb_LayoutItem* data; + size_t size; + size_t capacity; +} upb_LayoutItemVector; -struct upb_DefPool { - upb_Arena* arena; - upb_strtable syms; // full_name -> packed def ptr - upb_strtable files; // file_name -> (upb_FileDef*) - upb_inttable exts; // (upb_MiniTableExtension*) -> (upb_FieldDef*) - upb_ExtensionRegistry* extreg; +typedef struct { + upb_MdDecoder base; + upb_MiniTable* table; + upb_MiniTableField* fields; upb_MiniTablePlatform platform; - void* scratch_data; - size_t scratch_size; - size_t bytes_loaded; + upb_LayoutItemVector vec; + upb_Arena* arena; +} upb_MtDecoder; + +// In each field's offset, we temporarily store a presence classifier: +enum PresenceClass { + kNoPresence = 0, + kHasbitPresence = 1, + kRequiredPresence = 2, + kOneofBase = 3, + // Negative values refer to a specific oneof with that number. Positive + // values >= kOneofBase indicate that this field is in a oneof, and specify + // the next field in this oneof's linked list. }; -void upb_DefPool_Free(upb_DefPool* s) { - upb_Arena_Free(s->arena); - upb_gfree(s->scratch_data); - upb_gfree(s); +static bool upb_MtDecoder_FieldIsPackable(upb_MiniTableField* field) { + return (field->mode & kUpb_FieldMode_Array) && + upb_FieldType_IsPackable(field->UPB_PRIVATE(descriptortype)); } -upb_DefPool* upb_DefPool_New(void) { - upb_DefPool* s = upb_gmalloc(sizeof(*s)); - if (!s) return NULL; - - s->arena = upb_Arena_New(); - s->bytes_loaded = 0; - - s->scratch_size = 240; - s->scratch_data = upb_gmalloc(s->scratch_size); - if (!s->scratch_data) goto err; - - if (!upb_strtable_init(&s->syms, 32, s->arena)) goto err; - if (!upb_strtable_init(&s->files, 4, s->arena)) goto err; - if (!upb_inttable_init(&s->exts, s->arena)) goto err; - - s->extreg = upb_ExtensionRegistry_New(s->arena); - if (!s->extreg) goto err; +typedef struct { + uint16_t submsg_count; + uint16_t subenum_count; +} upb_SubCounts; - s->platform = kUpb_MiniTablePlatform_Native; +static void upb_MiniTable_SetTypeAndSub(upb_MiniTableField* field, + upb_FieldType type, + upb_SubCounts* sub_counts, + uint64_t msg_modifiers, + bool is_proto3_enum) { + if (is_proto3_enum) { + UPB_ASSERT(type == kUpb_FieldType_Enum); + type = kUpb_FieldType_Int32; + field->mode |= kUpb_LabelFlags_IsAlternate; + } else if (type == kUpb_FieldType_String && + !(msg_modifiers & kUpb_MessageModifier_ValidateUtf8)) { + type = kUpb_FieldType_Bytes; + field->mode |= kUpb_LabelFlags_IsAlternate; + } - return s; + field->UPB_PRIVATE(descriptortype) = type; -err: - upb_DefPool_Free(s); - return NULL; -} + if (upb_MtDecoder_FieldIsPackable(field) && + (msg_modifiers & kUpb_MessageModifier_DefaultIsPacked)) { + field->mode |= kUpb_LabelFlags_IsPacked; + } -bool _upb_DefPool_InsertExt(upb_DefPool* s, const upb_MiniTableExtension* ext, - const upb_FieldDef* f) { - return upb_inttable_insert(&s->exts, (uintptr_t)ext, upb_value_constptr(f), - s->arena); + if (type == kUpb_FieldType_Message || type == kUpb_FieldType_Group) { + field->UPB_PRIVATE(submsg_index) = sub_counts->submsg_count++; + } else if (type == kUpb_FieldType_Enum) { + // We will need to update this later once we know the total number of + // submsg fields. + field->UPB_PRIVATE(submsg_index) = sub_counts->subenum_count++; + } else { + field->UPB_PRIVATE(submsg_index) = kUpb_NoSub; + } } -bool _upb_DefPool_InsertSym(upb_DefPool* s, upb_StringView sym, upb_value v, - upb_Status* status) { - // TODO: table should support an operation "tryinsert" to avoid the double - // lookup. - if (upb_strtable_lookup2(&s->syms, sym.data, sym.size, NULL)) { - upb_Status_SetErrorFormat(status, "duplicate symbol '%s'", sym.data); - return false; - } - if (!upb_strtable_insert(&s->syms, sym.data, sym.size, v, s->arena)) { - upb_Status_SetErrorMessage(status, "out of memory"); - return false; - } - return true; -} +static const char kUpb_EncodedToType[] = { + [kUpb_EncodedType_Double] = kUpb_FieldType_Double, + [kUpb_EncodedType_Float] = kUpb_FieldType_Float, + [kUpb_EncodedType_Int64] = kUpb_FieldType_Int64, + [kUpb_EncodedType_UInt64] = kUpb_FieldType_UInt64, + [kUpb_EncodedType_Int32] = kUpb_FieldType_Int32, + [kUpb_EncodedType_Fixed64] = kUpb_FieldType_Fixed64, + [kUpb_EncodedType_Fixed32] = kUpb_FieldType_Fixed32, + [kUpb_EncodedType_Bool] = kUpb_FieldType_Bool, + [kUpb_EncodedType_String] = kUpb_FieldType_String, + [kUpb_EncodedType_Group] = kUpb_FieldType_Group, + [kUpb_EncodedType_Message] = kUpb_FieldType_Message, + [kUpb_EncodedType_Bytes] = kUpb_FieldType_Bytes, + [kUpb_EncodedType_UInt32] = kUpb_FieldType_UInt32, + [kUpb_EncodedType_OpenEnum] = kUpb_FieldType_Enum, + [kUpb_EncodedType_SFixed32] = kUpb_FieldType_SFixed32, + [kUpb_EncodedType_SFixed64] = kUpb_FieldType_SFixed64, + [kUpb_EncodedType_SInt32] = kUpb_FieldType_SInt32, + [kUpb_EncodedType_SInt64] = kUpb_FieldType_SInt64, + [kUpb_EncodedType_ClosedEnum] = kUpb_FieldType_Enum, +}; -static const void* _upb_DefPool_Unpack(const upb_DefPool* s, const char* sym, - size_t size, upb_deftype_t type) { - upb_value v; - return upb_strtable_lookup2(&s->syms, sym, size, &v) - ? _upb_DefType_Unpack(v, type) - : NULL; -} +static void upb_MiniTable_SetField(upb_MtDecoder* d, uint8_t ch, + upb_MiniTableField* field, + uint64_t msg_modifiers, + upb_SubCounts* sub_counts) { + static const char kUpb_EncodedToFieldRep[] = { + [kUpb_EncodedType_Double] = kUpb_FieldRep_8Byte, + [kUpb_EncodedType_Float] = kUpb_FieldRep_4Byte, + [kUpb_EncodedType_Int64] = kUpb_FieldRep_8Byte, + [kUpb_EncodedType_UInt64] = kUpb_FieldRep_8Byte, + [kUpb_EncodedType_Int32] = kUpb_FieldRep_4Byte, + [kUpb_EncodedType_Fixed64] = kUpb_FieldRep_8Byte, + [kUpb_EncodedType_Fixed32] = kUpb_FieldRep_4Byte, + [kUpb_EncodedType_Bool] = kUpb_FieldRep_1Byte, + [kUpb_EncodedType_String] = kUpb_FieldRep_StringView, + [kUpb_EncodedType_Bytes] = kUpb_FieldRep_StringView, + [kUpb_EncodedType_UInt32] = kUpb_FieldRep_4Byte, + [kUpb_EncodedType_OpenEnum] = kUpb_FieldRep_4Byte, + [kUpb_EncodedType_SFixed32] = kUpb_FieldRep_4Byte, + [kUpb_EncodedType_SFixed64] = kUpb_FieldRep_8Byte, + [kUpb_EncodedType_SInt32] = kUpb_FieldRep_4Byte, + [kUpb_EncodedType_SInt64] = kUpb_FieldRep_8Byte, + [kUpb_EncodedType_ClosedEnum] = kUpb_FieldRep_4Byte, + }; -bool _upb_DefPool_LookupSym(const upb_DefPool* s, const char* sym, size_t size, - upb_value* v) { - return upb_strtable_lookup2(&s->syms, sym, size, v); -} + char pointer_rep = d->platform == kUpb_MiniTablePlatform_32Bit + ? kUpb_FieldRep_4Byte + : kUpb_FieldRep_8Byte; -upb_ExtensionRegistry* _upb_DefPool_ExtReg(const upb_DefPool* s) { - return s->extreg; + int8_t type = _upb_FromBase92(ch); + if (ch >= _upb_ToBase92(kUpb_EncodedType_RepeatedBase)) { + type -= kUpb_EncodedType_RepeatedBase; + field->mode = kUpb_FieldMode_Array; + field->mode |= pointer_rep << kUpb_FieldRep_Shift; + field->offset = kNoPresence; + } else { + field->mode = kUpb_FieldMode_Scalar; + field->offset = kHasbitPresence; + if (type == kUpb_EncodedType_Group || type == kUpb_EncodedType_Message) { + field->mode |= pointer_rep << kUpb_FieldRep_Shift; + } else if ((unsigned long)type >= sizeof(kUpb_EncodedToFieldRep)) { + upb_MdDecoder_ErrorJmp(&d->base, "Invalid field type: %d", (int)type); + } else { + field->mode |= kUpb_EncodedToFieldRep[type] << kUpb_FieldRep_Shift; + } + } + if ((unsigned long)type >= sizeof(kUpb_EncodedToType)) { + upb_MdDecoder_ErrorJmp(&d->base, "Invalid field type: %d", (int)type); + } + upb_MiniTable_SetTypeAndSub(field, kUpb_EncodedToType[type], sub_counts, + msg_modifiers, type == kUpb_EncodedType_OpenEnum); } -void** _upb_DefPool_ScratchData(const upb_DefPool* s) { - return (void**)&s->scratch_data; -} +static void upb_MtDecoder_ModifyField(upb_MtDecoder* d, + uint32_t message_modifiers, + uint32_t field_modifiers, + upb_MiniTableField* field) { + if (field_modifiers & kUpb_EncodedFieldModifier_FlipPacked) { + if (!upb_MtDecoder_FieldIsPackable(field)) { + upb_MdDecoder_ErrorJmp(&d->base, + "Cannot flip packed on unpackable field %" PRIu32, + field->number); + } + field->mode ^= kUpb_LabelFlags_IsPacked; + } -size_t* _upb_DefPool_ScratchSize(const upb_DefPool* s) { - return (size_t*)&s->scratch_size; -} + bool singular = field_modifiers & kUpb_EncodedFieldModifier_IsProto3Singular; + bool required = field_modifiers & kUpb_EncodedFieldModifier_IsRequired; -void _upb_DefPool_SetPlatform(upb_DefPool* s, upb_MiniTablePlatform platform) { - assert(upb_strtable_count(&s->files) == 0); - s->platform = platform; -} + // Validate. + if ((singular || required) && field->offset != kHasbitPresence) { + upb_MdDecoder_ErrorJmp(&d->base, + "Invalid modifier(s) for repeated field %" PRIu32, + field->number); + } + if (singular && required) { + upb_MdDecoder_ErrorJmp( + &d->base, "Field %" PRIu32 " cannot be both singular and required", + field->number); + } -const upb_MessageDef* upb_DefPool_FindMessageByName(const upb_DefPool* s, - const char* sym) { - return _upb_DefPool_Unpack(s, sym, strlen(sym), UPB_DEFTYPE_MSG); + if (singular) field->offset = kNoPresence; + if (required) { + field->offset = kRequiredPresence; + } } -const upb_MessageDef* upb_DefPool_FindMessageByNameWithSize( - const upb_DefPool* s, const char* sym, size_t len) { - return _upb_DefPool_Unpack(s, sym, len, UPB_DEFTYPE_MSG); +static void upb_MtDecoder_PushItem(upb_MtDecoder* d, upb_LayoutItem item) { + if (d->vec.size == d->vec.capacity) { + size_t new_cap = UPB_MAX(8, d->vec.size * 2); + d->vec.data = realloc(d->vec.data, new_cap * sizeof(*d->vec.data)); + upb_MdDecoder_CheckOutOfMemory(&d->base, d->vec.data); + d->vec.capacity = new_cap; + } + d->vec.data[d->vec.size++] = item; } -const upb_EnumDef* upb_DefPool_FindEnumByName(const upb_DefPool* s, - const char* sym) { - return _upb_DefPool_Unpack(s, sym, strlen(sym), UPB_DEFTYPE_ENUM); -} +static void upb_MtDecoder_PushOneof(upb_MtDecoder* d, upb_LayoutItem item) { + if (item.field_index == kUpb_LayoutItem_IndexSentinel) { + upb_MdDecoder_ErrorJmp(&d->base, "Empty oneof"); + } + item.field_index -= kOneofBase; -const upb_EnumValueDef* upb_DefPool_FindEnumByNameval(const upb_DefPool* s, - const char* sym) { - return _upb_DefPool_Unpack(s, sym, strlen(sym), UPB_DEFTYPE_ENUMVAL); + // Push oneof data. + item.type = kUpb_LayoutItemType_OneofField; + upb_MtDecoder_PushItem(d, item); + + // Push oneof case. + item.rep = kUpb_FieldRep_4Byte; // Field Number. + item.type = kUpb_LayoutItemType_OneofCase; + upb_MtDecoder_PushItem(d, item); } -const upb_FileDef* upb_DefPool_FindFileByName(const upb_DefPool* s, - const char* name) { - upb_value v; - return upb_strtable_lookup(&s->files, name, &v) ? upb_value_getconstptr(v) - : NULL; +size_t upb_MtDecoder_SizeOfRep(upb_FieldRep rep, + upb_MiniTablePlatform platform) { + static const uint8_t kRepToSize32[] = { + [kUpb_FieldRep_1Byte] = 1, + [kUpb_FieldRep_4Byte] = 4, + [kUpb_FieldRep_StringView] = 8, + [kUpb_FieldRep_8Byte] = 8, + }; + static const uint8_t kRepToSize64[] = { + [kUpb_FieldRep_1Byte] = 1, + [kUpb_FieldRep_4Byte] = 4, + [kUpb_FieldRep_StringView] = 16, + [kUpb_FieldRep_8Byte] = 8, + }; + UPB_ASSERT(sizeof(upb_StringView) == + UPB_SIZE(kRepToSize32, kRepToSize64)[kUpb_FieldRep_StringView]); + return platform == kUpb_MiniTablePlatform_32Bit ? kRepToSize32[rep] + : kRepToSize64[rep]; } -const upb_FileDef* upb_DefPool_FindFileByNameWithSize(const upb_DefPool* s, - const char* name, - size_t len) { - upb_value v; - return upb_strtable_lookup2(&s->files, name, len, &v) - ? upb_value_getconstptr(v) - : NULL; +size_t upb_MtDecoder_AlignOfRep(upb_FieldRep rep, + upb_MiniTablePlatform platform) { + static const uint8_t kRepToAlign32[] = { + [kUpb_FieldRep_1Byte] = 1, + [kUpb_FieldRep_4Byte] = 4, + [kUpb_FieldRep_StringView] = 4, + [kUpb_FieldRep_8Byte] = 8, + }; + static const uint8_t kRepToAlign64[] = { + [kUpb_FieldRep_1Byte] = 1, + [kUpb_FieldRep_4Byte] = 4, + [kUpb_FieldRep_StringView] = 8, + [kUpb_FieldRep_8Byte] = 8, + }; + UPB_ASSERT(UPB_ALIGN_OF(upb_StringView) == + UPB_SIZE(kRepToAlign32, kRepToAlign64)[kUpb_FieldRep_StringView]); + return platform == kUpb_MiniTablePlatform_32Bit ? kRepToAlign32[rep] + : kRepToAlign64[rep]; } -const upb_FieldDef* upb_DefPool_FindExtensionByNameWithSize( - const upb_DefPool* s, const char* name, size_t size) { - upb_value v; - if (!upb_strtable_lookup2(&s->syms, name, size, &v)) return NULL; +static const char* upb_MtDecoder_DecodeOneofField(upb_MtDecoder* d, + const char* ptr, + char first_ch, + upb_LayoutItem* item) { + uint32_t field_num; + ptr = upb_MdDecoder_DecodeBase92Varint( + &d->base, ptr, first_ch, kUpb_EncodedValue_MinOneofField, + kUpb_EncodedValue_MaxOneofField, &field_num); + upb_MiniTableField* f = + (void*)upb_MiniTable_FindFieldByNumber(d->table, field_num); - switch (_upb_DefType_Type(v)) { - case UPB_DEFTYPE_FIELD: - return _upb_DefType_Unpack(v, UPB_DEFTYPE_FIELD); - case UPB_DEFTYPE_MSG: { - const upb_MessageDef* m = _upb_DefType_Unpack(v, UPB_DEFTYPE_MSG); - return _upb_MessageDef_InMessageSet(m) - ? upb_MessageDef_NestedExtension(m, 0) - : NULL; - } - default: - break; + if (!f) { + upb_MdDecoder_ErrorJmp(&d->base, + "Couldn't add field number %" PRIu32 + " to oneof, no such field number.", + field_num); + } + if (f->offset != kHasbitPresence) { + upb_MdDecoder_ErrorJmp( + &d->base, + "Cannot add repeated, required, or singular field %" PRIu32 + " to oneof.", + field_num); } - return NULL; + // Oneof storage must be large enough to accommodate the largest member. + int rep = f->mode >> kUpb_FieldRep_Shift; + if (upb_MtDecoder_SizeOfRep(rep, d->platform) > + upb_MtDecoder_SizeOfRep(item->rep, d->platform)) { + item->rep = rep; + } + // Prepend this field to the linked list. + f->offset = item->field_index; + item->field_index = (f - d->fields) + kOneofBase; + return ptr; } -const upb_FieldDef* upb_DefPool_FindExtensionByName(const upb_DefPool* s, - const char* sym) { - return upb_DefPool_FindExtensionByNameWithSize(s, sym, strlen(sym)); -} +static const char* upb_MtDecoder_DecodeOneofs(upb_MtDecoder* d, + const char* ptr) { + upb_LayoutItem item = {.rep = 0, + .field_index = kUpb_LayoutItem_IndexSentinel}; + while (ptr < d->base.end) { + char ch = *ptr++; + if (ch == kUpb_EncodedValue_FieldSeparator) { + // Field separator, no action needed. + } else if (ch == kUpb_EncodedValue_OneofSeparator) { + // End of oneof. + upb_MtDecoder_PushOneof(d, item); + item.field_index = kUpb_LayoutItem_IndexSentinel; // Move to next oneof. + } else { + ptr = upb_MtDecoder_DecodeOneofField(d, ptr, ch, &item); + } + } -const upb_ServiceDef* upb_DefPool_FindServiceByName(const upb_DefPool* s, - const char* name) { - return _upb_DefPool_Unpack(s, name, strlen(name), UPB_DEFTYPE_SERVICE); + // Push final oneof. + upb_MtDecoder_PushOneof(d, item); + return ptr; } -const upb_ServiceDef* upb_DefPool_FindServiceByNameWithSize( - const upb_DefPool* s, const char* name, size_t size) { - return _upb_DefPool_Unpack(s, name, size, UPB_DEFTYPE_SERVICE); +static const char* upb_MtDecoder_ParseModifier(upb_MtDecoder* d, + const char* ptr, char first_ch, + upb_MiniTableField* last_field, + uint64_t* msg_modifiers) { + uint32_t mod; + ptr = upb_MdDecoder_DecodeBase92Varint(&d->base, ptr, first_ch, + kUpb_EncodedValue_MinModifier, + kUpb_EncodedValue_MaxModifier, &mod); + if (last_field) { + upb_MtDecoder_ModifyField(d, *msg_modifiers, mod, last_field); + } else { + if (!d->table) { + upb_MdDecoder_ErrorJmp(&d->base, + "Extensions cannot have message modifiers"); + } + *msg_modifiers = mod; + } + + return ptr; } -const upb_FileDef* upb_DefPool_FindFileContainingSymbol(const upb_DefPool* s, - const char* name) { - upb_value v; - // TODO(haberman): non-extension fields and oneofs. - if (upb_strtable_lookup(&s->syms, name, &v)) { - switch (_upb_DefType_Type(v)) { - case UPB_DEFTYPE_EXT: { - const upb_FieldDef* f = _upb_DefType_Unpack(v, UPB_DEFTYPE_EXT); - return upb_FieldDef_File(f); +static void upb_MtDecoder_AllocateSubs(upb_MtDecoder* d, + upb_SubCounts sub_counts) { + uint32_t total_count = sub_counts.submsg_count + sub_counts.subenum_count; + size_t subs_bytes = sizeof(*d->table->subs) * total_count; + upb_MiniTableSub* subs = upb_Arena_Malloc(d->arena, subs_bytes); + upb_MdDecoder_CheckOutOfMemory(&d->base, subs); + uint32_t i = 0; + for (; i < sub_counts.submsg_count; i++) { + subs[i].submsg = &_kUpb_MiniTable_Empty; + } + if (sub_counts.subenum_count) { + upb_MiniTableField* f = d->fields; + upb_MiniTableField* end_f = f + d->table->field_count; + for (; f < end_f; f++) { + if (f->UPB_PRIVATE(descriptortype) == kUpb_FieldType_Enum) { + f->UPB_PRIVATE(submsg_index) += sub_counts.submsg_count; } - case UPB_DEFTYPE_MSG: { - const upb_MessageDef* m = _upb_DefType_Unpack(v, UPB_DEFTYPE_MSG); - return upb_MessageDef_File(m); + } + for (; i < sub_counts.submsg_count + sub_counts.subenum_count; i++) { + subs[i].subenum = NULL; + } + } + d->table->subs = subs; +} + +static const char* upb_MtDecoder_Parse(upb_MtDecoder* d, const char* ptr, + size_t len, void* fields, + size_t field_size, uint16_t* field_count, + upb_SubCounts* sub_counts) { + uint64_t msg_modifiers = 0; + uint32_t last_field_number = 0; + upb_MiniTableField* last_field = NULL; + bool need_dense_below = d->table != NULL; + + d->base.end = UPB_PTRADD(ptr, len); + + while (ptr < d->base.end) { + char ch = *ptr++; + if (ch <= kUpb_EncodedValue_MaxField) { + if (!d->table && last_field) { + // For extensions, consume only a single field and then return. + return --ptr; } - case UPB_DEFTYPE_ENUM: { - const upb_EnumDef* e = _upb_DefType_Unpack(v, UPB_DEFTYPE_ENUM); - return upb_EnumDef_File(e); + upb_MiniTableField* field = fields; + *field_count += 1; + fields = (char*)fields + field_size; + field->number = ++last_field_number; + last_field = field; + upb_MiniTable_SetField(d, ch, field, msg_modifiers, sub_counts); + } else if (kUpb_EncodedValue_MinModifier <= ch && + ch <= kUpb_EncodedValue_MaxModifier) { + ptr = upb_MtDecoder_ParseModifier(d, ptr, ch, last_field, &msg_modifiers); + if (msg_modifiers & kUpb_MessageModifier_IsExtendable) { + d->table->ext |= kUpb_ExtMode_Extendable; } - case UPB_DEFTYPE_ENUMVAL: { - const upb_EnumValueDef* ev = - _upb_DefType_Unpack(v, UPB_DEFTYPE_ENUMVAL); - return upb_EnumDef_File(upb_EnumValueDef_Enum(ev)); + } else if (ch == kUpb_EncodedValue_End) { + if (!d->table) { + upb_MdDecoder_ErrorJmp(&d->base, "Extensions cannot have oneofs."); } - case UPB_DEFTYPE_SERVICE: { - const upb_ServiceDef* service = - _upb_DefType_Unpack(v, UPB_DEFTYPE_SERVICE); - return upb_ServiceDef_File(service); + ptr = upb_MtDecoder_DecodeOneofs(d, ptr); + } else if (kUpb_EncodedValue_MinSkip <= ch && + ch <= kUpb_EncodedValue_MaxSkip) { + if (need_dense_below) { + d->table->dense_below = d->table->field_count; + need_dense_below = false; } - default: - UPB_UNREACHABLE(); + uint32_t skip; + ptr = upb_MdDecoder_DecodeBase92Varint(&d->base, ptr, ch, + kUpb_EncodedValue_MinSkip, + kUpb_EncodedValue_MaxSkip, &skip); + last_field_number += skip; + last_field_number--; // Next field seen will increment. + } else { + upb_MdDecoder_ErrorJmp(&d->base, "Invalid char: %c", ch); } } - const char* last_dot = strrchr(name, '.'); - if (last_dot) { - const upb_MessageDef* parent = - upb_DefPool_FindMessageByNameWithSize(s, name, last_dot - name); - if (parent) { - const char* shortname = last_dot + 1; - if (upb_MessageDef_FindByNameWithSize(parent, shortname, - strlen(shortname), NULL, NULL)) { - return upb_MessageDef_File(parent); - } - } + if (need_dense_below) { + d->table->dense_below = d->table->field_count; } - return NULL; + return ptr; } -static void remove_filedef(upb_DefPool* s, upb_FileDef* file) { - intptr_t iter = UPB_INTTABLE_BEGIN; - upb_StringView key; - upb_value val; - while (upb_strtable_next2(&s->syms, &key, &val, &iter)) { - const upb_FileDef* f; - switch (_upb_DefType_Type(val)) { - case UPB_DEFTYPE_EXT: - f = upb_FieldDef_File(_upb_DefType_Unpack(val, UPB_DEFTYPE_EXT)); - break; - case UPB_DEFTYPE_MSG: - f = upb_MessageDef_File(_upb_DefType_Unpack(val, UPB_DEFTYPE_MSG)); - break; - case UPB_DEFTYPE_ENUM: - f = upb_EnumDef_File(_upb_DefType_Unpack(val, UPB_DEFTYPE_ENUM)); - break; - case UPB_DEFTYPE_ENUMVAL: - f = upb_EnumDef_File(upb_EnumValueDef_Enum( - _upb_DefType_Unpack(val, UPB_DEFTYPE_ENUMVAL))); - break; - case UPB_DEFTYPE_SERVICE: - f = upb_ServiceDef_File(_upb_DefType_Unpack(val, UPB_DEFTYPE_SERVICE)); - break; - default: - UPB_UNREACHABLE(); - } +static void upb_MtDecoder_ParseMessage(upb_MtDecoder* d, const char* data, + size_t len) { + // Buffer length is an upper bound on the number of fields. We will return + // what we don't use. + d->fields = upb_Arena_Malloc(d->arena, sizeof(*d->fields) * len); + upb_MdDecoder_CheckOutOfMemory(&d->base, d->fields); - if (f == file) upb_strtable_removeiter(&s->syms, &iter); - } + upb_SubCounts sub_counts = {0, 0}; + d->table->field_count = 0; + d->table->fields = d->fields; + upb_MtDecoder_Parse(d, data, len, d->fields, sizeof(*d->fields), + &d->table->field_count, &sub_counts); + + upb_Arena_ShrinkLast(d->arena, d->fields, sizeof(*d->fields) * len, + sizeof(*d->fields) * d->table->field_count); + d->table->fields = d->fields; + upb_MtDecoder_AllocateSubs(d, sub_counts); } -static const upb_FileDef* upb_DefBuilder_AddFileToPool( - upb_DefBuilder* const builder, upb_DefPool* const s, - const UPB_DESC(FileDescriptorProto) * const file_proto, - const upb_StringView name, upb_Status* const status) { - if (UPB_SETJMP(builder->err) != 0) { - UPB_ASSERT(!upb_Status_IsOk(status)); - if (builder->file) { - remove_filedef(s, builder->file); - builder->file = NULL; - } - } else if (!builder->arena || !builder->tmp_arena) { - _upb_DefBuilder_OomErr(builder); - } else { - _upb_FileDef_Create(builder, file_proto); - upb_strtable_insert(&s->files, name.data, name.size, - upb_value_constptr(builder->file), builder->arena); - UPB_ASSERT(upb_Status_IsOk(status)); - upb_Arena_Fuse(s->arena, builder->arena); +int upb_MtDecoder_CompareFields(const void* _a, const void* _b) { + const upb_LayoutItem* a = _a; + const upb_LayoutItem* b = _b; + // Currently we just sort by: + // 1. rep (smallest fields first) + // 2. type (oneof cases first) + // 2. field_index (smallest numbers first) + // The main goal of this is to reduce space lost to padding. + // Later we may have more subtle reasons to prefer a different ordering. + const int rep_bits = upb_Log2Ceiling(kUpb_FieldRep_Max); + const int type_bits = upb_Log2Ceiling(kUpb_LayoutItemType_Max); + const int idx_bits = (sizeof(a->field_index) * 8); + UPB_ASSERT(idx_bits + rep_bits + type_bits < 32); +#define UPB_COMBINE(rep, ty, idx) (((rep << type_bits) | ty) << idx_bits) | idx + uint32_t a_packed = UPB_COMBINE(a->rep, a->type, a->field_index); + uint32_t b_packed = UPB_COMBINE(b->rep, b->type, b->field_index); + assert(a_packed != b_packed); +#undef UPB_COMBINE + return a_packed < b_packed ? -1 : 1; +} + +static bool upb_MtDecoder_SortLayoutItems(upb_MtDecoder* d) { + // Add items for all non-oneof fields (oneofs were already added). + int n = d->table->field_count; + for (int i = 0; i < n; i++) { + upb_MiniTableField* f = &d->fields[i]; + if (f->offset >= kOneofBase) continue; + upb_LayoutItem item = {.field_index = i, + .rep = f->mode >> kUpb_FieldRep_Shift, + .type = kUpb_LayoutItemType_Field}; + upb_MtDecoder_PushItem(d, item); } - if (builder->arena) upb_Arena_Free(builder->arena); - if (builder->tmp_arena) upb_Arena_Free(builder->tmp_arena); - return builder->file; + if (d->vec.size) { + qsort(d->vec.data, d->vec.size, sizeof(*d->vec.data), + upb_MtDecoder_CompareFields); + } + + return true; } -static const upb_FileDef* _upb_DefPool_AddFile( - upb_DefPool* s, const UPB_DESC(FileDescriptorProto) * file_proto, - const upb_MiniTableFile* layout, upb_Status* status) { - const upb_StringView name = UPB_DESC(FileDescriptorProto_name)(file_proto); +static size_t upb_MiniTable_DivideRoundUp(size_t n, size_t d) { + return (n + d - 1) / d; +} - // Determine whether we already know about this file. - { - upb_value v; - if (upb_strtable_lookup2(&s->files, name.data, name.size, &v)) { - upb_Status_SetErrorFormat(status, - "duplicate file name " UPB_STRINGVIEW_FORMAT, - UPB_STRINGVIEW_ARGS(name)); - return NULL; +static void upb_MtDecoder_AssignHasbits(upb_MtDecoder* d) { + upb_MiniTable* ret = d->table; + int n = ret->field_count; + int last_hasbit = 0; // 0 cannot be used. + + // First assign required fields, which must have the lowest hasbits. + for (int i = 0; i < n; i++) { + upb_MiniTableField* field = (upb_MiniTableField*)&ret->fields[i]; + if (field->offset == kRequiredPresence) { + field->presence = ++last_hasbit; + } else if (field->offset == kNoPresence) { + field->presence = 0; } } + ret->required_count = last_hasbit; - upb_DefBuilder ctx = { - .symtab = s, - .layout = layout, - .platform = s->platform, - .msg_count = 0, - .enum_count = 0, - .ext_count = 0, - .status = status, - .file = NULL, - .arena = upb_Arena_New(), - .tmp_arena = upb_Arena_New(), - }; + if (ret->required_count > 63) { + upb_MdDecoder_ErrorJmp(&d->base, "Too many required fields"); + } - return upb_DefBuilder_AddFileToPool(&ctx, s, file_proto, name, status); -} + // Next assign non-required hasbit fields. + for (int i = 0; i < n; i++) { + upb_MiniTableField* field = (upb_MiniTableField*)&ret->fields[i]; + if (field->offset == kHasbitPresence) { + field->presence = ++last_hasbit; + } + } -const upb_FileDef* upb_DefPool_AddFile(upb_DefPool* s, - const UPB_DESC(FileDescriptorProto) * - file_proto, - upb_Status* status) { - return _upb_DefPool_AddFile(s, file_proto, NULL, status); + ret->size = last_hasbit ? upb_MiniTable_DivideRoundUp(last_hasbit + 1, 8) : 0; } -bool _upb_DefPool_LoadDefInitEx(upb_DefPool* s, const _upb_DefPool_Init* init, - bool rebuild_minitable) { - /* Since this function should never fail (it would indicate a bug in upb) we - * print errors to stderr instead of returning error status to the user. */ - _upb_DefPool_Init** deps = init->deps; - UPB_DESC(FileDescriptorProto) * file; - upb_Arena* arena; - upb_Status status; - - upb_Status_Clear(&status); - - if (upb_DefPool_FindFileByName(s, init->filename)) { - return true; +size_t upb_MtDecoder_Place(upb_MtDecoder* d, upb_FieldRep rep) { + size_t size = upb_MtDecoder_SizeOfRep(rep, d->platform); + size_t align = upb_MtDecoder_AlignOfRep(rep, d->platform); + size_t ret = UPB_ALIGN_UP(d->table->size, align); + static const size_t max = UINT16_MAX; + size_t new_size = ret + size; + if (new_size > max) { + upb_MdDecoder_ErrorJmp( + &d->base, "Message size exceeded maximum size of %zu bytes", max); } + d->table->size = new_size; + return ret; +} - arena = upb_Arena_New(); +static void upb_MtDecoder_AssignOffsets(upb_MtDecoder* d) { + upb_LayoutItem* end = UPB_PTRADD(d->vec.data, d->vec.size); - for (; *deps; deps++) { - if (!_upb_DefPool_LoadDefInitEx(s, *deps, rebuild_minitable)) goto err; + // Compute offsets. + for (upb_LayoutItem* item = d->vec.data; item < end; item++) { + item->offset = upb_MtDecoder_Place(d, item->rep); } - file = UPB_DESC(FileDescriptorProto_parse_ex)( - init->descriptor.data, init->descriptor.size, NULL, - kUpb_DecodeOption_AliasString, arena); - s->bytes_loaded += init->descriptor.size; - - if (!file) { - upb_Status_SetErrorFormat( - &status, - "Failed to parse compiled-in descriptor for file '%s'. This should " - "never happen.", - init->filename); - goto err; + // Assign oneof case offsets. We must do these first, since assigning + // actual offsets will overwrite the links of the linked list. + for (upb_LayoutItem* item = d->vec.data; item < end; item++) { + if (item->type != kUpb_LayoutItemType_OneofCase) continue; + upb_MiniTableField* f = &d->fields[item->field_index]; + while (true) { + f->presence = ~item->offset; + if (f->offset == kUpb_LayoutItem_IndexSentinel) break; + UPB_ASSERT(f->offset - kOneofBase < d->table->field_count); + f = &d->fields[f->offset - kOneofBase]; + } } - const upb_MiniTableFile* mt = rebuild_minitable ? NULL : init->layout; - if (!_upb_DefPool_AddFile(s, file, mt, &status)) { - goto err; + // Assign offsets. + for (upb_LayoutItem* item = d->vec.data; item < end; item++) { + upb_MiniTableField* f = &d->fields[item->field_index]; + switch (item->type) { + case kUpb_LayoutItemType_OneofField: + while (true) { + uint16_t next_offset = f->offset; + f->offset = item->offset; + if (next_offset == kUpb_LayoutItem_IndexSentinel) break; + f = &d->fields[next_offset - kOneofBase]; + } + break; + case kUpb_LayoutItemType_Field: + f->offset = item->offset; + break; + default: + break; + } } - upb_Arena_Free(arena); - return true; - -err: - fprintf(stderr, - "Error loading compiled-in descriptor for file '%s' (this should " - "never happen): %s\n", - init->filename, upb_Status_ErrorMessage(&status)); - upb_Arena_Free(arena); - return false; + // The fasttable parser (supported on 64-bit only) depends on this being a + // multiple of 8 in order to satisfy UPB_MALLOC_ALIGN, which is also 8. + // + // On 32-bit we could potentially make this smaller, but there is no + // compelling reason to optimize this right now. + d->table->size = UPB_ALIGN_UP(d->table->size, 8); } -size_t _upb_DefPool_BytesLoaded(const upb_DefPool* s) { - return s->bytes_loaded; -} +static void upb_MtDecoder_ValidateEntryField(upb_MtDecoder* d, + const upb_MiniTableField* f, + uint32_t expected_num) { + const char* name = expected_num == 1 ? "key" : "val"; + if (f->number != expected_num) { + upb_MdDecoder_ErrorJmp(&d->base, + "map %s did not have expected number (%d vs %d)", + name, expected_num, (int)f->number); + } -upb_Arena* _upb_DefPool_Arena(const upb_DefPool* s) { return s->arena; } + if (upb_IsRepeatedOrMap(f)) { + upb_MdDecoder_ErrorJmp( + &d->base, "map %s cannot be repeated or map, or be in oneof", name); + } -const upb_FieldDef* upb_DefPool_FindExtensionByMiniTable( - const upb_DefPool* s, const upb_MiniTableExtension* ext) { - upb_value v; - bool ok = upb_inttable_lookup(&s->exts, (uintptr_t)ext, &v); - UPB_ASSERT(ok); - return upb_value_getconstptr(v); -} + uint32_t not_ok_types; + if (expected_num == 1) { + not_ok_types = (1 << kUpb_FieldType_Float) | (1 << kUpb_FieldType_Double) | + (1 << kUpb_FieldType_Message) | (1 << kUpb_FieldType_Group) | + (1 << kUpb_FieldType_Bytes) | (1 << kUpb_FieldType_Enum); + } else { + not_ok_types = 1 << kUpb_FieldType_Group; + } -const upb_FieldDef* upb_DefPool_FindExtensionByNumber(const upb_DefPool* s, - const upb_MessageDef* m, - int32_t fieldnum) { - const upb_MiniTable* t = upb_MessageDef_MiniTable(m); - const upb_MiniTableExtension* ext = - upb_ExtensionRegistry_Lookup(s->extreg, t, fieldnum); - return ext ? upb_DefPool_FindExtensionByMiniTable(s, ext) : NULL; + if ((1 << upb_MiniTableField_Type(f)) & not_ok_types) { + upb_MdDecoder_ErrorJmp(&d->base, "map %s cannot have type %d", name, + (int)f->UPB_PRIVATE(descriptortype)); + } } -const upb_ExtensionRegistry* upb_DefPool_ExtensionRegistry( - const upb_DefPool* s) { - return s->extreg; -} +static void upb_MtDecoder_ParseMap(upb_MtDecoder* d, const char* data, + size_t len) { + upb_MtDecoder_ParseMessage(d, data, len); + upb_MtDecoder_AssignHasbits(d); -const upb_FieldDef** upb_DefPool_GetAllExtensions(const upb_DefPool* s, - const upb_MessageDef* m, - size_t* count) { - size_t n = 0; - intptr_t iter = UPB_INTTABLE_BEGIN; - uintptr_t key; - upb_value val; - // This is O(all exts) instead of O(exts for m). If we need this to be - // efficient we may need to make extreg into a two-level table, or have a - // second per-message index. - while (upb_inttable_next(&s->exts, &key, &val, &iter)) { - const upb_FieldDef* f = upb_value_getconstptr(val); - if (upb_FieldDef_ContainingType(f) == m) n++; - } - const upb_FieldDef** exts = malloc(n * sizeof(*exts)); - iter = UPB_INTTABLE_BEGIN; - size_t i = 0; - while (upb_inttable_next(&s->exts, &key, &val, &iter)) { - const upb_FieldDef* f = upb_value_getconstptr(val); - if (upb_FieldDef_ContainingType(f) == m) exts[i++] = f; + if (UPB_UNLIKELY(d->table->field_count != 2)) { + upb_MdDecoder_ErrorJmp(&d->base, "%hu fields in map", + d->table->field_count); + UPB_UNREACHABLE(); } - *count = n; - return exts; -} -bool _upb_DefPool_LoadDefInit(upb_DefPool* s, const _upb_DefPool_Init* init) { - return _upb_DefPool_LoadDefInitEx(s, init, false); -} + upb_LayoutItem* end = UPB_PTRADD(d->vec.data, d->vec.size); + for (upb_LayoutItem* item = d->vec.data; item < end; item++) { + if (item->type == kUpb_LayoutItemType_OneofCase) { + upb_MdDecoder_ErrorJmp(&d->base, "Map entry cannot have oneof"); + } + } + upb_MtDecoder_ValidateEntryField(d, &d->table->fields[0], 1); + upb_MtDecoder_ValidateEntryField(d, &d->table->fields[1], 2); -// Must be last. + // Map entries have a pre-determined layout, regardless of types. + // NOTE: sync with mini_table/message_internal.h. + const size_t kv_size = d->platform == kUpb_MiniTablePlatform_32Bit ? 8 : 16; + const size_t hasbit_size = 8; + d->fields[0].offset = hasbit_size; + d->fields[1].offset = hasbit_size + kv_size; + d->table->size = UPB_ALIGN_UP(hasbit_size + kv_size + kv_size, 8); -upb_deftype_t _upb_DefType_Type(upb_value v) { - const uintptr_t num = (uintptr_t)upb_value_getconstptr(v); - return num & UPB_DEFTYPE_MASK; + // Map entries have a special bit set to signal it's a map entry, used in + // upb_MiniTable_SetSubMessage() below. + d->table->ext |= kUpb_ExtMode_IsMapEntry; } -upb_value _upb_DefType_Pack(const void* ptr, upb_deftype_t type) { - uintptr_t num = (uintptr_t)ptr; - UPB_ASSERT((num & UPB_DEFTYPE_MASK) == 0); - num |= type; - return upb_value_constptr((const void*)num); -} +static void upb_MtDecoder_ParseMessageSet(upb_MtDecoder* d, const char* data, + size_t len) { + if (len > 0) { + upb_MdDecoder_ErrorJmp(&d->base, "Invalid message set encode length: %zu", + len); + } -const void* _upb_DefType_Unpack(upb_value v, upb_deftype_t type) { - uintptr_t num = (uintptr_t)upb_value_getconstptr(v); - return (num & UPB_DEFTYPE_MASK) == type - ? (const void*)(num & ~UPB_DEFTYPE_MASK) - : NULL; + upb_MiniTable* ret = d->table; + ret->size = 0; + ret->field_count = 0; + ret->ext = kUpb_ExtMode_IsMessageSet; + ret->dense_below = 0; + ret->table_mask = -1; + ret->required_count = 0; } +static upb_MiniTable* upb_MtDecoder_DoBuildMiniTableWithBuf( + upb_MtDecoder* decoder, const char* data, size_t len, void** buf, + size_t* buf_size) { + upb_MdDecoder_CheckOutOfMemory(&decoder->base, decoder->table); -// Must be last. + decoder->table->size = 0; + decoder->table->field_count = 0; + decoder->table->ext = kUpb_ExtMode_NonExtendable; + decoder->table->dense_below = 0; + decoder->table->table_mask = -1; + decoder->table->required_count = 0; -bool _upb_DescState_Grow(upb_DescState* d, upb_Arena* a) { - const size_t oldbufsize = d->bufsize; - const int used = d->ptr - d->buf; + // Strip off and verify the version tag. + if (!len--) goto done; + const char vers = *data++; - if (!d->buf) { - d->buf = upb_Arena_Malloc(a, d->bufsize); - if (!d->buf) return false; - d->ptr = d->buf; - d->e.end = d->buf + d->bufsize; - } + switch (vers) { + case kUpb_EncodedVersion_MapV1: + upb_MtDecoder_ParseMap(decoder, data, len); + break; - if (oldbufsize - used < kUpb_MtDataEncoder_MinSize) { - d->bufsize *= 2; - d->buf = upb_Arena_Realloc(a, d->buf, oldbufsize, d->bufsize); - if (!d->buf) return false; - d->ptr = d->buf + used; - d->e.end = d->buf + d->bufsize; - } + case kUpb_EncodedVersion_MessageV1: + upb_MtDecoder_ParseMessage(decoder, data, len); + upb_MtDecoder_AssignHasbits(decoder); + upb_MtDecoder_SortLayoutItems(decoder); + upb_MtDecoder_AssignOffsets(decoder); + break; - return true; -} + case kUpb_EncodedVersion_MessageSetV1: + upb_MtDecoder_ParseMessageSet(decoder, data, len); + break; + default: + upb_MdDecoder_ErrorJmp(&decoder->base, "Invalid message version: %c", + vers); + } -// Must be last. +done: + *buf = decoder->vec.data; + *buf_size = decoder->vec.capacity * sizeof(*decoder->vec.data); + return decoder->table; +} -struct upb_EnumDef { - const UPB_DESC(EnumOptions) * opts; - const upb_MiniTableEnum* layout; // Only for proto2. - const upb_FileDef* file; - const upb_MessageDef* containing_type; // Could be merged with "file". - const char* full_name; - upb_strtable ntoi; - upb_inttable iton; - const upb_EnumValueDef* values; - const upb_EnumReservedRange* res_ranges; - const upb_StringView* res_names; - int value_count; - int res_range_count; - int res_name_count; - int32_t defaultval; - bool is_closed; - bool is_sorted; // Whether all of the values are defined in ascending order. -}; +static upb_MiniTable* upb_MtDecoder_BuildMiniTableWithBuf( + upb_MtDecoder* const decoder, const char* const data, const size_t len, + void** const buf, size_t* const buf_size) { + if (UPB_SETJMP(decoder->base.err) != 0) { + *buf = decoder->vec.data; + *buf_size = decoder->vec.capacity * sizeof(*decoder->vec.data); + return NULL; + } -upb_EnumDef* _upb_EnumDef_At(const upb_EnumDef* e, int i) { - return (upb_EnumDef*)&e[i]; + return upb_MtDecoder_DoBuildMiniTableWithBuf(decoder, data, len, buf, + buf_size); } -const upb_MiniTableEnum* _upb_EnumDef_MiniTable(const upb_EnumDef* e) { - return e->layout; -} +upb_MiniTable* upb_MiniTable_BuildWithBuf(const char* data, size_t len, + upb_MiniTablePlatform platform, + upb_Arena* arena, void** buf, + size_t* buf_size, + upb_Status* status) { + upb_MtDecoder decoder = { + .base = {.status = status}, + .platform = platform, + .vec = + { + .data = *buf, + .capacity = *buf_size / sizeof(*decoder.vec.data), + .size = 0, + }, + .arena = arena, + .table = upb_Arena_Malloc(arena, sizeof(*decoder.table)), + }; -bool _upb_EnumDef_Insert(upb_EnumDef* e, upb_EnumValueDef* v, upb_Arena* a) { - const char* name = upb_EnumValueDef_Name(v); - const upb_value val = upb_value_constptr(v); - bool ok = upb_strtable_insert(&e->ntoi, name, strlen(name), val, a); - if (!ok) return false; + return upb_MtDecoder_BuildMiniTableWithBuf(&decoder, data, len, buf, + buf_size); +} - // Multiple enumerators can have the same number, first one wins. - const int number = upb_EnumValueDef_Number(v); - if (!upb_inttable_lookup(&e->iton, number, NULL)) { - return upb_inttable_insert(&e->iton, number, val, a); +static const char* upb_MtDecoder_DoBuildMiniTableExtension( + upb_MtDecoder* decoder, const char* data, size_t len, + upb_MiniTableExtension* ext, const upb_MiniTable* extendee, + upb_MiniTableSub sub) { + // If the string is non-empty then it must begin with a version tag. + if (len) { + if (*data != kUpb_EncodedVersion_ExtensionV1) { + upb_MdDecoder_ErrorJmp(&decoder->base, "Invalid ext version: %c", *data); + } + data++; + len--; } - return true; -} -const UPB_DESC(EnumOptions) * upb_EnumDef_Options(const upb_EnumDef* e) { - return e->opts; -} + uint16_t count = 0; + upb_SubCounts sub_counts = {0, 0}; + const char* ret = upb_MtDecoder_Parse(decoder, data, len, ext, sizeof(*ext), + &count, &sub_counts); + if (!ret || count != 1) return NULL; -bool upb_EnumDef_HasOptions(const upb_EnumDef* e) { - return e->opts != (void*)kUpbDefOptDefault; -} + upb_MiniTableField* f = &ext->field; -const char* upb_EnumDef_FullName(const upb_EnumDef* e) { return e->full_name; } + f->mode |= kUpb_LabelFlags_IsExtension; + f->offset = 0; + f->presence = 0; -const char* upb_EnumDef_Name(const upb_EnumDef* e) { - return _upb_DefBuilder_FullToShort(e->full_name); -} + if (extendee->ext & kUpb_ExtMode_IsMessageSet) { + // Extensions of MessageSet must be messages. + if (!upb_IsSubMessage(f)) return NULL; -const upb_FileDef* upb_EnumDef_File(const upb_EnumDef* e) { return e->file; } + // Extensions of MessageSet must be non-repeating. + if ((f->mode & kUpb_FieldMode_Mask) == kUpb_FieldMode_Array) return NULL; + } -const upb_MessageDef* upb_EnumDef_ContainingType(const upb_EnumDef* e) { - return e->containing_type; -} + ext->extendee = extendee; + ext->sub = sub; -int32_t upb_EnumDef_Default(const upb_EnumDef* e) { - UPB_ASSERT(upb_EnumDef_FindValueByNumber(e, e->defaultval)); - return e->defaultval; + return ret; } -int upb_EnumDef_ReservedRangeCount(const upb_EnumDef* e) { - return e->res_range_count; +static const char* upb_MtDecoder_BuildMiniTableExtension( + upb_MtDecoder* const decoder, const char* const data, const size_t len, + upb_MiniTableExtension* const ext, const upb_MiniTable* const extendee, + const upb_MiniTableSub sub) { + if (UPB_SETJMP(decoder->base.err) != 0) return NULL; + return upb_MtDecoder_DoBuildMiniTableExtension(decoder, data, len, ext, + extendee, sub); } -const upb_EnumReservedRange* upb_EnumDef_ReservedRange(const upb_EnumDef* e, - int i) { - UPB_ASSERT(0 <= i && i < e->res_range_count); - return _upb_EnumReservedRange_At(e->res_ranges, i); -} +const char* _upb_MiniTableExtension_Init(const char* data, size_t len, + upb_MiniTableExtension* ext, + const upb_MiniTable* extendee, + upb_MiniTableSub sub, + upb_MiniTablePlatform platform, + upb_Status* status) { + upb_MtDecoder decoder = { + .base = {.status = status}, + .arena = NULL, + .table = NULL, + .platform = platform, + }; -int upb_EnumDef_ReservedNameCount(const upb_EnumDef* e) { - return e->res_name_count; + return upb_MtDecoder_BuildMiniTableExtension(&decoder, data, len, ext, + extendee, sub); } -upb_StringView upb_EnumDef_ReservedName(const upb_EnumDef* e, int i) { - UPB_ASSERT(0 <= i && i < e->res_name_count); - return e->res_names[i]; -} +upb_MiniTableExtension* _upb_MiniTableExtension_Build( + const char* data, size_t len, const upb_MiniTable* extendee, + upb_MiniTableSub sub, upb_MiniTablePlatform platform, upb_Arena* arena, + upb_Status* status) { + upb_MiniTableExtension* ext = + upb_Arena_Malloc(arena, sizeof(upb_MiniTableExtension)); + if (UPB_UNLIKELY(!ext)) return NULL; -int upb_EnumDef_ValueCount(const upb_EnumDef* e) { return e->value_count; } + const char* ptr = _upb_MiniTableExtension_Init(data, len, ext, extendee, sub, + platform, status); + if (UPB_UNLIKELY(!ptr)) return NULL; -const upb_EnumValueDef* upb_EnumDef_FindValueByName(const upb_EnumDef* e, - const char* name) { - return upb_EnumDef_FindValueByNameWithSize(e, name, strlen(name)); + return ext; } -const upb_EnumValueDef* upb_EnumDef_FindValueByNameWithSize( - const upb_EnumDef* e, const char* name, size_t size) { - upb_value v; - return upb_strtable_lookup2(&e->ntoi, name, size, &v) - ? upb_value_getconstptr(v) - : NULL; +upb_MiniTable* _upb_MiniTable_Build(const char* data, size_t len, + upb_MiniTablePlatform platform, + upb_Arena* arena, upb_Status* status) { + void* buf = NULL; + size_t size = 0; + upb_MiniTable* ret = upb_MiniTable_BuildWithBuf(data, len, platform, arena, + &buf, &size, status); + free(buf); + return ret; } -const upb_EnumValueDef* upb_EnumDef_FindValueByNumber(const upb_EnumDef* e, - int32_t num) { - upb_value v; - return upb_inttable_lookup(&e->iton, num, &v) ? upb_value_getconstptr(v) - : NULL; -} - -bool upb_EnumDef_CheckNumber(const upb_EnumDef* e, int32_t num) { - // We could use upb_EnumDef_FindValueByNumber(e, num) != NULL, but we expect - // this to be faster (especially for small numbers). - return upb_MiniTableEnum_CheckValue(e->layout, num); -} - -const upb_EnumValueDef* upb_EnumDef_Value(const upb_EnumDef* e, int i) { - UPB_ASSERT(0 <= i && i < e->value_count); - return _upb_EnumValueDef_At(e->values, i); -} -bool upb_EnumDef_IsClosed(const upb_EnumDef* e) { return e->is_closed; } +// Must be last. -bool upb_EnumDef_MiniDescriptorEncode(const upb_EnumDef* e, upb_Arena* a, - upb_StringView* out) { - upb_DescState s; - _upb_DescState_Init(&s); +bool upb_MiniTable_SetSubMessage(upb_MiniTable* table, + upb_MiniTableField* field, + const upb_MiniTable* sub) { + UPB_ASSERT((uintptr_t)table->fields <= (uintptr_t)field && + (uintptr_t)field < + (uintptr_t)(table->fields + table->field_count)); + UPB_ASSERT(sub); - const upb_EnumValueDef** sorted = NULL; - if (!e->is_sorted) { - sorted = _upb_EnumValueDefs_Sorted(e->values, e->value_count, a); - if (!sorted) return false; - } + const bool sub_is_map = sub->ext & kUpb_ExtMode_IsMapEntry; - if (!_upb_DescState_Grow(&s, a)) return false; - s.ptr = upb_MtDataEncoder_StartEnum(&s.e, s.ptr); + switch (field->UPB_PRIVATE(descriptortype)) { + case kUpb_FieldType_Message: + if (sub_is_map) { + const bool table_is_map = table->ext & kUpb_ExtMode_IsMapEntry; + if (UPB_UNLIKELY(table_is_map)) return false; - // Duplicate values are allowed but we only encode each value once. - uint32_t previous = 0; + field->mode = (field->mode & ~kUpb_FieldMode_Mask) | kUpb_FieldMode_Map; + } + break; - for (size_t i = 0; i < e->value_count; i++) { - const uint32_t current = - upb_EnumValueDef_Number(sorted ? sorted[i] : upb_EnumDef_Value(e, i)); - if (i != 0 && previous == current) continue; + case kUpb_FieldType_Group: + if (UPB_UNLIKELY(sub_is_map)) return false; + break; - if (!_upb_DescState_Grow(&s, a)) return false; - s.ptr = upb_MtDataEncoder_PutEnumValue(&s.e, s.ptr, current); - previous = current; + default: + return false; } - if (!_upb_DescState_Grow(&s, a)) return false; - s.ptr = upb_MtDataEncoder_EndEnum(&s.e, s.ptr); + upb_MiniTableSub* table_sub = + (void*)&table->subs[field->UPB_PRIVATE(submsg_index)]; + // TODO(haberman): Add this assert back once YouTube is updated to not call + // this function repeatedly. + // UPB_ASSERT(table_sub->submsg == &_kUpb_MiniTable_Empty); + table_sub->submsg = sub; + return true; +} - // There will always be room for this '\0' in the encoder buffer because - // kUpb_MtDataEncoder_MinSize is overkill for upb_MtDataEncoder_EndEnum(). - UPB_ASSERT(s.ptr < s.buf + s.bufsize); - *s.ptr = '\0'; +bool upb_MiniTable_SetSubEnum(upb_MiniTable* table, upb_MiniTableField* field, + const upb_MiniTableEnum* sub) { + UPB_ASSERT((uintptr_t)table->fields <= (uintptr_t)field && + (uintptr_t)field < + (uintptr_t)(table->fields + table->field_count)); + UPB_ASSERT(sub); - out->data = s.buf; - out->size = s.ptr - s.buf; + upb_MiniTableSub* table_sub = + (void*)&table->subs[field->UPB_PRIVATE(submsg_index)]; + table_sub->subenum = sub; return true; } -static upb_MiniTableEnum* create_enumlayout(upb_DefBuilder* ctx, - const upb_EnumDef* e) { - upb_StringView sv; - bool ok = upb_EnumDef_MiniDescriptorEncode(e, ctx->tmp_arena, &sv); - if (!ok) _upb_DefBuilder_Errf(ctx, "OOM while building enum MiniDescriptor"); +uint32_t upb_MiniTable_GetSubList(const upb_MiniTable* mt, + const upb_MiniTableField** subs) { + uint32_t msg_count = 0; + uint32_t enum_count = 0; - upb_Status status; - upb_MiniTableEnum* layout = - upb_MiniTableEnum_Build(sv.data, sv.size, ctx->arena, &status); - if (!layout) - _upb_DefBuilder_Errf(ctx, "Error building enum MiniTable: %s", status.msg); - return layout; -} + for (int i = 0; i < mt->field_count; i++) { + const upb_MiniTableField* f = &mt->fields[i]; + if (upb_MiniTableField_CType(f) == kUpb_CType_Message) { + *subs = f; + ++subs; + msg_count++; + } + } -static upb_StringView* _upb_EnumReservedNames_New( - upb_DefBuilder* ctx, int n, const upb_StringView* protos) { - upb_StringView* sv = _upb_DefBuilder_Alloc(ctx, sizeof(upb_StringView) * n); - for (size_t i = 0; i < n; i++) { - sv[i].data = - upb_strdup2(protos[i].data, protos[i].size, _upb_DefBuilder_Arena(ctx)); - sv[i].size = protos[i].size; + for (int i = 0; i < mt->field_count; i++) { + const upb_MiniTableField* f = &mt->fields[i]; + if (upb_MiniTableField_CType(f) == kUpb_CType_Enum) { + *subs = f; + ++subs; + enum_count++; + } } - return sv; -} -static void create_enumdef(upb_DefBuilder* ctx, const char* prefix, - const UPB_DESC(EnumDescriptorProto) * enum_proto, - upb_EnumDef* e) { - const UPB_DESC(EnumValueDescriptorProto)* const* values; - const UPB_DESC(EnumDescriptorProto_EnumReservedRange)* const* res_ranges; - const upb_StringView* res_names; - upb_StringView name; - size_t n_value, n_res_range, n_res_name; + return (msg_count << 16) | enum_count; +} - // Must happen before _upb_DefBuilder_Add() - e->file = _upb_DefBuilder_File(ctx); +// The list of sub_tables and sub_enums must exactly match the number and order +// of sub-message fields and sub-enum fields given by upb_MiniTable_GetSubList() +// above. +bool upb_MiniTable_Link(upb_MiniTable* mt, const upb_MiniTable** sub_tables, + size_t sub_table_count, + const upb_MiniTableEnum** sub_enums, + size_t sub_enum_count) { + uint32_t msg_count = 0; + uint32_t enum_count = 0; - name = UPB_DESC(EnumDescriptorProto_name)(enum_proto); + for (int i = 0; i < mt->field_count; i++) { + upb_MiniTableField* f = (upb_MiniTableField*)&mt->fields[i]; + if (upb_MiniTableField_CType(f) == kUpb_CType_Message) { + const upb_MiniTable* sub = sub_tables[msg_count++]; + if (msg_count > sub_table_count) return false; + if (sub != NULL) { + if (!upb_MiniTable_SetSubMessage(mt, f, sub)) return false; + } + } + } - e->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); - _upb_DefBuilder_Add(ctx, e->full_name, - _upb_DefType_Pack(e, UPB_DEFTYPE_ENUM)); + for (int i = 0; i < mt->field_count; i++) { + upb_MiniTableField* f = (upb_MiniTableField*)&mt->fields[i]; + if (upb_MiniTableField_IsClosedEnum(f)) { + const upb_MiniTableEnum* sub = sub_enums[enum_count++]; + if (enum_count > sub_enum_count) return false; + if (sub != NULL) { + if (!upb_MiniTable_SetSubEnum(mt, f, sub)) return false; + } + } + } - e->is_closed = (!UPB_TREAT_PROTO2_ENUMS_LIKE_PROTO3) && - (upb_FileDef_Syntax(e->file) == kUpb_Syntax_Proto2); + return true; +} - values = UPB_DESC(EnumDescriptorProto_value)(enum_proto, &n_value); - bool ok = upb_strtable_init(&e->ntoi, n_value, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); - ok = upb_inttable_init(&e->iton, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); +// Must be last. - e->defaultval = 0; - e->value_count = n_value; - e->values = - _upb_EnumValueDefs_New(ctx, prefix, n_value, values, e, &e->is_sorted); +typedef struct { + uint64_t present_values_mask; + uint32_t last_written_value; +} upb_MtDataEncoderInternal_EnumState; - if (n_value == 0) { - _upb_DefBuilder_Errf(ctx, "enums must contain at least one value (%s)", - e->full_name); - } +typedef struct { + uint64_t msg_modifiers; + uint32_t last_field_num; + enum { + kUpb_OneofState_NotStarted, + kUpb_OneofState_StartedOneof, + kUpb_OneofState_EmittedOneofField, + } oneof_state; +} upb_MtDataEncoderInternal_MsgState; - res_ranges = - UPB_DESC(EnumDescriptorProto_reserved_range)(enum_proto, &n_res_range); - e->res_range_count = n_res_range; - e->res_ranges = _upb_EnumReservedRanges_New(ctx, n_res_range, res_ranges, e); +typedef struct { + char* buf_start; // Only for checking kUpb_MtDataEncoder_MinSize. + union { + upb_MtDataEncoderInternal_EnumState enum_state; + upb_MtDataEncoderInternal_MsgState msg_state; + } state; +} upb_MtDataEncoderInternal; - res_names = - UPB_DESC(EnumDescriptorProto_reserved_name)(enum_proto, &n_res_name); - e->res_name_count = n_res_name; - e->res_names = _upb_EnumReservedNames_New(ctx, n_res_name, res_names); +static upb_MtDataEncoderInternal* upb_MtDataEncoder_GetInternal( + upb_MtDataEncoder* e, char* buf_start) { + UPB_ASSERT(sizeof(upb_MtDataEncoderInternal) <= sizeof(e->internal)); + upb_MtDataEncoderInternal* ret = (upb_MtDataEncoderInternal*)e->internal; + ret->buf_start = buf_start; + return ret; +} - UPB_DEF_SET_OPTIONS(e->opts, EnumDescriptorProto, EnumOptions, enum_proto); +static char* upb_MtDataEncoder_PutRaw(upb_MtDataEncoder* e, char* ptr, + char ch) { + upb_MtDataEncoderInternal* in = (upb_MtDataEncoderInternal*)e->internal; + UPB_ASSERT(ptr - in->buf_start < kUpb_MtDataEncoder_MinSize); + if (ptr == e->end) return NULL; + *ptr++ = ch; + return ptr; +} - upb_inttable_compact(&e->iton, ctx->arena); +static char* upb_MtDataEncoder_Put(upb_MtDataEncoder* e, char* ptr, char ch) { + return upb_MtDataEncoder_PutRaw(e, ptr, _upb_ToBase92(ch)); +} - if (e->is_closed) { - if (ctx->layout) { - UPB_ASSERT(ctx->enum_count < ctx->layout->enum_count); - e->layout = ctx->layout->enums[ctx->enum_count++]; - } else { - e->layout = create_enumlayout(ctx, e); - } - } else { - e->layout = NULL; - } +static char* upb_MtDataEncoder_PutBase92Varint(upb_MtDataEncoder* e, char* ptr, + uint32_t val, int min, int max) { + int shift = upb_Log2Ceiling(_upb_FromBase92(max) - _upb_FromBase92(min) + 1); + UPB_ASSERT(shift <= 6); + uint32_t mask = (1 << shift) - 1; + do { + uint32_t bits = val & mask; + ptr = upb_MtDataEncoder_Put(e, ptr, bits + _upb_FromBase92(min)); + if (!ptr) return NULL; + val >>= shift; + } while (val); + return ptr; } -upb_EnumDef* _upb_EnumDefs_New( - upb_DefBuilder* ctx, int n, - const UPB_DESC(EnumDescriptorProto) * const* protos, - const upb_MessageDef* containing_type) { - _upb_DefType_CheckPadding(sizeof(upb_EnumDef)); +char* upb_MtDataEncoder_PutModifier(upb_MtDataEncoder* e, char* ptr, + uint64_t mod) { + if (mod) { + ptr = upb_MtDataEncoder_PutBase92Varint(e, ptr, mod, + kUpb_EncodedValue_MinModifier, + kUpb_EncodedValue_MaxModifier); + } + return ptr; +} - // If a containing type is defined then get the full name from that. - // Otherwise use the package name from the file def. - const char* name = containing_type ? upb_MessageDef_FullName(containing_type) - : _upb_FileDef_RawPackage(ctx->file); +char* upb_MtDataEncoder_EncodeExtension(upb_MtDataEncoder* e, char* ptr, + upb_FieldType type, uint32_t field_num, + uint64_t field_mod) { + upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); + in->state.msg_state.msg_modifiers = 0; + in->state.msg_state.last_field_num = 0; + in->state.msg_state.oneof_state = kUpb_OneofState_NotStarted; - upb_EnumDef* e = _upb_DefBuilder_Alloc(ctx, sizeof(upb_EnumDef) * n); - for (size_t i = 0; i < n; i++) { - create_enumdef(ctx, name, protos[i], &e[i]); - e[i].containing_type = containing_type; - } - return e; + ptr = upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_ExtensionV1); + if (!ptr) return NULL; + + return upb_MtDataEncoder_PutField(e, ptr, type, field_num, field_mod); } -// #include "upb/reflection/extension_range_internal.h" -// #include "upb/reflection/message_def.h" +char* upb_MtDataEncoder_EncodeMap(upb_MtDataEncoder* e, char* ptr, + upb_FieldType key_type, + upb_FieldType value_type, uint64_t key_mod, + uint64_t value_mod) { + upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); + in->state.msg_state.msg_modifiers = 0; + in->state.msg_state.last_field_num = 0; + in->state.msg_state.oneof_state = kUpb_OneofState_NotStarted; -// Must be last. + ptr = upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_MapV1); + if (!ptr) return NULL; -struct upb_EnumReservedRange { - int32_t start; - int32_t end; -}; + ptr = upb_MtDataEncoder_PutField(e, ptr, key_type, 1, key_mod); + if (!ptr) return NULL; -upb_EnumReservedRange* _upb_EnumReservedRange_At(const upb_EnumReservedRange* r, - int i) { - return (upb_EnumReservedRange*)&r[i]; + return upb_MtDataEncoder_PutField(e, ptr, value_type, 2, value_mod); } -int32_t upb_EnumReservedRange_Start(const upb_EnumReservedRange* r) { - return r->start; -} -int32_t upb_EnumReservedRange_End(const upb_EnumReservedRange* r) { - return r->end; +char* upb_MtDataEncoder_EncodeMessageSet(upb_MtDataEncoder* e, char* ptr) { + (void)upb_MtDataEncoder_GetInternal(e, ptr); + return upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_MessageSetV1); } -upb_EnumReservedRange* _upb_EnumReservedRanges_New( - upb_DefBuilder* ctx, int n, - const UPB_DESC(EnumDescriptorProto_EnumReservedRange) * const* protos, - const upb_EnumDef* e) { - upb_EnumReservedRange* r = - _upb_DefBuilder_Alloc(ctx, sizeof(upb_EnumReservedRange) * n); - - for (int i = 0; i < n; i++) { - const int32_t start = - UPB_DESC(EnumDescriptorProto_EnumReservedRange_start)(protos[i]); - const int32_t end = - UPB_DESC(EnumDescriptorProto_EnumReservedRange_end)(protos[i]); +char* upb_MtDataEncoder_StartMessage(upb_MtDataEncoder* e, char* ptr, + uint64_t msg_mod) { + upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); + in->state.msg_state.msg_modifiers = msg_mod; + in->state.msg_state.last_field_num = 0; + in->state.msg_state.oneof_state = kUpb_OneofState_NotStarted; - // A full validation would also check that each range is disjoint, and that - // none of the fields overlap with the extension ranges, but we are just - // sanity checking here. + ptr = upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_MessageV1); + if (!ptr) return NULL; - // Note: Not a typo! Unlike extension ranges and message reserved ranges, - // the end value of an enum reserved range is *inclusive*! - if (end < start) { - _upb_DefBuilder_Errf(ctx, "Reserved range (%d, %d) is invalid, enum=%s\n", - (int)start, (int)end, upb_EnumDef_FullName(e)); - } + return upb_MtDataEncoder_PutModifier(e, ptr, msg_mod); +} - r[i].start = start; - r[i].end = end; +static char* _upb_MtDataEncoder_MaybePutFieldSkip(upb_MtDataEncoder* e, + char* ptr, + uint32_t field_num) { + upb_MtDataEncoderInternal* in = (upb_MtDataEncoderInternal*)e->internal; + if (field_num <= in->state.msg_state.last_field_num) return NULL; + if (in->state.msg_state.last_field_num + 1 != field_num) { + // Put skip. + UPB_ASSERT(field_num > in->state.msg_state.last_field_num); + uint32_t skip = field_num - in->state.msg_state.last_field_num; + ptr = upb_MtDataEncoder_PutBase92Varint( + e, ptr, skip, kUpb_EncodedValue_MinSkip, kUpb_EncodedValue_MaxSkip); + if (!ptr) return NULL; } - - return r; + in->state.msg_state.last_field_num = field_num; + return ptr; } +static char* _upb_MtDataEncoder_PutFieldType(upb_MtDataEncoder* e, char* ptr, + upb_FieldType type, + uint64_t field_mod) { + static const char kUpb_TypeToEncoded[] = { + [kUpb_FieldType_Double] = kUpb_EncodedType_Double, + [kUpb_FieldType_Float] = kUpb_EncodedType_Float, + [kUpb_FieldType_Int64] = kUpb_EncodedType_Int64, + [kUpb_FieldType_UInt64] = kUpb_EncodedType_UInt64, + [kUpb_FieldType_Int32] = kUpb_EncodedType_Int32, + [kUpb_FieldType_Fixed64] = kUpb_EncodedType_Fixed64, + [kUpb_FieldType_Fixed32] = kUpb_EncodedType_Fixed32, + [kUpb_FieldType_Bool] = kUpb_EncodedType_Bool, + [kUpb_FieldType_String] = kUpb_EncodedType_String, + [kUpb_FieldType_Group] = kUpb_EncodedType_Group, + [kUpb_FieldType_Message] = kUpb_EncodedType_Message, + [kUpb_FieldType_Bytes] = kUpb_EncodedType_Bytes, + [kUpb_FieldType_UInt32] = kUpb_EncodedType_UInt32, + [kUpb_FieldType_Enum] = kUpb_EncodedType_OpenEnum, + [kUpb_FieldType_SFixed32] = kUpb_EncodedType_SFixed32, + [kUpb_FieldType_SFixed64] = kUpb_EncodedType_SFixed64, + [kUpb_FieldType_SInt32] = kUpb_EncodedType_SInt32, + [kUpb_FieldType_SInt64] = kUpb_EncodedType_SInt64, + }; + + int encoded_type = kUpb_TypeToEncoded[type]; -// Must be last. + if (field_mod & kUpb_FieldModifier_IsClosedEnum) { + UPB_ASSERT(type == kUpb_FieldType_Enum); + encoded_type = kUpb_EncodedType_ClosedEnum; + } -struct upb_EnumValueDef { - const UPB_DESC(EnumValueOptions) * opts; - const upb_EnumDef* parent; - const char* full_name; - int32_t number; -}; + if (field_mod & kUpb_FieldModifier_IsRepeated) { + // Repeated fields shift the type number up (unlike other modifiers which + // are bit flags). + encoded_type += kUpb_EncodedType_RepeatedBase; + } -upb_EnumValueDef* _upb_EnumValueDef_At(const upb_EnumValueDef* v, int i) { - return (upb_EnumValueDef*)&v[i]; + return upb_MtDataEncoder_Put(e, ptr, encoded_type); } -static int _upb_EnumValueDef_Compare(const void* p1, const void* p2) { - const uint32_t v1 = (*(const upb_EnumValueDef**)p1)->number; - const uint32_t v2 = (*(const upb_EnumValueDef**)p2)->number; - return (v1 < v2) ? -1 : (v1 > v2); -} +static char* _upb_MtDataEncoder_MaybePutModifiers(upb_MtDataEncoder* e, + char* ptr, upb_FieldType type, + uint64_t field_mod) { + upb_MtDataEncoderInternal* in = (upb_MtDataEncoderInternal*)e->internal; + uint32_t encoded_modifiers = 0; + if ((field_mod & kUpb_FieldModifier_IsRepeated) && + upb_FieldType_IsPackable(type)) { + bool field_is_packed = field_mod & kUpb_FieldModifier_IsPacked; + bool default_is_packed = in->state.msg_state.msg_modifiers & + kUpb_MessageModifier_DefaultIsPacked; + if (field_is_packed != default_is_packed) { + encoded_modifiers |= kUpb_EncodedFieldModifier_FlipPacked; + } + } -const upb_EnumValueDef** _upb_EnumValueDefs_Sorted(const upb_EnumValueDef* v, - int n, upb_Arena* a) { - // TODO: Try to replace this arena alloc with a persistent scratch buffer. - upb_EnumValueDef** out = - (upb_EnumValueDef**)upb_Arena_Malloc(a, n * sizeof(void*)); - if (!out) return NULL; + if (field_mod & kUpb_FieldModifier_IsProto3Singular) { + encoded_modifiers |= kUpb_EncodedFieldModifier_IsProto3Singular; + } - for (int i = 0; i < n; i++) { - out[i] = (upb_EnumValueDef*)&v[i]; + if (field_mod & kUpb_FieldModifier_IsRequired) { + encoded_modifiers |= kUpb_EncodedFieldModifier_IsRequired; } - qsort(out, n, sizeof(void*), _upb_EnumValueDef_Compare); - return (const upb_EnumValueDef**)out; + return upb_MtDataEncoder_PutModifier(e, ptr, encoded_modifiers); } -const UPB_DESC(EnumValueOptions) * - upb_EnumValueDef_Options(const upb_EnumValueDef* v) { - return v->opts; -} +char* upb_MtDataEncoder_PutField(upb_MtDataEncoder* e, char* ptr, + upb_FieldType type, uint32_t field_num, + uint64_t field_mod) { + upb_MtDataEncoder_GetInternal(e, ptr); -bool upb_EnumValueDef_HasOptions(const upb_EnumValueDef* v) { - return v->opts != (void*)kUpbDefOptDefault; -} + ptr = _upb_MtDataEncoder_MaybePutFieldSkip(e, ptr, field_num); + if (!ptr) return NULL; -const upb_EnumDef* upb_EnumValueDef_Enum(const upb_EnumValueDef* v) { - return v->parent; + ptr = _upb_MtDataEncoder_PutFieldType(e, ptr, type, field_mod); + if (!ptr) return NULL; + + return _upb_MtDataEncoder_MaybePutModifiers(e, ptr, type, field_mod); } -const char* upb_EnumValueDef_FullName(const upb_EnumValueDef* v) { - return v->full_name; +char* upb_MtDataEncoder_StartOneof(upb_MtDataEncoder* e, char* ptr) { + upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); + if (in->state.msg_state.oneof_state == kUpb_OneofState_NotStarted) { + ptr = upb_MtDataEncoder_Put(e, ptr, _upb_FromBase92(kUpb_EncodedValue_End)); + } else { + ptr = upb_MtDataEncoder_Put( + e, ptr, _upb_FromBase92(kUpb_EncodedValue_OneofSeparator)); + } + in->state.msg_state.oneof_state = kUpb_OneofState_StartedOneof; + return ptr; } -const char* upb_EnumValueDef_Name(const upb_EnumValueDef* v) { - return _upb_DefBuilder_FullToShort(v->full_name); +char* upb_MtDataEncoder_PutOneofField(upb_MtDataEncoder* e, char* ptr, + uint32_t field_num) { + upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); + if (in->state.msg_state.oneof_state == kUpb_OneofState_EmittedOneofField) { + ptr = upb_MtDataEncoder_Put( + e, ptr, _upb_FromBase92(kUpb_EncodedValue_FieldSeparator)); + if (!ptr) return NULL; + } + ptr = upb_MtDataEncoder_PutBase92Varint(e, ptr, field_num, _upb_ToBase92(0), + _upb_ToBase92(63)); + in->state.msg_state.oneof_state = kUpb_OneofState_EmittedOneofField; + return ptr; } -int32_t upb_EnumValueDef_Number(const upb_EnumValueDef* v) { return v->number; } +char* upb_MtDataEncoder_StartEnum(upb_MtDataEncoder* e, char* ptr) { + upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); + in->state.enum_state.present_values_mask = 0; + in->state.enum_state.last_written_value = 0; -uint32_t upb_EnumValueDef_Index(const upb_EnumValueDef* v) { - // Compute index in our parent's array. - return v - upb_EnumDef_Value(v->parent, 0); + return upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_EnumV1); } -static void create_enumvaldef(upb_DefBuilder* ctx, const char* prefix, - const UPB_DESC(EnumValueDescriptorProto) * - val_proto, - upb_EnumDef* e, upb_EnumValueDef* v) { - upb_StringView name = UPB_DESC(EnumValueDescriptorProto_name)(val_proto); +static char* upb_MtDataEncoder_FlushDenseEnumMask(upb_MtDataEncoder* e, + char* ptr) { + upb_MtDataEncoderInternal* in = (upb_MtDataEncoderInternal*)e->internal; + ptr = upb_MtDataEncoder_Put(e, ptr, in->state.enum_state.present_values_mask); + in->state.enum_state.present_values_mask = 0; + in->state.enum_state.last_written_value += 5; + return ptr; +} - v->parent = e; // Must happen prior to _upb_DefBuilder_Add() - v->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); - v->number = UPB_DESC(EnumValueDescriptorProto_number)(val_proto); - _upb_DefBuilder_Add(ctx, v->full_name, - _upb_DefType_Pack(v, UPB_DEFTYPE_ENUMVAL)); +char* upb_MtDataEncoder_PutEnumValue(upb_MtDataEncoder* e, char* ptr, + uint32_t val) { + // TODO(b/229641772): optimize this encoding. + upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); + UPB_ASSERT(val >= in->state.enum_state.last_written_value); + uint32_t delta = val - in->state.enum_state.last_written_value; + if (delta >= 5 && in->state.enum_state.present_values_mask) { + ptr = upb_MtDataEncoder_FlushDenseEnumMask(e, ptr); + if (!ptr) { + return NULL; + } + delta -= 5; + } - UPB_DEF_SET_OPTIONS(v->opts, EnumValueDescriptorProto, EnumValueOptions, - val_proto); + if (delta >= 5) { + ptr = upb_MtDataEncoder_PutBase92Varint( + e, ptr, delta, kUpb_EncodedValue_MinSkip, kUpb_EncodedValue_MaxSkip); + in->state.enum_state.last_written_value += delta; + delta = 0; + } - bool ok = _upb_EnumDef_Insert(e, v, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); + UPB_ASSERT((in->state.enum_state.present_values_mask >> delta) == 0); + in->state.enum_state.present_values_mask |= 1ULL << delta; + return ptr; } -// Allocate and initialize an array of |n| enum value defs owned by |e|. -upb_EnumValueDef* _upb_EnumValueDefs_New( - upb_DefBuilder* ctx, const char* prefix, int n, - const UPB_DESC(EnumValueDescriptorProto) * const* protos, upb_EnumDef* e, - bool* is_sorted) { - _upb_DefType_CheckPadding(sizeof(upb_EnumValueDef)); - - upb_EnumValueDef* v = - _upb_DefBuilder_Alloc(ctx, sizeof(upb_EnumValueDef) * n); +char* upb_MtDataEncoder_EndEnum(upb_MtDataEncoder* e, char* ptr) { + upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); + if (!in->state.enum_state.present_values_mask) return ptr; + return upb_MtDataEncoder_FlushDenseEnumMask(e, ptr); +} - *is_sorted = true; - uint32_t previous = 0; - for (size_t i = 0; i < n; i++) { - create_enumvaldef(ctx, prefix, protos[i], e, &v[i]); - const uint32_t current = v[i].number; - if (previous > current) *is_sorted = false; - previous = current; - } +const char _kUpb_ToBase92[] = { + ' ', '!', '#', '$', '%', '&', '(', ')', '*', '+', ',', '-', '.', '/', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', + '>', '?', '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', + 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', + 'Z', '[', ']', '^', '_', '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', + 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', + 'w', 'x', 'y', 'z', '{', '|', '}', '~', +}; - if (upb_FileDef_Syntax(ctx->file) == kUpb_Syntax_Proto3 && n > 0 && - v[0].number != 0) { - _upb_DefBuilder_Errf(ctx, - "for proto3, the first enum value must be zero (%s)", - upb_EnumDef_FullName(e)); - } +const int8_t _kUpb_FromBase92[] = { + 0, 1, -1, 2, 3, 4, 5, -1, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, + 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, + 55, 56, 57, -1, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, + 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, +}; - return v; -} // Must be last. -struct upb_ExtensionRange { - const UPB_DESC(ExtensionRangeOptions) * opts; - int32_t start; - int32_t end; +#define EXTREG_KEY_SIZE (sizeof(upb_MiniTable*) + sizeof(uint32_t)) + +struct upb_ExtensionRegistry { + upb_Arena* arena; + upb_strtable exts; // Key is upb_MiniTable* concatenated with fieldnum. }; -upb_ExtensionRange* _upb_ExtensionRange_At(const upb_ExtensionRange* r, int i) { - return (upb_ExtensionRange*)&r[i]; +static void extreg_key(char* buf, const upb_MiniTable* l, uint32_t fieldnum) { + memcpy(buf, &l, sizeof(l)); + memcpy(buf + sizeof(l), &fieldnum, sizeof(fieldnum)); } -const UPB_DESC(ExtensionRangeOptions) * - upb_ExtensionRange_Options(const upb_ExtensionRange* r) { - return r->opts; +upb_ExtensionRegistry* upb_ExtensionRegistry_New(upb_Arena* arena) { + upb_ExtensionRegistry* r = upb_Arena_Malloc(arena, sizeof(*r)); + if (!r) return NULL; + r->arena = arena; + if (!upb_strtable_init(&r->exts, 8, arena)) return NULL; + return r; } -bool upb_ExtensionRange_HasOptions(const upb_ExtensionRange* r) { - return r->opts != (void*)kUpbDefOptDefault; +UPB_API bool upb_ExtensionRegistry_Add(upb_ExtensionRegistry* r, + const upb_MiniTableExtension* e) { + char buf[EXTREG_KEY_SIZE]; + extreg_key(buf, e->extendee, e->field.number); + if (upb_strtable_lookup2(&r->exts, buf, EXTREG_KEY_SIZE, NULL)) return false; + return upb_strtable_insert(&r->exts, buf, EXTREG_KEY_SIZE, + upb_value_constptr(e), r->arena); } -int32_t upb_ExtensionRange_Start(const upb_ExtensionRange* r) { - return r->start; +bool upb_ExtensionRegistry_AddArray(upb_ExtensionRegistry* r, + const upb_MiniTableExtension** e, + size_t count) { + const upb_MiniTableExtension** start = e; + const upb_MiniTableExtension** end = UPB_PTRADD(e, count); + for (; e < end; e++) { + if (!upb_ExtensionRegistry_Add(r, *e)) goto failure; + } + return true; + +failure: + // Back out the entries previously added. + for (end = e, e = start; e < end; e++) { + const upb_MiniTableExtension* ext = *e; + char buf[EXTREG_KEY_SIZE]; + extreg_key(buf, ext->extendee, ext->field.number); + upb_strtable_remove2(&r->exts, buf, EXTREG_KEY_SIZE, NULL); + } + return false; } -int32_t upb_ExtensionRange_End(const upb_ExtensionRange* r) { return r->end; } +const upb_MiniTableExtension* upb_ExtensionRegistry_Lookup( + const upb_ExtensionRegistry* r, const upb_MiniTable* t, uint32_t num) { + char buf[EXTREG_KEY_SIZE]; + upb_value v; + extreg_key(buf, t, num); + if (upb_strtable_lookup2(&r->exts, buf, EXTREG_KEY_SIZE, &v)) { + return upb_value_getconstptr(v); + } else { + return NULL; + } +} -upb_ExtensionRange* _upb_ExtensionRanges_New( - upb_DefBuilder* ctx, int n, - const UPB_DESC(DescriptorProto_ExtensionRange) * const* protos, - const upb_MessageDef* m) { - upb_ExtensionRange* r = - _upb_DefBuilder_Alloc(ctx, sizeof(upb_ExtensionRange) * n); - for (int i = 0; i < n; i++) { - const int32_t start = - UPB_DESC(DescriptorProto_ExtensionRange_start)(protos[i]); - const int32_t end = UPB_DESC(DescriptorProto_ExtensionRange_end)(protos[i]); - const int32_t max = UPB_DESC(MessageOptions_message_set_wire_format)( - upb_MessageDef_Options(m)) - ? INT32_MAX - : kUpb_MaxFieldNumber + 1; +#include - // A full validation would also check that each range is disjoint, and that - // none of the fields overlap with the extension ranges, but we are just - // sanity checking here. - if (start < 1 || end <= start || end > max) { - _upb_DefBuilder_Errf(ctx, - "Extension range (%d, %d) is invalid, message=%s\n", - (int)start, (int)end, upb_MessageDef_FullName(m)); + +// Must be last. + +const upb_MiniTableField* upb_MiniTable_FindFieldByNumber( + const upb_MiniTable* t, uint32_t number) { + const size_t i = ((size_t)number) - 1; // 0 wraps to SIZE_MAX + + // Ideal case: index into dense fields + if (i < t->dense_below) { + UPB_ASSERT(t->fields[i].number == number); + return &t->fields[i]; + } + + // Slow case: binary search + int lo = t->dense_below; + int hi = t->field_count - 1; + while (lo <= hi) { + int mid = (lo + hi) / 2; + uint32_t num = t->fields[mid].number; + if (num < number) { + lo = mid + 1; + continue; + } + if (num > number) { + hi = mid - 1; + continue; } + return &t->fields[mid]; + } + return NULL; +} - r[i].start = start; - r[i].end = end; - UPB_DEF_SET_OPTIONS(r[i].opts, DescriptorProto_ExtensionRange, - ExtensionRangeOptions, protos[i]); +static bool upb_MiniTable_Is_Oneof(const upb_MiniTableField* f) { + return f->presence < 0; +} + +const upb_MiniTableField* upb_MiniTable_GetOneof(const upb_MiniTable* m, + const upb_MiniTableField* f) { + if (UPB_UNLIKELY(!upb_MiniTable_Is_Oneof(f))) { + return NULL; + } + const upb_MiniTableField* ptr = &m->fields[0]; + const upb_MiniTableField* end = &m->fields[m->field_count]; + while (++ptr < end) { + if (ptr->presence == (*f).presence) { + return ptr; + } } + return NULL; +} - return r; +bool upb_MiniTable_NextOneofField(const upb_MiniTable* m, + const upb_MiniTableField** f) { + const upb_MiniTableField* ptr = *f; + const upb_MiniTableField* end = &m->fields[m->field_count]; + while (++ptr < end) { + if (ptr->presence == (*f)->presence) { + *f = ptr; + return true; + } + } + return false; } -#include -#include +const struct upb_MiniTable _kUpb_MiniTable_Empty = { + .subs = NULL, + .fields = NULL, + .size = 0, + .field_count = 0, + .ext = kUpb_ExtMode_NonExtendable, + .dense_below = 0, + .table_mask = -1, + .required_count = 0, +}; -// Must be last. -#define UPB_FIELD_TYPE_UNSPECIFIED 0 +#include -typedef struct { - size_t len; - char str[1]; // Null-terminated string data follows. -} str_t; -struct upb_FieldDef { - const UPB_DESC(FieldOptions) * opts; - const upb_FileDef* file; - const upb_MessageDef* msgdef; - const char* full_name; - const char* json_name; - union { - int64_t sint; - uint64_t uint; - double dbl; - float flt; - bool boolean; - str_t* str; - void* msg; // Always NULL. - } defaultval; - union { - const upb_OneofDef* oneof; - const upb_MessageDef* extension_scope; - } scope; - union { - const upb_MessageDef* msgdef; - const upb_EnumDef* enumdef; - const UPB_DESC(FieldDescriptorProto) * unresolved; - } sub; - uint32_t number_; - uint16_t index_; - uint16_t layout_index; // Index into msgdef->layout->fields or file->exts - bool has_default; - bool has_json_name; - bool has_presence; - bool is_extension; - bool is_packed; - bool is_proto3_optional; - upb_FieldType type_; - upb_Label label_; -#if UINTPTR_MAX == 0xffffffff - uint32_t padding; // Increase size to a multiple of 8. -#endif -}; +// Must be last. -upb_FieldDef* _upb_FieldDef_At(const upb_FieldDef* f, int i) { - return (upb_FieldDef*)&f[i]; -} +/* The upb core does not generally have a concept of default instances. However + * for descriptor options we make an exception since the max size is known and + * modest (<200 bytes). All types can share a default instance since it is + * initialized to zeroes. + * + * We have to allocate an extra pointer for upb's internal metadata. */ +static const char opt_default_buf[_UPB_MAXOPT_SIZE + sizeof(void*)] = {0}; +const char* kUpbDefOptDefault = &opt_default_buf[sizeof(void*)]; -const UPB_DESC(FieldOptions) * upb_FieldDef_Options(const upb_FieldDef* f) { - return f->opts; -} +const char* _upb_DefBuilder_FullToShort(const char* fullname) { + const char* p; -bool upb_FieldDef_HasOptions(const upb_FieldDef* f) { - return f->opts != (void*)kUpbDefOptDefault; + if (fullname == NULL) { + return NULL; + } else if ((p = strrchr(fullname, '.')) == NULL) { + /* No '.' in the name, return the full string. */ + return fullname; + } else { + /* Return one past the last '.'. */ + return p + 1; + } } -const char* upb_FieldDef_FullName(const upb_FieldDef* f) { - return f->full_name; +void _upb_DefBuilder_FailJmp(upb_DefBuilder* ctx) { UPB_LONGJMP(ctx->err, 1); } + +void _upb_DefBuilder_Errf(upb_DefBuilder* ctx, const char* fmt, ...) { + va_list argp; + va_start(argp, fmt); + upb_Status_VSetErrorFormat(ctx->status, fmt, argp); + va_end(argp); + _upb_DefBuilder_FailJmp(ctx); } -upb_CType upb_FieldDef_CType(const upb_FieldDef* f) { - switch (f->type_) { - case kUpb_FieldType_Double: - return kUpb_CType_Double; - case kUpb_FieldType_Float: - return kUpb_CType_Float; - case kUpb_FieldType_Int64: - case kUpb_FieldType_SInt64: - case kUpb_FieldType_SFixed64: - return kUpb_CType_Int64; - case kUpb_FieldType_Int32: - case kUpb_FieldType_SFixed32: - case kUpb_FieldType_SInt32: - return kUpb_CType_Int32; - case kUpb_FieldType_UInt64: - case kUpb_FieldType_Fixed64: - return kUpb_CType_UInt64; - case kUpb_FieldType_UInt32: - case kUpb_FieldType_Fixed32: - return kUpb_CType_UInt32; - case kUpb_FieldType_Enum: - return kUpb_CType_Enum; - case kUpb_FieldType_Bool: - return kUpb_CType_Bool; - case kUpb_FieldType_String: - return kUpb_CType_String; - case kUpb_FieldType_Bytes: - return kUpb_CType_Bytes; - case kUpb_FieldType_Group: - case kUpb_FieldType_Message: - return kUpb_CType_Message; - } - UPB_UNREACHABLE(); +void _upb_DefBuilder_OomErr(upb_DefBuilder* ctx) { + upb_Status_SetErrorMessage(ctx->status, "out of memory"); + _upb_DefBuilder_FailJmp(ctx); } -upb_FieldType upb_FieldDef_Type(const upb_FieldDef* f) { return f->type_; } +// Verify a relative identifier string. The loop is branchless for speed. +static void _upb_DefBuilder_CheckIdentNotFull(upb_DefBuilder* ctx, + upb_StringView name) { + bool good = name.size > 0; -uint32_t upb_FieldDef_Index(const upb_FieldDef* f) { return f->index_; } + for (size_t i = 0; i < name.size; i++) { + const char c = name.data[i]; + const char d = c | 0x20; // force lowercase + const bool is_alpha = (('a' <= d) & (d <= 'z')) | (c == '_'); + const bool is_numer = ('0' <= c) & (c <= '9') & (i != 0); -upb_Label upb_FieldDef_Label(const upb_FieldDef* f) { return f->label_; } + good &= is_alpha | is_numer; + } -uint32_t upb_FieldDef_Number(const upb_FieldDef* f) { return f->number_; } + if (!good) _upb_DefBuilder_CheckIdentSlow(ctx, name, false); +} -bool upb_FieldDef_IsExtension(const upb_FieldDef* f) { return f->is_extension; } +const char* _upb_DefBuilder_MakeFullName(upb_DefBuilder* ctx, + const char* prefix, + upb_StringView name) { + _upb_DefBuilder_CheckIdentNotFull(ctx, name); + if (prefix) { + // ret = prefix + '.' + name; + size_t n = strlen(prefix); + char* ret = _upb_DefBuilder_Alloc(ctx, n + name.size + 2); + strcpy(ret, prefix); + ret[n] = '.'; + memcpy(&ret[n + 1], name.data, name.size); + ret[n + 1 + name.size] = '\0'; + return ret; + } else { + char* ret = upb_strdup2(name.data, name.size, ctx->arena); + if (!ret) _upb_DefBuilder_OomErr(ctx); + return ret; + } +} -bool upb_FieldDef_IsPacked(const upb_FieldDef* f) { return f->is_packed; } +static bool remove_component(char* base, size_t* len) { + if (*len == 0) return false; -const char* upb_FieldDef_Name(const upb_FieldDef* f) { - return _upb_DefBuilder_FullToShort(f->full_name); + for (size_t i = *len - 1; i > 0; i--) { + if (base[i] == '.') { + *len = i; + return true; + } + } + + *len = 0; + return true; } -const char* upb_FieldDef_JsonName(const upb_FieldDef* f) { - return f->json_name; +const void* _upb_DefBuilder_ResolveAny(upb_DefBuilder* ctx, + const char* from_name_dbg, + const char* base, upb_StringView sym, + upb_deftype_t* type) { + if (sym.size == 0) goto notfound; + upb_value v; + if (sym.data[0] == '.') { + /* Symbols starting with '.' are absolute, so we do a single lookup. + * Slice to omit the leading '.' */ + if (!_upb_DefPool_LookupSym(ctx->symtab, sym.data + 1, sym.size - 1, &v)) { + goto notfound; + } + } else { + /* Remove components from base until we find an entry or run out. */ + size_t baselen = base ? strlen(base) : 0; + char* tmp = malloc(sym.size + baselen + 1); + while (1) { + char* p = tmp; + if (baselen) { + memcpy(p, base, baselen); + p[baselen] = '.'; + p += baselen + 1; + } + memcpy(p, sym.data, sym.size); + p += sym.size; + if (_upb_DefPool_LookupSym(ctx->symtab, tmp, p - tmp, &v)) { + break; + } + if (!remove_component(tmp, &baselen)) { + free(tmp); + goto notfound; + } + } + free(tmp); + } + + *type = _upb_DefType_Type(v); + return _upb_DefType_Unpack(v, *type); + +notfound: + _upb_DefBuilder_Errf(ctx, "couldn't resolve name '" UPB_STRINGVIEW_FORMAT "'", + UPB_STRINGVIEW_ARGS(sym)); } -bool upb_FieldDef_HasJsonName(const upb_FieldDef* f) { - return f->has_json_name; +const void* _upb_DefBuilder_Resolve(upb_DefBuilder* ctx, + const char* from_name_dbg, const char* base, + upb_StringView sym, upb_deftype_t type) { + upb_deftype_t found_type; + const void* ret = + _upb_DefBuilder_ResolveAny(ctx, from_name_dbg, base, sym, &found_type); + if (ret && found_type != type) { + _upb_DefBuilder_Errf(ctx, + "type mismatch when resolving %s: couldn't find " + "name " UPB_STRINGVIEW_FORMAT " with type=%d", + from_name_dbg, UPB_STRINGVIEW_ARGS(sym), (int)type); + } + return ret; } -const upb_FileDef* upb_FieldDef_File(const upb_FieldDef* f) { return f->file; } +// Per ASCII this will lower-case a letter. If the result is a letter, the +// input was definitely a letter. If the output is not a letter, this may +// have transformed the character unpredictably. +static char upb_ascii_lower(char ch) { return ch | 0x20; } -const upb_MessageDef* upb_FieldDef_ContainingType(const upb_FieldDef* f) { - return f->msgdef; +// isalpha() etc. from are locale-dependent, which we don't want. +static bool upb_isbetween(uint8_t c, uint8_t low, uint8_t high) { + return low <= c && c <= high; } -const upb_MessageDef* upb_FieldDef_ExtensionScope(const upb_FieldDef* f) { - return f->is_extension ? f->scope.extension_scope : NULL; +static bool upb_isletter(char c) { + char lower = upb_ascii_lower(c); + return upb_isbetween(lower, 'a', 'z') || c == '_'; } -const upb_OneofDef* upb_FieldDef_ContainingOneof(const upb_FieldDef* f) { - return f->is_extension ? NULL : f->scope.oneof; +static bool upb_isalphanum(char c) { + return upb_isletter(c) || upb_isbetween(c, '0', '9'); } -const upb_OneofDef* upb_FieldDef_RealContainingOneof(const upb_FieldDef* f) { - const upb_OneofDef* oneof = upb_FieldDef_ContainingOneof(f); - if (!oneof || upb_OneofDef_IsSynthetic(oneof)) return NULL; - return oneof; +static bool TryGetChar(const char** src, const char* end, char* ch) { + if (*src == end) return false; + *ch = **src; + *src += 1; + return true; } -upb_MessageValue upb_FieldDef_Default(const upb_FieldDef* f) { - upb_MessageValue ret; - - if (upb_FieldDef_IsRepeated(f) || upb_FieldDef_IsSubMessage(f)) { - return (upb_MessageValue){.msg_val = NULL}; +static int TryGetHexDigit(const char** src, const char* end) { + char ch; + if (!TryGetChar(src, end, &ch)) return -1; + if ('0' <= ch && ch <= '9') { + return ch - '0'; } - - switch (upb_FieldDef_CType(f)) { - case kUpb_CType_Bool: - return (upb_MessageValue){.bool_val = f->defaultval.boolean}; - case kUpb_CType_Int64: - return (upb_MessageValue){.int64_val = f->defaultval.sint}; - case kUpb_CType_UInt64: - return (upb_MessageValue){.uint64_val = f->defaultval.uint}; - case kUpb_CType_Enum: - case kUpb_CType_Int32: - return (upb_MessageValue){.int32_val = (int32_t)f->defaultval.sint}; - case kUpb_CType_UInt32: - return (upb_MessageValue){.uint32_val = (uint32_t)f->defaultval.uint}; - case kUpb_CType_Float: - return (upb_MessageValue){.float_val = f->defaultval.flt}; - case kUpb_CType_Double: - return (upb_MessageValue){.double_val = f->defaultval.dbl}; - case kUpb_CType_String: - case kUpb_CType_Bytes: { - str_t* str = f->defaultval.str; - if (str) { - return (upb_MessageValue){ - .str_val = (upb_StringView){.data = str->str, .size = str->len}}; - } else { - return (upb_MessageValue){ - .str_val = (upb_StringView){.data = NULL, .size = 0}}; - } - } - default: - UPB_UNREACHABLE(); + ch = upb_ascii_lower(ch); + if ('a' <= ch && ch <= 'f') { + return ch - 'a' + 0xa; } + *src -= 1; // Char wasn't actually a hex digit. + return -1; +} +static char upb_DefBuilder_ParseHexEscape(upb_DefBuilder* ctx, + const upb_FieldDef* f, + const char** src, const char* end) { + int hex_digit = TryGetHexDigit(src, end); + if (hex_digit < 0) { + _upb_DefBuilder_Errf( + ctx, "\\x must be followed by at least one hex digit (field='%s')", + upb_FieldDef_FullName(f)); + return 0; + } + unsigned int ret = hex_digit; + while ((hex_digit = TryGetHexDigit(src, end)) >= 0) { + ret = (ret << 4) | hex_digit; + } + if (ret > 0xff) { + _upb_DefBuilder_Errf(ctx, "Value of hex escape in field %s exceeds 8 bits", + upb_FieldDef_FullName(f)); + return 0; + } return ret; } -const upb_MessageDef* upb_FieldDef_MessageSubDef(const upb_FieldDef* f) { - return upb_FieldDef_CType(f) == kUpb_CType_Message ? f->sub.msgdef : NULL; +static char TryGetOctalDigit(const char** src, const char* end) { + char ch; + if (!TryGetChar(src, end, &ch)) return -1; + if ('0' <= ch && ch <= '7') { + return ch - '0'; + } + *src -= 1; // Char wasn't actually an octal digit. + return -1; } -const upb_EnumDef* upb_FieldDef_EnumSubDef(const upb_FieldDef* f) { - return upb_FieldDef_CType(f) == kUpb_CType_Enum ? f->sub.enumdef : NULL; +static char upb_DefBuilder_ParseOctalEscape(upb_DefBuilder* ctx, + const upb_FieldDef* f, + const char** src, const char* end) { + char ch = 0; + for (int i = 0; i < 3; i++) { + char digit; + if ((digit = TryGetOctalDigit(src, end)) >= 0) { + ch = (ch << 3) | digit; + } + } + return ch; } -const upb_MiniTableField* upb_FieldDef_MiniTable(const upb_FieldDef* f) { - if (upb_FieldDef_IsExtension(f)) { - const upb_FileDef* file = upb_FieldDef_File(f); - return (upb_MiniTableField*)_upb_FileDef_ExtensionMiniTable( - file, f->layout_index); - } else { - const upb_MiniTable* layout = upb_MessageDef_MiniTable(f->msgdef); - return &layout->fields[f->layout_index]; +char _upb_DefBuilder_ParseEscape(upb_DefBuilder* ctx, const upb_FieldDef* f, + const char** src, const char* end) { + char ch; + if (!TryGetChar(src, end, &ch)) { + _upb_DefBuilder_Errf(ctx, "unterminated escape sequence in field %s", + upb_FieldDef_FullName(f)); + return 0; + } + switch (ch) { + case 'a': + return '\a'; + case 'b': + return '\b'; + case 'f': + return '\f'; + case 'n': + return '\n'; + case 'r': + return '\r'; + case 't': + return '\t'; + case 'v': + return '\v'; + case '\\': + return '\\'; + case '\'': + return '\''; + case '\"': + return '\"'; + case '?': + return '\?'; + case 'x': + case 'X': + return upb_DefBuilder_ParseHexEscape(ctx, f, src, end); + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + *src -= 1; + return upb_DefBuilder_ParseOctalEscape(ctx, f, src, end); } + _upb_DefBuilder_Errf(ctx, "Unknown escape sequence: \\%c", ch); } -const upb_MiniTableExtension* _upb_FieldDef_ExtensionMiniTable( - const upb_FieldDef* f) { - UPB_ASSERT(upb_FieldDef_IsExtension(f)); - const upb_FileDef* file = upb_FieldDef_File(f); - return _upb_FileDef_ExtensionMiniTable(file, f->layout_index); -} +void _upb_DefBuilder_CheckIdentSlow(upb_DefBuilder* ctx, upb_StringView name, + bool full) { + const char* str = name.data; + const size_t len = name.size; + bool start = true; + for (size_t i = 0; i < len; i++) { + const char c = str[i]; + if (c == '.') { + if (start || !full) { + _upb_DefBuilder_Errf( + ctx, "invalid name: unexpected '.' (" UPB_STRINGVIEW_FORMAT ")", + UPB_STRINGVIEW_ARGS(name)); + } + start = true; + } else if (start) { + if (!upb_isletter(c)) { + _upb_DefBuilder_Errf(ctx, + "invalid name: path components must start with a " + "letter (" UPB_STRINGVIEW_FORMAT ")", + UPB_STRINGVIEW_ARGS(name)); + } + start = false; + } else if (!upb_isalphanum(c)) { + _upb_DefBuilder_Errf( + ctx, + "invalid name: non-alphanumeric character (" UPB_STRINGVIEW_FORMAT + ")", + UPB_STRINGVIEW_ARGS(name)); + } + } + if (start) { + _upb_DefBuilder_Errf(ctx, + "invalid name: empty part (" UPB_STRINGVIEW_FORMAT ")", + UPB_STRINGVIEW_ARGS(name)); + } -bool _upb_FieldDef_IsClosedEnum(const upb_FieldDef* f) { - if (f->type_ != kUpb_FieldType_Enum) return false; - return upb_EnumDef_IsClosed(f->sub.enumdef); + // We should never reach this point. + UPB_ASSERT(false); } -bool _upb_FieldDef_IsProto3Optional(const upb_FieldDef* f) { - return f->is_proto3_optional; -} -int _upb_FieldDef_LayoutIndex(const upb_FieldDef* f) { return f->layout_index; } -uint64_t _upb_FieldDef_Modifiers(const upb_FieldDef* f) { - uint64_t out = f->is_packed ? kUpb_FieldModifier_IsPacked : 0; +// Must be last. - switch (f->label_) { - case kUpb_Label_Optional: - if (!upb_FieldDef_HasPresence(f)) { - out |= kUpb_FieldModifier_IsProto3Singular; - } - break; - case kUpb_Label_Repeated: - out |= kUpb_FieldModifier_IsRepeated; - break; - case kUpb_Label_Required: - out |= kUpb_FieldModifier_IsRequired; - break; - } +struct upb_DefPool { + upb_Arena* arena; + upb_strtable syms; // full_name -> packed def ptr + upb_strtable files; // file_name -> (upb_FileDef*) + upb_inttable exts; // (upb_MiniTableExtension*) -> (upb_FieldDef*) + upb_ExtensionRegistry* extreg; + upb_MiniTablePlatform platform; + void* scratch_data; + size_t scratch_size; + size_t bytes_loaded; +}; - if (_upb_FieldDef_IsClosedEnum(f)) { - out |= kUpb_FieldModifier_IsClosedEnum; - } - return out; +void upb_DefPool_Free(upb_DefPool* s) { + upb_Arena_Free(s->arena); + upb_gfree(s->scratch_data); + upb_gfree(s); } -bool upb_FieldDef_HasDefault(const upb_FieldDef* f) { return f->has_default; } -bool upb_FieldDef_HasPresence(const upb_FieldDef* f) { return f->has_presence; } +upb_DefPool* upb_DefPool_New(void) { + upb_DefPool* s = upb_gmalloc(sizeof(*s)); + if (!s) return NULL; -bool upb_FieldDef_HasSubDef(const upb_FieldDef* f) { - return upb_FieldDef_IsSubMessage(f) || - upb_FieldDef_CType(f) == kUpb_CType_Enum; -} + s->arena = upb_Arena_New(); + s->bytes_loaded = 0; -bool upb_FieldDef_IsMap(const upb_FieldDef* f) { - return upb_FieldDef_IsRepeated(f) && upb_FieldDef_IsSubMessage(f) && - upb_MessageDef_IsMapEntry(upb_FieldDef_MessageSubDef(f)); -} + s->scratch_size = 240; + s->scratch_data = upb_gmalloc(s->scratch_size); + if (!s->scratch_data) goto err; -bool upb_FieldDef_IsOptional(const upb_FieldDef* f) { - return upb_FieldDef_Label(f) == kUpb_Label_Optional; -} + if (!upb_strtable_init(&s->syms, 32, s->arena)) goto err; + if (!upb_strtable_init(&s->files, 4, s->arena)) goto err; + if (!upb_inttable_init(&s->exts, s->arena)) goto err; -bool upb_FieldDef_IsPrimitive(const upb_FieldDef* f) { - return !upb_FieldDef_IsString(f) && !upb_FieldDef_IsSubMessage(f); -} + s->extreg = upb_ExtensionRegistry_New(s->arena); + if (!s->extreg) goto err; -bool upb_FieldDef_IsRepeated(const upb_FieldDef* f) { - return upb_FieldDef_Label(f) == kUpb_Label_Repeated; -} + s->platform = kUpb_MiniTablePlatform_Native; -bool upb_FieldDef_IsRequired(const upb_FieldDef* f) { - return upb_FieldDef_Label(f) == kUpb_Label_Required; -} + return s; -bool upb_FieldDef_IsString(const upb_FieldDef* f) { - return upb_FieldDef_CType(f) == kUpb_CType_String || - upb_FieldDef_CType(f) == kUpb_CType_Bytes; +err: + upb_DefPool_Free(s); + return NULL; } -bool upb_FieldDef_IsSubMessage(const upb_FieldDef* f) { - return upb_FieldDef_CType(f) == kUpb_CType_Message; -} +bool _upb_DefPool_InsertExt(upb_DefPool* s, const upb_MiniTableExtension* ext, + const upb_FieldDef* f) { + return upb_inttable_insert(&s->exts, (uintptr_t)ext, upb_value_constptr(f), + s->arena); +} -static bool between(int32_t x, int32_t low, int32_t high) { - return x >= low && x <= high; +bool _upb_DefPool_InsertSym(upb_DefPool* s, upb_StringView sym, upb_value v, + upb_Status* status) { + // TODO: table should support an operation "tryinsert" to avoid the double + // lookup. + if (upb_strtable_lookup2(&s->syms, sym.data, sym.size, NULL)) { + upb_Status_SetErrorFormat(status, "duplicate symbol '%s'", sym.data); + return false; + } + if (!upb_strtable_insert(&s->syms, sym.data, sym.size, v, s->arena)) { + upb_Status_SetErrorMessage(status, "out of memory"); + return false; + } + return true; } -bool upb_FieldDef_checklabel(int32_t label) { return between(label, 1, 3); } -bool upb_FieldDef_checktype(int32_t type) { return between(type, 1, 11); } -bool upb_FieldDef_checkintfmt(int32_t fmt) { return between(fmt, 1, 3); } +static const void* _upb_DefPool_Unpack(const upb_DefPool* s, const char* sym, + size_t size, upb_deftype_t type) { + upb_value v; + return upb_strtable_lookup2(&s->syms, sym, size, &v) + ? _upb_DefType_Unpack(v, type) + : NULL; +} -bool upb_FieldDef_checkdescriptortype(int32_t type) { - return between(type, 1, 18); +bool _upb_DefPool_LookupSym(const upb_DefPool* s, const char* sym, size_t size, + upb_value* v) { + return upb_strtable_lookup2(&s->syms, sym, size, v); } -static bool streql2(const char* a, size_t n, const char* b) { - return n == strlen(b) && memcmp(a, b, n) == 0; +upb_ExtensionRegistry* _upb_DefPool_ExtReg(const upb_DefPool* s) { + return s->extreg; } -// Implement the transformation as described in the spec: -// 1. upper case all letters after an underscore. -// 2. remove all underscores. -static char* make_json_name(const char* name, size_t size, upb_Arena* a) { - char* out = upb_Arena_Malloc(a, size + 1); // +1 is to add a trailing '\0' - if (out == NULL) return NULL; +void** _upb_DefPool_ScratchData(const upb_DefPool* s) { + return (void**)&s->scratch_data; +} - bool ucase_next = false; - char* des = out; - for (size_t i = 0; i < size; i++) { - if (name[i] == '_') { - ucase_next = true; - } else { - *des++ = ucase_next ? toupper(name[i]) : name[i]; - ucase_next = false; - } - } - *des++ = '\0'; - return out; +size_t* _upb_DefPool_ScratchSize(const upb_DefPool* s) { + return (size_t*)&s->scratch_size; } -static str_t* newstr(upb_DefBuilder* ctx, const char* data, size_t len) { - str_t* ret = _upb_DefBuilder_Alloc(ctx, sizeof(*ret) + len); - if (!ret) _upb_DefBuilder_OomErr(ctx); - ret->len = len; - if (len) memcpy(ret->str, data, len); - ret->str[len] = '\0'; - return ret; +void _upb_DefPool_SetPlatform(upb_DefPool* s, upb_MiniTablePlatform platform) { + assert(upb_strtable_count(&s->files) == 0); + s->platform = platform; } -static str_t* unescape(upb_DefBuilder* ctx, const upb_FieldDef* f, - const char* data, size_t len) { - // Size here is an upper bound; escape sequences could ultimately shrink it. - str_t* ret = _upb_DefBuilder_Alloc(ctx, sizeof(*ret) + len); - char* dst = &ret->str[0]; - const char* src = data; - const char* end = data + len; +const upb_MessageDef* upb_DefPool_FindMessageByName(const upb_DefPool* s, + const char* sym) { + return _upb_DefPool_Unpack(s, sym, strlen(sym), UPB_DEFTYPE_MSG); +} - while (src < end) { - if (*src == '\\') { - src++; - *dst++ = _upb_DefBuilder_ParseEscape(ctx, f, &src, end); - } else { - *dst++ = *src++; - } - } +const upb_MessageDef* upb_DefPool_FindMessageByNameWithSize( + const upb_DefPool* s, const char* sym, size_t len) { + return _upb_DefPool_Unpack(s, sym, len, UPB_DEFTYPE_MSG); +} - ret->len = dst - &ret->str[0]; - return ret; +const upb_EnumDef* upb_DefPool_FindEnumByName(const upb_DefPool* s, + const char* sym) { + return _upb_DefPool_Unpack(s, sym, strlen(sym), UPB_DEFTYPE_ENUM); } -static void parse_default(upb_DefBuilder* ctx, const char* str, size_t len, - upb_FieldDef* f) { - char* end; - char nullz[64]; - errno = 0; +const upb_EnumValueDef* upb_DefPool_FindEnumByNameval(const upb_DefPool* s, + const char* sym) { + return _upb_DefPool_Unpack(s, sym, strlen(sym), UPB_DEFTYPE_ENUMVAL); +} - switch (upb_FieldDef_CType(f)) { - case kUpb_CType_Int32: - case kUpb_CType_Int64: - case kUpb_CType_UInt32: - case kUpb_CType_UInt64: - case kUpb_CType_Double: - case kUpb_CType_Float: - // Standard C number parsing functions expect null-terminated strings. - if (len >= sizeof(nullz) - 1) { - _upb_DefBuilder_Errf(ctx, "Default too long: %.*s", (int)len, str); - } - memcpy(nullz, str, len); - nullz[len] = '\0'; - str = nullz; - break; +const upb_FileDef* upb_DefPool_FindFileByName(const upb_DefPool* s, + const char* name) { + upb_value v; + return upb_strtable_lookup(&s->files, name, &v) ? upb_value_getconstptr(v) + : NULL; +} + +const upb_FileDef* upb_DefPool_FindFileByNameWithSize(const upb_DefPool* s, + const char* name, + size_t len) { + upb_value v; + return upb_strtable_lookup2(&s->files, name, len, &v) + ? upb_value_getconstptr(v) + : NULL; +} + +const upb_FieldDef* upb_DefPool_FindExtensionByNameWithSize( + const upb_DefPool* s, const char* name, size_t size) { + upb_value v; + if (!upb_strtable_lookup2(&s->syms, name, size, &v)) return NULL; + + switch (_upb_DefType_Type(v)) { + case UPB_DEFTYPE_FIELD: + return _upb_DefType_Unpack(v, UPB_DEFTYPE_FIELD); + case UPB_DEFTYPE_MSG: { + const upb_MessageDef* m = _upb_DefType_Unpack(v, UPB_DEFTYPE_MSG); + return _upb_MessageDef_InMessageSet(m) + ? upb_MessageDef_NestedExtension(m, 0) + : NULL; + } default: break; } - switch (upb_FieldDef_CType(f)) { - case kUpb_CType_Int32: { - long val = strtol(str, &end, 0); - if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end) { - goto invalid; + return NULL; +} + +const upb_FieldDef* upb_DefPool_FindExtensionByName(const upb_DefPool* s, + const char* sym) { + return upb_DefPool_FindExtensionByNameWithSize(s, sym, strlen(sym)); +} + +const upb_ServiceDef* upb_DefPool_FindServiceByName(const upb_DefPool* s, + const char* name) { + return _upb_DefPool_Unpack(s, name, strlen(name), UPB_DEFTYPE_SERVICE); +} + +const upb_ServiceDef* upb_DefPool_FindServiceByNameWithSize( + const upb_DefPool* s, const char* name, size_t size) { + return _upb_DefPool_Unpack(s, name, size, UPB_DEFTYPE_SERVICE); +} + +const upb_FileDef* upb_DefPool_FindFileContainingSymbol(const upb_DefPool* s, + const char* name) { + upb_value v; + // TODO(haberman): non-extension fields and oneofs. + if (upb_strtable_lookup(&s->syms, name, &v)) { + switch (_upb_DefType_Type(v)) { + case UPB_DEFTYPE_EXT: { + const upb_FieldDef* f = _upb_DefType_Unpack(v, UPB_DEFTYPE_EXT); + return upb_FieldDef_File(f); } - f->defaultval.sint = val; - break; - } - case kUpb_CType_Enum: { - const upb_EnumDef* e = f->sub.enumdef; - const upb_EnumValueDef* ev = - upb_EnumDef_FindValueByNameWithSize(e, str, len); - if (!ev) { - goto invalid; + case UPB_DEFTYPE_MSG: { + const upb_MessageDef* m = _upb_DefType_Unpack(v, UPB_DEFTYPE_MSG); + return upb_MessageDef_File(m); } - f->defaultval.sint = upb_EnumValueDef_Number(ev); - break; - } - case kUpb_CType_Int64: { - long long val = strtoll(str, &end, 0); - if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || *end) { - goto invalid; + case UPB_DEFTYPE_ENUM: { + const upb_EnumDef* e = _upb_DefType_Unpack(v, UPB_DEFTYPE_ENUM); + return upb_EnumDef_File(e); } - f->defaultval.sint = val; - break; - } - case kUpb_CType_UInt32: { - unsigned long val = strtoul(str, &end, 0); - if (val > UINT32_MAX || errno == ERANGE || *end) { - goto invalid; + case UPB_DEFTYPE_ENUMVAL: { + const upb_EnumValueDef* ev = + _upb_DefType_Unpack(v, UPB_DEFTYPE_ENUMVAL); + return upb_EnumDef_File(upb_EnumValueDef_Enum(ev)); } - f->defaultval.uint = val; - break; - } - case kUpb_CType_UInt64: { - unsigned long long val = strtoull(str, &end, 0); - if (val > UINT64_MAX || errno == ERANGE || *end) { - goto invalid; + case UPB_DEFTYPE_SERVICE: { + const upb_ServiceDef* service = + _upb_DefType_Unpack(v, UPB_DEFTYPE_SERVICE); + return upb_ServiceDef_File(service); } - f->defaultval.uint = val; - break; + default: + UPB_UNREACHABLE(); } - case kUpb_CType_Double: { - double val = strtod(str, &end); - if (errno == ERANGE || *end) { - goto invalid; + } + + const char* last_dot = strrchr(name, '.'); + if (last_dot) { + const upb_MessageDef* parent = + upb_DefPool_FindMessageByNameWithSize(s, name, last_dot - name); + if (parent) { + const char* shortname = last_dot + 1; + if (upb_MessageDef_FindByNameWithSize(parent, shortname, + strlen(shortname), NULL, NULL)) { + return upb_MessageDef_File(parent); } - f->defaultval.dbl = val; - break; } - case kUpb_CType_Float: { - float val = strtof(str, &end); - if (errno == ERANGE || *end) { - goto invalid; - } - f->defaultval.flt = val; - break; - } - case kUpb_CType_Bool: { - if (streql2(str, len, "false")) { - f->defaultval.boolean = false; - } else if (streql2(str, len, "true")) { - f->defaultval.boolean = true; - } else { - goto invalid; - } - break; - } - case kUpb_CType_String: - f->defaultval.str = newstr(ctx, str, len); - break; - case kUpb_CType_Bytes: - f->defaultval.str = unescape(ctx, f, str, len); - break; - case kUpb_CType_Message: - /* Should not have a default value. */ - _upb_DefBuilder_Errf(ctx, "Message should not have a default (%s)", - upb_FieldDef_FullName(f)); } - return; - -invalid: - _upb_DefBuilder_Errf(ctx, "Invalid default '%.*s' for field %s of type %d", - (int)len, str, upb_FieldDef_FullName(f), - (int)upb_FieldDef_Type(f)); + return NULL; } -static void set_default_default(upb_DefBuilder* ctx, upb_FieldDef* f) { - switch (upb_FieldDef_CType(f)) { - case kUpb_CType_Int32: - case kUpb_CType_Int64: - f->defaultval.sint = 0; - break; - case kUpb_CType_UInt64: - case kUpb_CType_UInt32: - f->defaultval.uint = 0; - break; - case kUpb_CType_Double: - case kUpb_CType_Float: - f->defaultval.dbl = 0; - break; - case kUpb_CType_String: - case kUpb_CType_Bytes: - f->defaultval.str = newstr(ctx, NULL, 0); - break; - case kUpb_CType_Bool: - f->defaultval.boolean = false; - break; - case kUpb_CType_Enum: { - const upb_EnumValueDef* v = upb_EnumDef_Value(f->sub.enumdef, 0); - f->defaultval.sint = upb_EnumValueDef_Number(v); - break; +static void remove_filedef(upb_DefPool* s, upb_FileDef* file) { + intptr_t iter = UPB_INTTABLE_BEGIN; + upb_StringView key; + upb_value val; + while (upb_strtable_next2(&s->syms, &key, &val, &iter)) { + const upb_FileDef* f; + switch (_upb_DefType_Type(val)) { + case UPB_DEFTYPE_EXT: + f = upb_FieldDef_File(_upb_DefType_Unpack(val, UPB_DEFTYPE_EXT)); + break; + case UPB_DEFTYPE_MSG: + f = upb_MessageDef_File(_upb_DefType_Unpack(val, UPB_DEFTYPE_MSG)); + break; + case UPB_DEFTYPE_ENUM: + f = upb_EnumDef_File(_upb_DefType_Unpack(val, UPB_DEFTYPE_ENUM)); + break; + case UPB_DEFTYPE_ENUMVAL: + f = upb_EnumDef_File(upb_EnumValueDef_Enum( + _upb_DefType_Unpack(val, UPB_DEFTYPE_ENUMVAL))); + break; + case UPB_DEFTYPE_SERVICE: + f = upb_ServiceDef_File(_upb_DefType_Unpack(val, UPB_DEFTYPE_SERVICE)); + break; + default: + UPB_UNREACHABLE(); } - case kUpb_CType_Message: - break; - } -} - -static void _upb_FieldDef_Create(upb_DefBuilder* ctx, const char* prefix, - const UPB_DESC(FieldDescriptorProto) * - field_proto, - upb_MessageDef* m, upb_FieldDef* f) { - // Must happen before _upb_DefBuilder_Add() - f->file = _upb_DefBuilder_File(ctx); - if (!UPB_DESC(FieldDescriptorProto_has_name)(field_proto)) { - _upb_DefBuilder_Errf(ctx, "field has no name"); + if (f == file) upb_strtable_removeiter(&s->syms, &iter); } +} - const upb_StringView name = UPB_DESC(FieldDescriptorProto_name)(field_proto); - - f->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); - f->label_ = (int)UPB_DESC(FieldDescriptorProto_label)(field_proto); - f->number_ = UPB_DESC(FieldDescriptorProto_number)(field_proto); - f->is_proto3_optional = - UPB_DESC(FieldDescriptorProto_proto3_optional)(field_proto); - f->msgdef = m; - f->scope.oneof = NULL; - - f->has_json_name = UPB_DESC(FieldDescriptorProto_has_json_name)(field_proto); - if (f->has_json_name) { - const upb_StringView sv = - UPB_DESC(FieldDescriptorProto_json_name)(field_proto); - f->json_name = upb_strdup2(sv.data, sv.size, ctx->arena); +static const upb_FileDef* upb_DefBuilder_AddFileToPool( + upb_DefBuilder* const builder, upb_DefPool* const s, + const UPB_DESC(FileDescriptorProto) * const file_proto, + const upb_StringView name, upb_Status* const status) { + if (UPB_SETJMP(builder->err) != 0) { + UPB_ASSERT(!upb_Status_IsOk(status)); + if (builder->file) { + remove_filedef(s, builder->file); + builder->file = NULL; + } + } else if (!builder->arena || !builder->tmp_arena) { + _upb_DefBuilder_OomErr(builder); } else { - f->json_name = make_json_name(name.data, name.size, ctx->arena); + _upb_FileDef_Create(builder, file_proto); + upb_strtable_insert(&s->files, name.data, name.size, + upb_value_constptr(builder->file), builder->arena); + UPB_ASSERT(upb_Status_IsOk(status)); + upb_Arena_Fuse(s->arena, builder->arena); } - if (!f->json_name) _upb_DefBuilder_OomErr(ctx); - const bool has_type = UPB_DESC(FieldDescriptorProto_has_type)(field_proto); - const bool has_type_name = - UPB_DESC(FieldDescriptorProto_has_type_name)(field_proto); + if (builder->arena) upb_Arena_Free(builder->arena); + if (builder->tmp_arena) upb_Arena_Free(builder->tmp_arena); + return builder->file; +} - f->type_ = (int)UPB_DESC(FieldDescriptorProto_type)(field_proto); +static const upb_FileDef* _upb_DefPool_AddFile( + upb_DefPool* s, const UPB_DESC(FileDescriptorProto) * file_proto, + const upb_MiniTableFile* layout, upb_Status* status) { + const upb_StringView name = UPB_DESC(FileDescriptorProto_name)(file_proto); - if (has_type) { - switch (f->type_) { - case kUpb_FieldType_Message: - case kUpb_FieldType_Group: - case kUpb_FieldType_Enum: - if (!has_type_name) { - _upb_DefBuilder_Errf(ctx, "field of type %d requires type name (%s)", - (int)f->type_, f->full_name); - } - break; - default: - if (has_type_name) { - _upb_DefBuilder_Errf( - ctx, "invalid type for field with type_name set (%s, %d)", - f->full_name, (int)f->type_); - } + // Determine whether we already know about this file. + { + upb_value v; + if (upb_strtable_lookup2(&s->files, name.data, name.size, &v)) { + upb_Status_SetErrorFormat(status, + "duplicate file name " UPB_STRINGVIEW_FORMAT, + UPB_STRINGVIEW_ARGS(name)); + return NULL; } } - if (!has_type && has_type_name) { - f->type_ = - UPB_FIELD_TYPE_UNSPECIFIED; // We'll assign this in resolve_subdef() - } else { - if (f->type_ < kUpb_FieldType_Double || f->type_ > kUpb_FieldType_SInt64) { - _upb_DefBuilder_Errf(ctx, "invalid type for field %s (%d)", f->full_name, - f->type_); - } - } + upb_DefBuilder ctx = { + .symtab = s, + .layout = layout, + .platform = s->platform, + .msg_count = 0, + .enum_count = 0, + .ext_count = 0, + .status = status, + .file = NULL, + .arena = upb_Arena_New(), + .tmp_arena = upb_Arena_New(), + }; - if (f->label_ < kUpb_Label_Optional || f->label_ > kUpb_Label_Repeated) { - _upb_DefBuilder_Errf(ctx, "invalid label for field %s (%d)", f->full_name, - f->label_); - } + return upb_DefBuilder_AddFileToPool(&ctx, s, file_proto, name, status); +} - /* We can't resolve the subdef or (in the case of extensions) the containing - * message yet, because it may not have been defined yet. We stash a pointer - * to the field_proto until later when we can properly resolve it. */ - f->sub.unresolved = field_proto; +const upb_FileDef* upb_DefPool_AddFile(upb_DefPool* s, + const UPB_DESC(FileDescriptorProto) * + file_proto, + upb_Status* status) { + return _upb_DefPool_AddFile(s, file_proto, NULL, status); +} - if (f->label_ == kUpb_Label_Required && - upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto3) { - _upb_DefBuilder_Errf(ctx, "proto3 fields cannot be required (%s)", - f->full_name); - } +bool _upb_DefPool_LoadDefInitEx(upb_DefPool* s, const _upb_DefPool_Init* init, + bool rebuild_minitable) { + /* Since this function should never fail (it would indicate a bug in upb) we + * print errors to stderr instead of returning error status to the user. */ + _upb_DefPool_Init** deps = init->deps; + UPB_DESC(FileDescriptorProto) * file; + upb_Arena* arena; + upb_Status status; - if (UPB_DESC(FieldDescriptorProto_has_oneof_index)(field_proto)) { - uint32_t oneof_index = - UPB_DESC(FieldDescriptorProto_oneof_index)(field_proto); + upb_Status_Clear(&status); - if (upb_FieldDef_Label(f) != kUpb_Label_Optional) { - _upb_DefBuilder_Errf(ctx, "fields in oneof must have OPTIONAL label (%s)", - f->full_name); - } + if (upb_DefPool_FindFileByName(s, init->filename)) { + return true; + } - if (!m) { - _upb_DefBuilder_Errf(ctx, "oneof field (%s) has no containing msg", - f->full_name); - } + arena = upb_Arena_New(); - if (oneof_index >= upb_MessageDef_OneofCount(m)) { - _upb_DefBuilder_Errf(ctx, "oneof_index out of range (%s)", f->full_name); - } + for (; *deps; deps++) { + if (!_upb_DefPool_LoadDefInitEx(s, *deps, rebuild_minitable)) goto err; + } - upb_OneofDef* oneof = (upb_OneofDef*)upb_MessageDef_Oneof(m, oneof_index); - f->scope.oneof = oneof; + file = UPB_DESC(FileDescriptorProto_parse_ex)( + init->descriptor.data, init->descriptor.size, NULL, + kUpb_DecodeOption_AliasString, arena); + s->bytes_loaded += init->descriptor.size; - _upb_OneofDef_Insert(ctx, oneof, f, name.data, name.size); + if (!file) { + upb_Status_SetErrorFormat( + &status, + "Failed to parse compiled-in descriptor for file '%s'. This should " + "never happen.", + init->filename); + goto err; } - UPB_DEF_SET_OPTIONS(f->opts, FieldDescriptorProto, FieldOptions, field_proto); + const upb_MiniTableFile* mt = rebuild_minitable ? NULL : init->layout; + if (!_upb_DefPool_AddFile(s, file, mt, &status)) { + goto err; + } - if (UPB_DESC(FieldOptions_has_packed)(f->opts)) { - f->is_packed = UPB_DESC(FieldOptions_packed)(f->opts); - } else { - // Repeated fields default to packed for proto3 only. - f->is_packed = has_type && upb_FieldDef_IsPrimitive(f) && - f->label_ == kUpb_Label_Repeated && - upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto3; - } + upb_Arena_Free(arena); + return true; - f->has_presence = - (!upb_FieldDef_IsRepeated(f)) && - (f->type_ == kUpb_FieldType_Message || f->type_ == kUpb_FieldType_Group || - upb_FieldDef_ContainingOneof(f) || - (upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto2)); +err: + fprintf(stderr, + "Error loading compiled-in descriptor for file '%s' (this should " + "never happen): %s\n", + init->filename, upb_Status_ErrorMessage(&status)); + upb_Arena_Free(arena); + return false; } -static void _upb_FieldDef_CreateExt(upb_DefBuilder* ctx, const char* prefix, - const UPB_DESC(FieldDescriptorProto) * - field_proto, - upb_MessageDef* m, upb_FieldDef* f) { - f->is_extension = true; - _upb_FieldDef_Create(ctx, prefix, field_proto, m, f); - - if (UPB_DESC(FieldDescriptorProto_has_oneof_index)(field_proto)) { - _upb_DefBuilder_Errf(ctx, "oneof_index provided for extension field (%s)", - f->full_name); - } - - f->scope.extension_scope = m; - _upb_DefBuilder_Add(ctx, f->full_name, _upb_DefType_Pack(f, UPB_DEFTYPE_EXT)); - f->layout_index = ctx->ext_count++; - - if (ctx->layout) { - UPB_ASSERT(_upb_FieldDef_ExtensionMiniTable(f)->field.number == f->number_); - } +size_t _upb_DefPool_BytesLoaded(const upb_DefPool* s) { + return s->bytes_loaded; } -static void _upb_FieldDef_CreateNotExt(upb_DefBuilder* ctx, const char* prefix, - const UPB_DESC(FieldDescriptorProto) * - field_proto, - upb_MessageDef* m, upb_FieldDef* f) { - f->is_extension = false; - _upb_FieldDef_Create(ctx, prefix, field_proto, m, f); - - if (!UPB_DESC(FieldDescriptorProto_has_oneof_index)(field_proto)) { - if (f->is_proto3_optional) { - _upb_DefBuilder_Errf( - ctx, - "non-extension field (%s) with proto3_optional was not in a oneof", - f->full_name); - } - } +upb_Arena* _upb_DefPool_Arena(const upb_DefPool* s) { return s->arena; } - _upb_MessageDef_InsertField(ctx, m, f); +const upb_FieldDef* upb_DefPool_FindExtensionByMiniTable( + const upb_DefPool* s, const upb_MiniTableExtension* ext) { + upb_value v; + bool ok = upb_inttable_lookup(&s->exts, (uintptr_t)ext, &v); + UPB_ASSERT(ok); + return upb_value_getconstptr(v); } -upb_FieldDef* _upb_Extensions_New( - upb_DefBuilder* ctx, int n, - const UPB_DESC(FieldDescriptorProto) * const* protos, const char* prefix, - upb_MessageDef* m) { - _upb_DefType_CheckPadding(sizeof(upb_FieldDef)); - upb_FieldDef* defs = - (upb_FieldDef*)_upb_DefBuilder_Alloc(ctx, sizeof(upb_FieldDef) * n); +const upb_FieldDef* upb_DefPool_FindExtensionByNumber(const upb_DefPool* s, + const upb_MessageDef* m, + int32_t fieldnum) { + const upb_MiniTable* t = upb_MessageDef_MiniTable(m); + const upb_MiniTableExtension* ext = + upb_ExtensionRegistry_Lookup(s->extreg, t, fieldnum); + return ext ? upb_DefPool_FindExtensionByMiniTable(s, ext) : NULL; +} - for (int i = 0; i < n; i++) { - upb_FieldDef* f = &defs[i]; +const upb_ExtensionRegistry* upb_DefPool_ExtensionRegistry( + const upb_DefPool* s) { + return s->extreg; +} - _upb_FieldDef_CreateExt(ctx, prefix, protos[i], m, f); - f->index_ = i; +const upb_FieldDef** upb_DefPool_GetAllExtensions(const upb_DefPool* s, + const upb_MessageDef* m, + size_t* count) { + size_t n = 0; + intptr_t iter = UPB_INTTABLE_BEGIN; + uintptr_t key; + upb_value val; + // This is O(all exts) instead of O(exts for m). If we need this to be + // efficient we may need to make extreg into a two-level table, or have a + // second per-message index. + while (upb_inttable_next(&s->exts, &key, &val, &iter)) { + const upb_FieldDef* f = upb_value_getconstptr(val); + if (upb_FieldDef_ContainingType(f) == m) n++; } - - return defs; + const upb_FieldDef** exts = malloc(n * sizeof(*exts)); + iter = UPB_INTTABLE_BEGIN; + size_t i = 0; + while (upb_inttable_next(&s->exts, &key, &val, &iter)) { + const upb_FieldDef* f = upb_value_getconstptr(val); + if (upb_FieldDef_ContainingType(f) == m) exts[i++] = f; + } + *count = n; + return exts; } -upb_FieldDef* _upb_FieldDefs_New( - upb_DefBuilder* ctx, int n, - const UPB_DESC(FieldDescriptorProto) * const* protos, const char* prefix, - upb_MessageDef* m, bool* is_sorted) { - _upb_DefType_CheckPadding(sizeof(upb_FieldDef)); - upb_FieldDef* defs = - (upb_FieldDef*)_upb_DefBuilder_Alloc(ctx, sizeof(upb_FieldDef) * n); - - uint32_t previous = 0; - for (int i = 0; i < n; i++) { - upb_FieldDef* f = &defs[i]; +bool _upb_DefPool_LoadDefInit(upb_DefPool* s, const _upb_DefPool_Init* init) { + return _upb_DefPool_LoadDefInitEx(s, init, false); +} - _upb_FieldDef_CreateNotExt(ctx, prefix, protos[i], m, f); - f->index_ = i; - if (!ctx->layout) { - // Speculate that the def fields are sorted. We will always sort the - // MiniTable fields, so if defs are sorted then indices will match. - // - // If this is incorrect, we will overwrite later. - f->layout_index = i; - } - const uint32_t current = f->number_; - if (previous > current) *is_sorted = false; - previous = current; - } +// Must be last. - return defs; +upb_deftype_t _upb_DefType_Type(upb_value v) { + const uintptr_t num = (uintptr_t)upb_value_getconstptr(v); + return num & UPB_DEFTYPE_MASK; } -static void resolve_subdef(upb_DefBuilder* ctx, const char* prefix, - upb_FieldDef* f) { - const UPB_DESC(FieldDescriptorProto)* field_proto = f->sub.unresolved; - upb_StringView name = UPB_DESC(FieldDescriptorProto_type_name)(field_proto); - bool has_name = UPB_DESC(FieldDescriptorProto_has_type_name)(field_proto); - switch ((int)f->type_) { - case UPB_FIELD_TYPE_UNSPECIFIED: { - // Type was not specified and must be inferred. - UPB_ASSERT(has_name); - upb_deftype_t type; - const void* def = - _upb_DefBuilder_ResolveAny(ctx, f->full_name, prefix, name, &type); - switch (type) { - case UPB_DEFTYPE_ENUM: - f->sub.enumdef = def; - f->type_ = kUpb_FieldType_Enum; - if (!UPB_DESC(FieldOptions_has_packed)(f->opts)) { - f->is_packed = f->label_ == kUpb_Label_Repeated && - upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto3; - } - break; - case UPB_DEFTYPE_MSG: - f->sub.msgdef = def; - f->type_ = kUpb_FieldType_Message; // It appears there is no way of - // this being a group. - f->has_presence = !upb_FieldDef_IsRepeated(f); - break; - default: - _upb_DefBuilder_Errf(ctx, "Couldn't resolve type name for field %s", - f->full_name); - } - break; - } - case kUpb_FieldType_Message: - case kUpb_FieldType_Group: - UPB_ASSERT(has_name); - f->sub.msgdef = _upb_DefBuilder_Resolve(ctx, f->full_name, prefix, name, - UPB_DEFTYPE_MSG); - break; - case kUpb_FieldType_Enum: - UPB_ASSERT(has_name); - f->sub.enumdef = _upb_DefBuilder_Resolve(ctx, f->full_name, prefix, name, - UPB_DEFTYPE_ENUM); - break; - default: - // No resolution necessary. - break; - } +upb_value _upb_DefType_Pack(const void* ptr, upb_deftype_t type) { + uintptr_t num = (uintptr_t)ptr; + UPB_ASSERT((num & UPB_DEFTYPE_MASK) == 0); + num |= type; + return upb_value_constptr((const void*)num); } -static int _upb_FieldDef_Compare(const void* p1, const void* p2) { - const uint32_t v1 = (*(upb_FieldDef**)p1)->number_; - const uint32_t v2 = (*(upb_FieldDef**)p2)->number_; - return (v1 < v2) ? -1 : (v1 > v2); +const void* _upb_DefType_Unpack(upb_value v, upb_deftype_t type) { + uintptr_t num = (uintptr_t)upb_value_getconstptr(v); + return (num & UPB_DEFTYPE_MASK) == type + ? (const void*)(num & ~UPB_DEFTYPE_MASK) + : NULL; } -// _upb_FieldDefs_Sorted() is mostly a pure function of its inputs, but has one -// critical side effect that we depend on: it sets layout_index appropriately -// for non-sorted lists of fields. -const upb_FieldDef** _upb_FieldDefs_Sorted(const upb_FieldDef* f, int n, - upb_Arena* a) { - // TODO(salo): Replace this arena alloc with a persistent scratch buffer. - upb_FieldDef** out = (upb_FieldDef**)upb_Arena_Malloc(a, n * sizeof(void*)); - if (!out) return NULL; - for (int i = 0; i < n; i++) { - out[i] = (upb_FieldDef*)&f[i]; - } - qsort(out, n, sizeof(void*), _upb_FieldDef_Compare); +// Must be last. - for (int i = 0; i < n; i++) { - out[i]->layout_index = i; +bool _upb_DescState_Grow(upb_DescState* d, upb_Arena* a) { + const size_t oldbufsize = d->bufsize; + const int used = d->ptr - d->buf; + + if (!d->buf) { + d->buf = upb_Arena_Malloc(a, d->bufsize); + if (!d->buf) return false; + d->ptr = d->buf; + d->e.end = d->buf + d->bufsize; } - return (const upb_FieldDef**)out; -} -bool upb_FieldDef_MiniDescriptorEncode(const upb_FieldDef* f, upb_Arena* a, - upb_StringView* out) { - UPB_ASSERT(f->is_extension); + if (oldbufsize - used < kUpb_MtDataEncoder_MinSize) { + d->bufsize *= 2; + d->buf = upb_Arena_Realloc(a, d->buf, oldbufsize, d->bufsize); + if (!d->buf) return false; + d->ptr = d->buf + used; + d->e.end = d->buf + d->bufsize; + } - upb_DescState s; - _upb_DescState_Init(&s); + return true; +} - const int number = upb_FieldDef_Number(f); - const uint64_t modifiers = _upb_FieldDef_Modifiers(f); - if (!_upb_DescState_Grow(&s, a)) return false; - s.ptr = upb_MtDataEncoder_EncodeExtension(&s.e, s.ptr, f->type_, number, - modifiers); - *s.ptr = '\0'; - out->data = s.buf; - out->size = s.ptr - s.buf; - return true; -} +// Must be last. -static void resolve_extension(upb_DefBuilder* ctx, const char* prefix, - upb_FieldDef* f, - const UPB_DESC(FieldDescriptorProto) * - field_proto) { - if (!UPB_DESC(FieldDescriptorProto_has_extendee)(field_proto)) { - _upb_DefBuilder_Errf(ctx, "extension for field '%s' had no extendee", - f->full_name); - } - - upb_StringView name = UPB_DESC(FieldDescriptorProto_extendee)(field_proto); - const upb_MessageDef* m = - _upb_DefBuilder_Resolve(ctx, f->full_name, prefix, name, UPB_DEFTYPE_MSG); - f->msgdef = m; +struct upb_EnumDef { + const UPB_DESC(EnumOptions) * opts; + const upb_MiniTableEnum* layout; // Only for proto2. + const upb_FileDef* file; + const upb_MessageDef* containing_type; // Could be merged with "file". + const char* full_name; + upb_strtable ntoi; + upb_inttable iton; + const upb_EnumValueDef* values; + const upb_EnumReservedRange* res_ranges; + const upb_StringView* res_names; + int value_count; + int res_range_count; + int res_name_count; + int32_t defaultval; + bool is_closed; + bool is_sorted; // Whether all of the values are defined in ascending order. +}; - if (!_upb_MessageDef_IsValidExtensionNumber(m, f->number_)) { - _upb_DefBuilder_Errf( - ctx, - "field number %u in extension %s has no extension range in message %s", - (unsigned)f->number_, f->full_name, upb_MessageDef_FullName(m)); - } +upb_EnumDef* _upb_EnumDef_At(const upb_EnumDef* e, int i) { + return (upb_EnumDef*)&e[i]; } -void _upb_FieldDef_BuildMiniTableExtension(upb_DefBuilder* ctx, - const upb_FieldDef* f) { - const upb_MiniTableExtension* ext = _upb_FieldDef_ExtensionMiniTable(f); - - if (ctx->layout) { - UPB_ASSERT(upb_FieldDef_Number(f) == ext->field.number); - } else { - upb_StringView desc; - if (!upb_FieldDef_MiniDescriptorEncode(f, ctx->tmp_arena, &desc)) { - _upb_DefBuilder_OomErr(ctx); - } - - upb_MiniTableExtension* mut_ext = (upb_MiniTableExtension*)ext; - upb_MiniTableSub sub = {NULL}; - if (upb_FieldDef_IsSubMessage(f)) { - sub.submsg = upb_MessageDef_MiniTable(f->sub.msgdef); - } else if (_upb_FieldDef_IsClosedEnum(f)) { - sub.subenum = _upb_EnumDef_MiniTable(f->sub.enumdef); - } - bool ok2 = upb_MiniTableExtension_Init(desc.data, desc.size, mut_ext, - upb_MessageDef_MiniTable(f->msgdef), - sub, ctx->status); - if (!ok2) _upb_DefBuilder_Errf(ctx, "Could not build extension mini table"); - } - - bool ok = _upb_DefPool_InsertExt(ctx->symtab, ext, f); - if (!ok) _upb_DefBuilder_OomErr(ctx); +const upb_MiniTableEnum* _upb_EnumDef_MiniTable(const upb_EnumDef* e) { + return e->layout; } -static void resolve_default(upb_DefBuilder* ctx, upb_FieldDef* f, - const UPB_DESC(FieldDescriptorProto) * - field_proto) { - // Have to delay resolving of the default value until now because of the enum - // case, since enum defaults are specified with a label. - if (UPB_DESC(FieldDescriptorProto_has_default_value)(field_proto)) { - upb_StringView defaultval = - UPB_DESC(FieldDescriptorProto_default_value)(field_proto); - - if (upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto3) { - _upb_DefBuilder_Errf(ctx, - "proto3 fields cannot have explicit defaults (%s)", - f->full_name); - } - - if (upb_FieldDef_IsSubMessage(f)) { - _upb_DefBuilder_Errf(ctx, - "message fields cannot have explicit defaults (%s)", - f->full_name); - } +bool _upb_EnumDef_Insert(upb_EnumDef* e, upb_EnumValueDef* v, upb_Arena* a) { + const char* name = upb_EnumValueDef_Name(v); + const upb_value val = upb_value_constptr(v); + bool ok = upb_strtable_insert(&e->ntoi, name, strlen(name), val, a); + if (!ok) return false; - parse_default(ctx, defaultval.data, defaultval.size, f); - f->has_default = true; - } else { - set_default_default(ctx, f); - f->has_default = false; + // Multiple enumerators can have the same number, first one wins. + const int number = upb_EnumValueDef_Number(v); + if (!upb_inttable_lookup(&e->iton, number, NULL)) { + return upb_inttable_insert(&e->iton, number, val, a); } + return true; } -void _upb_FieldDef_Resolve(upb_DefBuilder* ctx, const char* prefix, - upb_FieldDef* f) { - // We have to stash this away since resolve_subdef() may overwrite it. - const UPB_DESC(FieldDescriptorProto)* field_proto = f->sub.unresolved; - - resolve_subdef(ctx, prefix, f); - resolve_default(ctx, f, field_proto); - - if (f->is_extension) { - resolve_extension(ctx, prefix, f, field_proto); - } +const UPB_DESC(EnumOptions) * upb_EnumDef_Options(const upb_EnumDef* e) { + return e->opts; } +bool upb_EnumDef_HasOptions(const upb_EnumDef* e) { + return e->opts != (void*)kUpbDefOptDefault; +} -// Must be last. - -struct upb_FileDef { - const UPB_DESC(FileOptions) * opts; - const char* name; - const char* package; - const char* edition; +const char* upb_EnumDef_FullName(const upb_EnumDef* e) { return e->full_name; } - const upb_FileDef** deps; - const int32_t* public_deps; - const int32_t* weak_deps; - const upb_MessageDef* top_lvl_msgs; - const upb_EnumDef* top_lvl_enums; - const upb_FieldDef* top_lvl_exts; - const upb_ServiceDef* services; - const upb_MiniTableExtension** ext_layouts; - const upb_DefPool* symtab; +const char* upb_EnumDef_Name(const upb_EnumDef* e) { + return _upb_DefBuilder_FullToShort(e->full_name); +} - int dep_count; - int public_dep_count; - int weak_dep_count; - int top_lvl_msg_count; - int top_lvl_enum_count; - int top_lvl_ext_count; - int service_count; - int ext_count; // All exts in the file. - upb_Syntax syntax; -}; +const upb_FileDef* upb_EnumDef_File(const upb_EnumDef* e) { return e->file; } -const UPB_DESC(FileOptions) * upb_FileDef_Options(const upb_FileDef* f) { - return f->opts; +const upb_MessageDef* upb_EnumDef_ContainingType(const upb_EnumDef* e) { + return e->containing_type; } -bool upb_FileDef_HasOptions(const upb_FileDef* f) { - return f->opts != (void*)kUpbDefOptDefault; +int32_t upb_EnumDef_Default(const upb_EnumDef* e) { + UPB_ASSERT(upb_EnumDef_FindValueByNumber(e, e->defaultval)); + return e->defaultval; } -const char* upb_FileDef_Name(const upb_FileDef* f) { return f->name; } - -const char* upb_FileDef_Package(const upb_FileDef* f) { - return f->package ? f->package : ""; +int upb_EnumDef_ReservedRangeCount(const upb_EnumDef* e) { + return e->res_range_count; } -const char* upb_FileDef_Edition(const upb_FileDef* f) { - return f->edition ? f->edition : ""; +const upb_EnumReservedRange* upb_EnumDef_ReservedRange(const upb_EnumDef* e, + int i) { + UPB_ASSERT(0 <= i && i < e->res_range_count); + return _upb_EnumReservedRange_At(e->res_ranges, i); } -const char* _upb_FileDef_RawPackage(const upb_FileDef* f) { return f->package; } - -upb_Syntax upb_FileDef_Syntax(const upb_FileDef* f) { return f->syntax; } +int upb_EnumDef_ReservedNameCount(const upb_EnumDef* e) { + return e->res_name_count; +} -int upb_FileDef_TopLevelMessageCount(const upb_FileDef* f) { - return f->top_lvl_msg_count; +upb_StringView upb_EnumDef_ReservedName(const upb_EnumDef* e, int i) { + UPB_ASSERT(0 <= i && i < e->res_name_count); + return e->res_names[i]; } -int upb_FileDef_DependencyCount(const upb_FileDef* f) { return f->dep_count; } +int upb_EnumDef_ValueCount(const upb_EnumDef* e) { return e->value_count; } -int upb_FileDef_PublicDependencyCount(const upb_FileDef* f) { - return f->public_dep_count; +const upb_EnumValueDef* upb_EnumDef_FindValueByName(const upb_EnumDef* e, + const char* name) { + return upb_EnumDef_FindValueByNameWithSize(e, name, strlen(name)); } -int upb_FileDef_WeakDependencyCount(const upb_FileDef* f) { - return f->weak_dep_count; +const upb_EnumValueDef* upb_EnumDef_FindValueByNameWithSize( + const upb_EnumDef* e, const char* name, size_t size) { + upb_value v; + return upb_strtable_lookup2(&e->ntoi, name, size, &v) + ? upb_value_getconstptr(v) + : NULL; } -const int32_t* _upb_FileDef_PublicDependencyIndexes(const upb_FileDef* f) { - return f->public_deps; +const upb_EnumValueDef* upb_EnumDef_FindValueByNumber(const upb_EnumDef* e, + int32_t num) { + upb_value v; + return upb_inttable_lookup(&e->iton, num, &v) ? upb_value_getconstptr(v) + : NULL; } -const int32_t* _upb_FileDef_WeakDependencyIndexes(const upb_FileDef* f) { - return f->weak_deps; +bool upb_EnumDef_CheckNumber(const upb_EnumDef* e, int32_t num) { + // We could use upb_EnumDef_FindValueByNumber(e, num) != NULL, but we expect + // this to be faster (especially for small numbers). + return upb_MiniTableEnum_CheckValue(e->layout, num); } -int upb_FileDef_TopLevelEnumCount(const upb_FileDef* f) { - return f->top_lvl_enum_count; +const upb_EnumValueDef* upb_EnumDef_Value(const upb_EnumDef* e, int i) { + UPB_ASSERT(0 <= i && i < e->value_count); + return _upb_EnumValueDef_At(e->values, i); } -int upb_FileDef_TopLevelExtensionCount(const upb_FileDef* f) { - return f->top_lvl_ext_count; -} +bool upb_EnumDef_IsClosed(const upb_EnumDef* e) { return e->is_closed; } -int upb_FileDef_ServiceCount(const upb_FileDef* f) { return f->service_count; } +bool upb_EnumDef_MiniDescriptorEncode(const upb_EnumDef* e, upb_Arena* a, + upb_StringView* out) { + upb_DescState s; + _upb_DescState_Init(&s); -const upb_FileDef* upb_FileDef_Dependency(const upb_FileDef* f, int i) { - UPB_ASSERT(0 <= i && i < f->dep_count); - return f->deps[i]; -} + const upb_EnumValueDef** sorted = NULL; + if (!e->is_sorted) { + sorted = _upb_EnumValueDefs_Sorted(e->values, e->value_count, a); + if (!sorted) return false; + } -const upb_FileDef* upb_FileDef_PublicDependency(const upb_FileDef* f, int i) { - UPB_ASSERT(0 <= i && i < f->public_dep_count); - return f->deps[f->public_deps[i]]; -} + if (!_upb_DescState_Grow(&s, a)) return false; + s.ptr = upb_MtDataEncoder_StartEnum(&s.e, s.ptr); -const upb_FileDef* upb_FileDef_WeakDependency(const upb_FileDef* f, int i) { - UPB_ASSERT(0 <= i && i < f->public_dep_count); - return f->deps[f->weak_deps[i]]; -} + // Duplicate values are allowed but we only encode each value once. + uint32_t previous = 0; -const upb_MessageDef* upb_FileDef_TopLevelMessage(const upb_FileDef* f, int i) { - UPB_ASSERT(0 <= i && i < f->top_lvl_msg_count); - return _upb_MessageDef_At(f->top_lvl_msgs, i); -} + for (int i = 0; i < e->value_count; i++) { + const uint32_t current = + upb_EnumValueDef_Number(sorted ? sorted[i] : upb_EnumDef_Value(e, i)); + if (i != 0 && previous == current) continue; -const upb_EnumDef* upb_FileDef_TopLevelEnum(const upb_FileDef* f, int i) { - UPB_ASSERT(0 <= i && i < f->top_lvl_enum_count); - return _upb_EnumDef_At(f->top_lvl_enums, i); -} + if (!_upb_DescState_Grow(&s, a)) return false; + s.ptr = upb_MtDataEncoder_PutEnumValue(&s.e, s.ptr, current); + previous = current; + } -const upb_FieldDef* upb_FileDef_TopLevelExtension(const upb_FileDef* f, int i) { - UPB_ASSERT(0 <= i && i < f->top_lvl_ext_count); - return _upb_FieldDef_At(f->top_lvl_exts, i); -} + if (!_upb_DescState_Grow(&s, a)) return false; + s.ptr = upb_MtDataEncoder_EndEnum(&s.e, s.ptr); -const upb_ServiceDef* upb_FileDef_Service(const upb_FileDef* f, int i) { - UPB_ASSERT(0 <= i && i < f->service_count); - return _upb_ServiceDef_At(f->services, i); + // There will always be room for this '\0' in the encoder buffer because + // kUpb_MtDataEncoder_MinSize is overkill for upb_MtDataEncoder_EndEnum(). + UPB_ASSERT(s.ptr < s.buf + s.bufsize); + *s.ptr = '\0'; + + out->data = s.buf; + out->size = s.ptr - s.buf; + return true; } -const upb_DefPool* upb_FileDef_Pool(const upb_FileDef* f) { return f->symtab; } +static upb_MiniTableEnum* create_enumlayout(upb_DefBuilder* ctx, + const upb_EnumDef* e) { + upb_StringView sv; + bool ok = upb_EnumDef_MiniDescriptorEncode(e, ctx->tmp_arena, &sv); + if (!ok) _upb_DefBuilder_Errf(ctx, "OOM while building enum MiniDescriptor"); -const upb_MiniTableExtension* _upb_FileDef_ExtensionMiniTable( - const upb_FileDef* f, int i) { - return f->ext_layouts[i]; + upb_Status status; + upb_MiniTableEnum* layout = + upb_MiniTableEnum_Build(sv.data, sv.size, ctx->arena, &status); + if (!layout) + _upb_DefBuilder_Errf(ctx, "Error building enum MiniTable: %s", status.msg); + return layout; } -static char* strviewdup(upb_DefBuilder* ctx, upb_StringView view) { - char* ret = upb_strdup2(view.data, view.size, _upb_DefBuilder_Arena(ctx)); - if (!ret) _upb_DefBuilder_OomErr(ctx); - return ret; +static upb_StringView* _upb_EnumReservedNames_New( + upb_DefBuilder* ctx, int n, const upb_StringView* protos) { + upb_StringView* sv = _upb_DefBuilder_Alloc(ctx, sizeof(upb_StringView) * n); + for (int i = 0; i < n; i++) { + sv[i].data = + upb_strdup2(protos[i].data, protos[i].size, _upb_DefBuilder_Arena(ctx)); + sv[i].size = protos[i].size; + } + return sv; } -static bool streql_view(upb_StringView view, const char* b) { - return view.size == strlen(b) && memcmp(view.data, b, view.size) == 0; -} +static void create_enumdef(upb_DefBuilder* ctx, const char* prefix, + const UPB_DESC(EnumDescriptorProto) * enum_proto, + upb_EnumDef* e) { + const UPB_DESC(EnumValueDescriptorProto)* const* values; + const UPB_DESC(EnumDescriptorProto_EnumReservedRange)* const* res_ranges; + const upb_StringView* res_names; + upb_StringView name; + size_t n_value, n_res_range, n_res_name; -static int count_exts_in_msg(const UPB_DESC(DescriptorProto) * msg_proto) { - size_t n; - UPB_DESC(DescriptorProto_extension)(msg_proto, &n); - int ext_count = n; + // Must happen before _upb_DefBuilder_Add() + e->file = _upb_DefBuilder_File(ctx); - const UPB_DESC(DescriptorProto)* const* nested_msgs = - UPB_DESC(DescriptorProto_nested_type)(msg_proto, &n); - for (size_t i = 0; i < n; i++) { - ext_count += count_exts_in_msg(nested_msgs[i]); - } + name = UPB_DESC(EnumDescriptorProto_name)(enum_proto); - return ext_count; -} + e->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); + _upb_DefBuilder_Add(ctx, e->full_name, + _upb_DefType_Pack(e, UPB_DEFTYPE_ENUM)); -// Allocate and initialize one file def, and add it to the context object. -void _upb_FileDef_Create(upb_DefBuilder* ctx, - const UPB_DESC(FileDescriptorProto) * file_proto) { - upb_FileDef* file = _upb_DefBuilder_Alloc(ctx, sizeof(upb_FileDef)); - ctx->file = file; + e->is_closed = (!UPB_TREAT_PROTO2_ENUMS_LIKE_PROTO3) && + (upb_FileDef_Syntax(e->file) == kUpb_Syntax_Proto2); - const UPB_DESC(DescriptorProto)* const* msgs; - const UPB_DESC(EnumDescriptorProto)* const* enums; - const UPB_DESC(FieldDescriptorProto)* const* exts; - const UPB_DESC(ServiceDescriptorProto)* const* services; - const upb_StringView* strs; - const int32_t* public_deps; - const int32_t* weak_deps; - size_t n; + values = UPB_DESC(EnumDescriptorProto_value)(enum_proto, &n_value); - file->symtab = ctx->symtab; + bool ok = upb_strtable_init(&e->ntoi, n_value, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); - // Count all extensions in the file, to build a flat array of layouts. - UPB_DESC(FileDescriptorProto_extension)(file_proto, &n); - int ext_count = n; - msgs = UPB_DESC(FileDescriptorProto_message_type)(file_proto, &n); - for (int i = 0; i < n; i++) { - ext_count += count_exts_in_msg(msgs[i]); - } - file->ext_count = ext_count; + ok = upb_inttable_init(&e->iton, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); - if (ctx->layout) { - // We are using the ext layouts that were passed in. - file->ext_layouts = ctx->layout->exts; - if (ctx->layout->ext_count != file->ext_count) { - _upb_DefBuilder_Errf(ctx, - "Extension count did not match layout (%d vs %d)", - ctx->layout->ext_count, file->ext_count); - } - } else { - // We are building ext layouts from scratch. - file->ext_layouts = _upb_DefBuilder_Alloc( - ctx, sizeof(*file->ext_layouts) * file->ext_count); - upb_MiniTableExtension* ext = - _upb_DefBuilder_Alloc(ctx, sizeof(*ext) * file->ext_count); - for (int i = 0; i < file->ext_count; i++) { - file->ext_layouts[i] = &ext[i]; - } - } + e->defaultval = 0; + e->value_count = n_value; + e->values = + _upb_EnumValueDefs_New(ctx, prefix, n_value, values, e, &e->is_sorted); - upb_StringView name = UPB_DESC(FileDescriptorProto_name)(file_proto); - file->name = strviewdup(ctx, name); - if (strlen(file->name) != name.size) { - _upb_DefBuilder_Errf(ctx, "File name contained embedded NULL"); + if (n_value == 0) { + _upb_DefBuilder_Errf(ctx, "enums must contain at least one value (%s)", + e->full_name); } - upb_StringView package = UPB_DESC(FileDescriptorProto_package)(file_proto); - - if (package.size) { - _upb_DefBuilder_CheckIdentFull(ctx, package); - file->package = strviewdup(ctx, package); - } else { - file->package = NULL; - } + res_ranges = + UPB_DESC(EnumDescriptorProto_reserved_range)(enum_proto, &n_res_range); + e->res_range_count = n_res_range; + e->res_ranges = _upb_EnumReservedRanges_New(ctx, n_res_range, res_ranges, e); - upb_StringView edition = UPB_DESC(FileDescriptorProto_edition)(file_proto); + res_names = + UPB_DESC(EnumDescriptorProto_reserved_name)(enum_proto, &n_res_name); + e->res_name_count = n_res_name; + e->res_names = _upb_EnumReservedNames_New(ctx, n_res_name, res_names); - if (edition.size == 0) { - file->edition = NULL; - } else { - // TODO(b/267770604): How should we validate this? - file->edition = strviewdup(ctx, edition); - if (strlen(file->edition) != edition.size) { - _upb_DefBuilder_Errf(ctx, "Edition name contained embedded NULL"); - } - } + UPB_DEF_SET_OPTIONS(e->opts, EnumDescriptorProto, EnumOptions, enum_proto); - if (UPB_DESC(FileDescriptorProto_has_syntax)(file_proto)) { - upb_StringView syntax = UPB_DESC(FileDescriptorProto_syntax)(file_proto); + upb_inttable_compact(&e->iton, ctx->arena); - if (streql_view(syntax, "proto2")) { - file->syntax = kUpb_Syntax_Proto2; - } else if (streql_view(syntax, "proto3")) { - file->syntax = kUpb_Syntax_Proto3; + if (e->is_closed) { + if (ctx->layout) { + UPB_ASSERT(ctx->enum_count < ctx->layout->enum_count); + e->layout = ctx->layout->enums[ctx->enum_count++]; } else { - _upb_DefBuilder_Errf(ctx, "Invalid syntax '" UPB_STRINGVIEW_FORMAT "'", - UPB_STRINGVIEW_ARGS(syntax)); + e->layout = create_enumlayout(ctx, e); } } else { - file->syntax = kUpb_Syntax_Proto2; + e->layout = NULL; } +} - // Read options. - UPB_DEF_SET_OPTIONS(file->opts, FileDescriptorProto, FileOptions, file_proto); +upb_EnumDef* _upb_EnumDefs_New( + upb_DefBuilder* ctx, int n, + const UPB_DESC(EnumDescriptorProto) * const* protos, + const upb_MessageDef* containing_type) { + _upb_DefType_CheckPadding(sizeof(upb_EnumDef)); - // Verify dependencies. - strs = UPB_DESC(FileDescriptorProto_dependency)(file_proto, &n); - file->dep_count = n; - file->deps = _upb_DefBuilder_Alloc(ctx, sizeof(*file->deps) * n); + // If a containing type is defined then get the full name from that. + // Otherwise use the package name from the file def. + const char* name = containing_type ? upb_MessageDef_FullName(containing_type) + : _upb_FileDef_RawPackage(ctx->file); - for (size_t i = 0; i < n; i++) { - upb_StringView str = strs[i]; - file->deps[i] = - upb_DefPool_FindFileByNameWithSize(ctx->symtab, str.data, str.size); - if (!file->deps[i]) { - _upb_DefBuilder_Errf(ctx, - "Depends on file '" UPB_STRINGVIEW_FORMAT - "', but it has not been loaded", - UPB_STRINGVIEW_ARGS(str)); - } + upb_EnumDef* e = _upb_DefBuilder_Alloc(ctx, sizeof(upb_EnumDef) * n); + for (int i = 0; i < n; i++) { + create_enumdef(ctx, name, protos[i], &e[i]); + e[i].containing_type = containing_type; } + return e; +} - public_deps = UPB_DESC(FileDescriptorProto_public_dependency)(file_proto, &n); - file->public_dep_count = n; - file->public_deps = - _upb_DefBuilder_Alloc(ctx, sizeof(*file->public_deps) * n); - int32_t* mutable_public_deps = (int32_t*)file->public_deps; - for (size_t i = 0; i < n; i++) { - if (public_deps[i] >= file->dep_count) { - _upb_DefBuilder_Errf(ctx, "public_dep %d is out of range", - (int)public_deps[i]); - } - mutable_public_deps[i] = public_deps[i]; - } - weak_deps = UPB_DESC(FileDescriptorProto_weak_dependency)(file_proto, &n); - file->weak_dep_count = n; - file->weak_deps = _upb_DefBuilder_Alloc(ctx, sizeof(*file->weak_deps) * n); - int32_t* mutable_weak_deps = (int32_t*)file->weak_deps; - for (size_t i = 0; i < n; i++) { - if (weak_deps[i] >= file->dep_count) { - _upb_DefBuilder_Errf(ctx, "weak_dep %d is out of range", - (int)weak_deps[i]); - } - mutable_weak_deps[i] = weak_deps[i]; - } - // Create enums. - enums = UPB_DESC(FileDescriptorProto_enum_type)(file_proto, &n); - file->top_lvl_enum_count = n; - file->top_lvl_enums = _upb_EnumDefs_New(ctx, n, enums, NULL); +// Must be last. - // Create extensions. - exts = UPB_DESC(FileDescriptorProto_extension)(file_proto, &n); - file->top_lvl_ext_count = n; - file->top_lvl_exts = _upb_Extensions_New(ctx, n, exts, file->package, NULL); +struct upb_EnumReservedRange { + int32_t start; + int32_t end; +}; - // Create messages. - msgs = UPB_DESC(FileDescriptorProto_message_type)(file_proto, &n); - file->top_lvl_msg_count = n; - file->top_lvl_msgs = _upb_MessageDefs_New(ctx, n, msgs, NULL); +upb_EnumReservedRange* _upb_EnumReservedRange_At(const upb_EnumReservedRange* r, + int i) { + return (upb_EnumReservedRange*)&r[i]; +} - // Create services. - services = UPB_DESC(FileDescriptorProto_service)(file_proto, &n); - file->service_count = n; - file->services = _upb_ServiceDefs_New(ctx, n, services); - - // Now that all names are in the table, build layouts and resolve refs. +int32_t upb_EnumReservedRange_Start(const upb_EnumReservedRange* r) { + return r->start; +} +int32_t upb_EnumReservedRange_End(const upb_EnumReservedRange* r) { + return r->end; +} - for (int i = 0; i < file->top_lvl_msg_count; i++) { - upb_MessageDef* m = (upb_MessageDef*)upb_FileDef_TopLevelMessage(file, i); - _upb_MessageDef_Resolve(ctx, m); - } +upb_EnumReservedRange* _upb_EnumReservedRanges_New( + upb_DefBuilder* ctx, int n, + const UPB_DESC(EnumDescriptorProto_EnumReservedRange) * const* protos, + const upb_EnumDef* e) { + upb_EnumReservedRange* r = + _upb_DefBuilder_Alloc(ctx, sizeof(upb_EnumReservedRange) * n); - for (int i = 0; i < file->top_lvl_ext_count; i++) { - upb_FieldDef* f = (upb_FieldDef*)upb_FileDef_TopLevelExtension(file, i); - _upb_FieldDef_Resolve(ctx, file->package, f); - } + for (int i = 0; i < n; i++) { + const int32_t start = + UPB_DESC(EnumDescriptorProto_EnumReservedRange_start)(protos[i]); + const int32_t end = + UPB_DESC(EnumDescriptorProto_EnumReservedRange_end)(protos[i]); - for (int i = 0; i < file->top_lvl_msg_count; i++) { - upb_MessageDef* m = (upb_MessageDef*)upb_FileDef_TopLevelMessage(file, i); - _upb_MessageDef_CreateMiniTable(ctx, (upb_MessageDef*)m); - } + // A full validation would also check that each range is disjoint, and that + // none of the fields overlap with the extension ranges, but we are just + // sanity checking here. - for (int i = 0; i < file->top_lvl_ext_count; i++) { - upb_FieldDef* f = (upb_FieldDef*)upb_FileDef_TopLevelExtension(file, i); - _upb_FieldDef_BuildMiniTableExtension(ctx, f); - } + // Note: Not a typo! Unlike extension ranges and message reserved ranges, + // the end value of an enum reserved range is *inclusive*! + if (end < start) { + _upb_DefBuilder_Errf(ctx, "Reserved range (%d, %d) is invalid, enum=%s\n", + (int)start, (int)end, upb_EnumDef_FullName(e)); + } - for (int i = 0; i < file->top_lvl_msg_count; i++) { - upb_MessageDef* m = (upb_MessageDef*)upb_FileDef_TopLevelMessage(file, i); - _upb_MessageDef_LinkMiniTable(ctx, m); + r[i].start = start; + r[i].end = end; } - if (file->ext_count) { - bool ok = upb_ExtensionRegistry_AddArray( - _upb_DefPool_ExtReg(ctx->symtab), file->ext_layouts, file->ext_count); - if (!ok) _upb_DefBuilder_OomErr(ctx); - } + return r; } -#include - // Must be last. -bool upb_Message_HasFieldByDef(const upb_Message* msg, const upb_FieldDef* f) { - UPB_ASSERT(upb_FieldDef_HasPresence(f)); - return upb_Message_HasField(msg, upb_FieldDef_MiniTable(f)); -} +struct upb_EnumValueDef { + const UPB_DESC(EnumValueOptions) * opts; + const upb_EnumDef* parent; + const char* full_name; + int32_t number; +}; -const upb_FieldDef* upb_Message_WhichOneof(const upb_Message* msg, - const upb_OneofDef* o) { - const upb_FieldDef* f = upb_OneofDef_Field(o, 0); - if (upb_OneofDef_IsSynthetic(o)) { - UPB_ASSERT(upb_OneofDef_FieldCount(o) == 1); - return upb_Message_HasFieldByDef(msg, f) ? f : NULL; - } else { - const upb_MiniTableField* field = upb_FieldDef_MiniTable(f); - uint32_t oneof_case = upb_Message_WhichOneofFieldNumber(msg, field); - f = oneof_case ? upb_OneofDef_LookupNumber(o, oneof_case) : NULL; - UPB_ASSERT((f != NULL) == (oneof_case != 0)); - return f; - } +upb_EnumValueDef* _upb_EnumValueDef_At(const upb_EnumValueDef* v, int i) { + return (upb_EnumValueDef*)&v[i]; } -upb_MessageValue upb_Message_GetFieldByDef(const upb_Message* msg, - const upb_FieldDef* f) { - upb_MessageValue default_val = upb_FieldDef_Default(f); - upb_MessageValue ret; - _upb_Message_GetField(msg, upb_FieldDef_MiniTable(f), &default_val, &ret); - return ret; +static int _upb_EnumValueDef_Compare(const void* p1, const void* p2) { + const uint32_t v1 = (*(const upb_EnumValueDef**)p1)->number; + const uint32_t v2 = (*(const upb_EnumValueDef**)p2)->number; + return (v1 < v2) ? -1 : (v1 > v2); } -upb_MutableMessageValue upb_Message_Mutable(upb_Message* msg, - const upb_FieldDef* f, - upb_Arena* a) { - UPB_ASSERT(upb_FieldDef_IsSubMessage(f) || upb_FieldDef_IsRepeated(f)); - if (upb_FieldDef_HasPresence(f) && !upb_Message_HasFieldByDef(msg, f)) { - // We need to skip the upb_Message_GetFieldByDef() call in this case. - goto make; - } +const upb_EnumValueDef** _upb_EnumValueDefs_Sorted(const upb_EnumValueDef* v, + int n, upb_Arena* a) { + // TODO: Try to replace this arena alloc with a persistent scratch buffer. + upb_EnumValueDef** out = + (upb_EnumValueDef**)upb_Arena_Malloc(a, n * sizeof(void*)); + if (!out) return NULL; - upb_MessageValue val = upb_Message_GetFieldByDef(msg, f); - if (val.array_val) { - return (upb_MutableMessageValue){.array = (upb_Array*)val.array_val}; + for (int i = 0; i < n; i++) { + out[i] = (upb_EnumValueDef*)&v[i]; } + qsort(out, n, sizeof(void*), _upb_EnumValueDef_Compare); - upb_MutableMessageValue ret; -make: - if (!a) return (upb_MutableMessageValue){.array = NULL}; - if (upb_FieldDef_IsMap(f)) { - const upb_MessageDef* entry = upb_FieldDef_MessageSubDef(f); - const upb_FieldDef* key = - upb_MessageDef_FindFieldByNumber(entry, kUpb_MapEntry_KeyFieldNumber); - const upb_FieldDef* value = - upb_MessageDef_FindFieldByNumber(entry, kUpb_MapEntry_ValueFieldNumber); - ret.map = - upb_Map_New(a, upb_FieldDef_CType(key), upb_FieldDef_CType(value)); - } else if (upb_FieldDef_IsRepeated(f)) { - ret.array = upb_Array_New(a, upb_FieldDef_CType(f)); - } else { - UPB_ASSERT(upb_FieldDef_IsSubMessage(f)); - const upb_MessageDef* m = upb_FieldDef_MessageSubDef(f); - ret.msg = upb_Message_New(upb_MessageDef_MiniTable(m), a); - } + return (const upb_EnumValueDef**)out; +} - val.array_val = ret.array; - upb_Message_SetFieldByDef(msg, f, val, a); +const UPB_DESC(EnumValueOptions) * + upb_EnumValueDef_Options(const upb_EnumValueDef* v) { + return v->opts; +} - return ret; +bool upb_EnumValueDef_HasOptions(const upb_EnumValueDef* v) { + return v->opts != (void*)kUpbDefOptDefault; } -bool upb_Message_SetFieldByDef(upb_Message* msg, const upb_FieldDef* f, - upb_MessageValue val, upb_Arena* a) { - return _upb_Message_SetField(msg, upb_FieldDef_MiniTable(f), &val, a); +const upb_EnumDef* upb_EnumValueDef_Enum(const upb_EnumValueDef* v) { + return v->parent; } -void upb_Message_ClearFieldByDef(upb_Message* msg, const upb_FieldDef* f) { - upb_Message_ClearField(msg, upb_FieldDef_MiniTable(f)); +const char* upb_EnumValueDef_FullName(const upb_EnumValueDef* v) { + return v->full_name; } -void upb_Message_ClearByDef(upb_Message* msg, const upb_MessageDef* m) { - upb_Message_Clear(msg, upb_MessageDef_MiniTable(m)); +const char* upb_EnumValueDef_Name(const upb_EnumValueDef* v) { + return _upb_DefBuilder_FullToShort(v->full_name); } -bool upb_Message_Next(const upb_Message* msg, const upb_MessageDef* m, - const upb_DefPool* ext_pool, const upb_FieldDef** out_f, - upb_MessageValue* out_val, size_t* iter) { - size_t i = *iter; - size_t n = upb_MessageDef_FieldCount(m); - UPB_UNUSED(ext_pool); +int32_t upb_EnumValueDef_Number(const upb_EnumValueDef* v) { return v->number; } - // Iterate over normal fields, returning the first one that is set. - while (++i < n) { - const upb_FieldDef* f = upb_MessageDef_Field(m, i); - const upb_MiniTableField* field = upb_FieldDef_MiniTable(f); - upb_MessageValue val = upb_Message_GetFieldByDef(msg, f); +uint32_t upb_EnumValueDef_Index(const upb_EnumValueDef* v) { + // Compute index in our parent's array. + return v - upb_EnumDef_Value(v->parent, 0); +} - // Skip field if unset or empty. - if (upb_MiniTableField_HasPresence(field)) { - if (!upb_Message_HasFieldByDef(msg, f)) continue; - } else { - switch (upb_FieldMode_Get(field)) { - case kUpb_FieldMode_Map: - if (!val.map_val || upb_Map_Size(val.map_val) == 0) continue; - break; - case kUpb_FieldMode_Array: - if (!val.array_val || upb_Array_Size(val.array_val) == 0) continue; - break; - case kUpb_FieldMode_Scalar: - if (!_upb_MiniTable_ValueIsNonZero(&val, field)) continue; - break; - } - } +static void create_enumvaldef(upb_DefBuilder* ctx, const char* prefix, + const UPB_DESC(EnumValueDescriptorProto) * + val_proto, + upb_EnumDef* e, upb_EnumValueDef* v) { + upb_StringView name = UPB_DESC(EnumValueDescriptorProto_name)(val_proto); - *out_val = val; - *out_f = f; - *iter = i; - return true; + v->parent = e; // Must happen prior to _upb_DefBuilder_Add() + v->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); + v->number = UPB_DESC(EnumValueDescriptorProto_number)(val_proto); + _upb_DefBuilder_Add(ctx, v->full_name, + _upb_DefType_Pack(v, UPB_DEFTYPE_ENUMVAL)); + + UPB_DEF_SET_OPTIONS(v->opts, EnumValueDescriptorProto, EnumValueOptions, + val_proto); + + bool ok = _upb_EnumDef_Insert(e, v, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); +} + +// Allocate and initialize an array of |n| enum value defs owned by |e|. +upb_EnumValueDef* _upb_EnumValueDefs_New( + upb_DefBuilder* ctx, const char* prefix, int n, + const UPB_DESC(EnumValueDescriptorProto) * const* protos, upb_EnumDef* e, + bool* is_sorted) { + _upb_DefType_CheckPadding(sizeof(upb_EnumValueDef)); + + upb_EnumValueDef* v = + _upb_DefBuilder_Alloc(ctx, sizeof(upb_EnumValueDef) * n); + + *is_sorted = true; + uint32_t previous = 0; + for (int i = 0; i < n; i++) { + create_enumvaldef(ctx, prefix, protos[i], e, &v[i]); + + const uint32_t current = v[i].number; + if (previous > current) *is_sorted = false; + previous = current; } - if (ext_pool) { - // Return any extensions that are set. - size_t count; - const upb_Message_Extension* ext = _upb_Message_Getexts(msg, &count); - if (i - n < count) { - ext += count - 1 - (i - n); - memcpy(out_val, &ext->data, sizeof(*out_val)); - *out_f = upb_DefPool_FindExtensionByMiniTable(ext_pool, ext->ext); - *iter = i; - return true; - } + if (upb_FileDef_Syntax(ctx->file) == kUpb_Syntax_Proto3 && n > 0 && + v[0].number != 0) { + _upb_DefBuilder_Errf(ctx, + "for proto3, the first enum value must be zero (%s)", + upb_EnumDef_FullName(e)); } - *iter = i; - return false; + return v; } -bool _upb_Message_DiscardUnknown(upb_Message* msg, const upb_MessageDef* m, - int depth) { - size_t iter = kUpb_Message_Begin; - const upb_FieldDef* f; - upb_MessageValue val; - bool ret = true; - if (--depth == 0) return false; - _upb_Message_DiscardUnknown_shallow(msg); +// Must be last. - while (upb_Message_Next(msg, m, NULL /*ext_pool*/, &f, &val, &iter)) { - const upb_MessageDef* subm = upb_FieldDef_MessageSubDef(f); - if (!subm) continue; - if (upb_FieldDef_IsMap(f)) { - const upb_FieldDef* val_f = upb_MessageDef_FindFieldByNumber(subm, 2); - const upb_MessageDef* val_m = upb_FieldDef_MessageSubDef(val_f); - upb_Map* map = (upb_Map*)val.map_val; - size_t iter = kUpb_Map_Begin; - - if (!val_m) continue; - - upb_MessageValue map_key, map_val; - while (upb_Map_Next(map, &map_key, &map_val, &iter)) { - if (!_upb_Message_DiscardUnknown((upb_Message*)map_val.msg_val, val_m, - depth)) { - ret = false; - } - } - } else if (upb_FieldDef_IsRepeated(f)) { - const upb_Array* arr = val.array_val; - size_t i, n = upb_Array_Size(arr); - for (i = 0; i < n; i++) { - upb_MessageValue elem = upb_Array_Get(arr, i); - if (!_upb_Message_DiscardUnknown((upb_Message*)elem.msg_val, subm, - depth)) { - ret = false; - } - } - } else { - if (!_upb_Message_DiscardUnknown((upb_Message*)val.msg_val, subm, - depth)) { - ret = false; - } - } - } +struct upb_ExtensionRange { + const UPB_DESC(ExtensionRangeOptions) * opts; + int32_t start; + int32_t end; +}; - return ret; +upb_ExtensionRange* _upb_ExtensionRange_At(const upb_ExtensionRange* r, int i) { + return (upb_ExtensionRange*)&r[i]; } -bool upb_Message_DiscardUnknown(upb_Message* msg, const upb_MessageDef* m, - int maxdepth) { - return _upb_Message_DiscardUnknown(msg, m, maxdepth); +const UPB_DESC(ExtensionRangeOptions) * + upb_ExtensionRange_Options(const upb_ExtensionRange* r) { + return r->opts; } +bool upb_ExtensionRange_HasOptions(const upb_ExtensionRange* r) { + return r->opts != (void*)kUpbDefOptDefault; +} -// Must be last. +int32_t upb_ExtensionRange_Start(const upb_ExtensionRange* r) { + return r->start; +} -struct upb_MessageDef { - const UPB_DESC(MessageOptions) * opts; - const upb_MiniTable* layout; - const upb_FileDef* file; - const upb_MessageDef* containing_type; - const char* full_name; +int32_t upb_ExtensionRange_End(const upb_ExtensionRange* r) { return r->end; } - // Tables for looking up fields by number and name. - upb_inttable itof; - upb_strtable ntof; +upb_ExtensionRange* _upb_ExtensionRanges_New( + upb_DefBuilder* ctx, int n, + const UPB_DESC(DescriptorProto_ExtensionRange) * const* protos, + const upb_MessageDef* m) { + upb_ExtensionRange* r = + _upb_DefBuilder_Alloc(ctx, sizeof(upb_ExtensionRange) * n); - /* All nested defs. - * MEM: We could save some space here by putting nested defs in a contiguous - * region and calculating counts from offsets or vice-versa. */ - const upb_FieldDef* fields; - const upb_OneofDef* oneofs; - const upb_ExtensionRange* ext_ranges; - const upb_StringView* res_names; - const upb_MessageDef* nested_msgs; - const upb_MessageReservedRange* res_ranges; - const upb_EnumDef* nested_enums; - const upb_FieldDef* nested_exts; + for (int i = 0; i < n; i++) { + const int32_t start = + UPB_DESC(DescriptorProto_ExtensionRange_start)(protos[i]); + const int32_t end = UPB_DESC(DescriptorProto_ExtensionRange_end)(protos[i]); + const int32_t max = UPB_DESC(MessageOptions_message_set_wire_format)( + upb_MessageDef_Options(m)) + ? INT32_MAX + : kUpb_MaxFieldNumber + 1; - // TODO(salo): These counters don't need anywhere near 32 bits. - int field_count; - int real_oneof_count; - int oneof_count; - int ext_range_count; - int res_range_count; - int res_name_count; - int nested_msg_count; - int nested_enum_count; - int nested_ext_count; - bool in_message_set; - bool is_sorted; - upb_WellKnown well_known_type; -#if UINTPTR_MAX == 0xffffffff - uint32_t padding; // Increase size to a multiple of 8. -#endif -}; + // A full validation would also check that each range is disjoint, and that + // none of the fields overlap with the extension ranges, but we are just + // sanity checking here. + if (start < 1 || end <= start || end > max) { + _upb_DefBuilder_Errf(ctx, + "Extension range (%d, %d) is invalid, message=%s\n", + (int)start, (int)end, upb_MessageDef_FullName(m)); + } -static void assign_msg_wellknowntype(upb_MessageDef* m) { - const char* name = m->full_name; - if (name == NULL) { - m->well_known_type = kUpb_WellKnown_Unspecified; - return; - } - if (!strcmp(name, "google.protobuf.Any")) { - m->well_known_type = kUpb_WellKnown_Any; - } else if (!strcmp(name, "google.protobuf.FieldMask")) { - m->well_known_type = kUpb_WellKnown_FieldMask; - } else if (!strcmp(name, "google.protobuf.Duration")) { - m->well_known_type = kUpb_WellKnown_Duration; - } else if (!strcmp(name, "google.protobuf.Timestamp")) { - m->well_known_type = kUpb_WellKnown_Timestamp; - } else if (!strcmp(name, "google.protobuf.DoubleValue")) { - m->well_known_type = kUpb_WellKnown_DoubleValue; - } else if (!strcmp(name, "google.protobuf.FloatValue")) { - m->well_known_type = kUpb_WellKnown_FloatValue; - } else if (!strcmp(name, "google.protobuf.Int64Value")) { - m->well_known_type = kUpb_WellKnown_Int64Value; - } else if (!strcmp(name, "google.protobuf.UInt64Value")) { - m->well_known_type = kUpb_WellKnown_UInt64Value; - } else if (!strcmp(name, "google.protobuf.Int32Value")) { - m->well_known_type = kUpb_WellKnown_Int32Value; - } else if (!strcmp(name, "google.protobuf.UInt32Value")) { - m->well_known_type = kUpb_WellKnown_UInt32Value; - } else if (!strcmp(name, "google.protobuf.BoolValue")) { - m->well_known_type = kUpb_WellKnown_BoolValue; - } else if (!strcmp(name, "google.protobuf.StringValue")) { - m->well_known_type = kUpb_WellKnown_StringValue; - } else if (!strcmp(name, "google.protobuf.BytesValue")) { - m->well_known_type = kUpb_WellKnown_BytesValue; - } else if (!strcmp(name, "google.protobuf.Value")) { - m->well_known_type = kUpb_WellKnown_Value; - } else if (!strcmp(name, "google.protobuf.ListValue")) { - m->well_known_type = kUpb_WellKnown_ListValue; - } else if (!strcmp(name, "google.protobuf.Struct")) { - m->well_known_type = kUpb_WellKnown_Struct; - } else { - m->well_known_type = kUpb_WellKnown_Unspecified; + r[i].start = start; + r[i].end = end; + UPB_DEF_SET_OPTIONS(r[i].opts, DescriptorProto_ExtensionRange, + ExtensionRangeOptions, protos[i]); } -} -upb_MessageDef* _upb_MessageDef_At(const upb_MessageDef* m, int i) { - return (upb_MessageDef*)&m[i]; + return r; } -bool _upb_MessageDef_IsValidExtensionNumber(const upb_MessageDef* m, int n) { - for (int i = 0; i < m->ext_range_count; i++) { - const upb_ExtensionRange* r = upb_MessageDef_ExtensionRange(m, i); - if (upb_ExtensionRange_Start(r) <= n && n < upb_ExtensionRange_End(r)) { - return true; - } - } - return false; -} -const UPB_DESC(MessageOptions) * - upb_MessageDef_Options(const upb_MessageDef* m) { - return m->opts; -} +#include +#include -bool upb_MessageDef_HasOptions(const upb_MessageDef* m) { - return m->opts != (void*)kUpbDefOptDefault; -} -const char* upb_MessageDef_FullName(const upb_MessageDef* m) { - return m->full_name; -} +// Must be last. -const upb_FileDef* upb_MessageDef_File(const upb_MessageDef* m) { - return m->file; -} +#define UPB_FIELD_TYPE_UNSPECIFIED 0 -const upb_MessageDef* upb_MessageDef_ContainingType(const upb_MessageDef* m) { - return m->containing_type; +typedef struct { + size_t len; + char str[1]; // Null-terminated string data follows. +} str_t; + +struct upb_FieldDef { + const UPB_DESC(FieldOptions) * opts; + const upb_FileDef* file; + const upb_MessageDef* msgdef; + const char* full_name; + const char* json_name; + union { + int64_t sint; + uint64_t uint; + double dbl; + float flt; + bool boolean; + str_t* str; + void* msg; // Always NULL. + } defaultval; + union { + const upb_OneofDef* oneof; + const upb_MessageDef* extension_scope; + } scope; + union { + const upb_MessageDef* msgdef; + const upb_EnumDef* enumdef; + const UPB_DESC(FieldDescriptorProto) * unresolved; + } sub; + uint32_t number_; + uint16_t index_; + uint16_t layout_index; // Index into msgdef->layout->fields or file->exts + bool has_default; + bool has_json_name; + bool has_presence; + bool is_extension; + bool is_packed; + bool is_proto3_optional; + upb_FieldType type_; + upb_Label label_; +#if UINTPTR_MAX == 0xffffffff + uint32_t padding; // Increase size to a multiple of 8. +#endif +}; + +upb_FieldDef* _upb_FieldDef_At(const upb_FieldDef* f, int i) { + return (upb_FieldDef*)&f[i]; } -const char* upb_MessageDef_Name(const upb_MessageDef* m) { - return _upb_DefBuilder_FullToShort(m->full_name); +const UPB_DESC(FieldOptions) * upb_FieldDef_Options(const upb_FieldDef* f) { + return f->opts; } -upb_Syntax upb_MessageDef_Syntax(const upb_MessageDef* m) { - return upb_FileDef_Syntax(m->file); +bool upb_FieldDef_HasOptions(const upb_FieldDef* f) { + return f->opts != (void*)kUpbDefOptDefault; } -const upb_FieldDef* upb_MessageDef_FindFieldByNumber(const upb_MessageDef* m, - uint32_t i) { - upb_value val; - return upb_inttable_lookup(&m->itof, i, &val) ? upb_value_getconstptr(val) - : NULL; +const char* upb_FieldDef_FullName(const upb_FieldDef* f) { + return f->full_name; } -const upb_FieldDef* upb_MessageDef_FindFieldByNameWithSize( - const upb_MessageDef* m, const char* name, size_t size) { - upb_value val; - - if (!upb_strtable_lookup2(&m->ntof, name, size, &val)) { - return NULL; +upb_CType upb_FieldDef_CType(const upb_FieldDef* f) { + switch (f->type_) { + case kUpb_FieldType_Double: + return kUpb_CType_Double; + case kUpb_FieldType_Float: + return kUpb_CType_Float; + case kUpb_FieldType_Int64: + case kUpb_FieldType_SInt64: + case kUpb_FieldType_SFixed64: + return kUpb_CType_Int64; + case kUpb_FieldType_Int32: + case kUpb_FieldType_SFixed32: + case kUpb_FieldType_SInt32: + return kUpb_CType_Int32; + case kUpb_FieldType_UInt64: + case kUpb_FieldType_Fixed64: + return kUpb_CType_UInt64; + case kUpb_FieldType_UInt32: + case kUpb_FieldType_Fixed32: + return kUpb_CType_UInt32; + case kUpb_FieldType_Enum: + return kUpb_CType_Enum; + case kUpb_FieldType_Bool: + return kUpb_CType_Bool; + case kUpb_FieldType_String: + return kUpb_CType_String; + case kUpb_FieldType_Bytes: + return kUpb_CType_Bytes; + case kUpb_FieldType_Group: + case kUpb_FieldType_Message: + return kUpb_CType_Message; } - - return _upb_DefType_Unpack(val, UPB_DEFTYPE_FIELD); + UPB_UNREACHABLE(); } -const upb_OneofDef* upb_MessageDef_FindOneofByNameWithSize( - const upb_MessageDef* m, const char* name, size_t size) { - upb_value val; +upb_FieldType upb_FieldDef_Type(const upb_FieldDef* f) { return f->type_; } - if (!upb_strtable_lookup2(&m->ntof, name, size, &val)) { - return NULL; - } +uint32_t upb_FieldDef_Index(const upb_FieldDef* f) { return f->index_; } - return _upb_DefType_Unpack(val, UPB_DEFTYPE_ONEOF); -} +upb_Label upb_FieldDef_Label(const upb_FieldDef* f) { return f->label_; } -bool _upb_MessageDef_Insert(upb_MessageDef* m, const char* name, size_t len, - upb_value v, upb_Arena* a) { - return upb_strtable_insert(&m->ntof, name, len, v, a); -} +uint32_t upb_FieldDef_Number(const upb_FieldDef* f) { return f->number_; } -bool upb_MessageDef_FindByNameWithSize(const upb_MessageDef* m, - const char* name, size_t len, - const upb_FieldDef** out_f, - const upb_OneofDef** out_o) { - upb_value val; +bool upb_FieldDef_IsExtension(const upb_FieldDef* f) { return f->is_extension; } - if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) { - return false; - } +bool upb_FieldDef_IsPacked(const upb_FieldDef* f) { return f->is_packed; } - const upb_FieldDef* f = _upb_DefType_Unpack(val, UPB_DEFTYPE_FIELD); - const upb_OneofDef* o = _upb_DefType_Unpack(val, UPB_DEFTYPE_ONEOF); - if (out_f) *out_f = f; - if (out_o) *out_o = o; - return f || o; /* False if this was a JSON name. */ +const char* upb_FieldDef_Name(const upb_FieldDef* f) { + return _upb_DefBuilder_FullToShort(f->full_name); } -const upb_FieldDef* upb_MessageDef_FindByJsonNameWithSize( - const upb_MessageDef* m, const char* name, size_t size) { - upb_value val; - const upb_FieldDef* f; +const char* upb_FieldDef_JsonName(const upb_FieldDef* f) { + return f->json_name; +} - if (!upb_strtable_lookup2(&m->ntof, name, size, &val)) { - return NULL; - } +bool upb_FieldDef_HasJsonName(const upb_FieldDef* f) { + return f->has_json_name; +} - f = _upb_DefType_Unpack(val, UPB_DEFTYPE_FIELD); - if (!f) f = _upb_DefType_Unpack(val, UPB_DEFTYPE_FIELD_JSONNAME); +const upb_FileDef* upb_FieldDef_File(const upb_FieldDef* f) { return f->file; } - return f; +const upb_MessageDef* upb_FieldDef_ContainingType(const upb_FieldDef* f) { + return f->msgdef; } -int upb_MessageDef_ExtensionRangeCount(const upb_MessageDef* m) { - return m->ext_range_count; +const upb_MessageDef* upb_FieldDef_ExtensionScope(const upb_FieldDef* f) { + return f->is_extension ? f->scope.extension_scope : NULL; } -int upb_MessageDef_ReservedRangeCount(const upb_MessageDef* m) { - return m->res_range_count; +const upb_OneofDef* upb_FieldDef_ContainingOneof(const upb_FieldDef* f) { + return f->is_extension ? NULL : f->scope.oneof; } -int upb_MessageDef_ReservedNameCount(const upb_MessageDef* m) { - return m->res_name_count; +const upb_OneofDef* upb_FieldDef_RealContainingOneof(const upb_FieldDef* f) { + const upb_OneofDef* oneof = upb_FieldDef_ContainingOneof(f); + if (!oneof || upb_OneofDef_IsSynthetic(oneof)) return NULL; + return oneof; } -int upb_MessageDef_FieldCount(const upb_MessageDef* m) { - return m->field_count; -} +upb_MessageValue upb_FieldDef_Default(const upb_FieldDef* f) { + upb_MessageValue ret; -int upb_MessageDef_OneofCount(const upb_MessageDef* m) { - return m->oneof_count; -} + if (upb_FieldDef_IsRepeated(f) || upb_FieldDef_IsSubMessage(f)) { + return (upb_MessageValue){.msg_val = NULL}; + } -int upb_MessageDef_RealOneofCount(const upb_MessageDef* m) { - return m->real_oneof_count; -} + switch (upb_FieldDef_CType(f)) { + case kUpb_CType_Bool: + return (upb_MessageValue){.bool_val = f->defaultval.boolean}; + case kUpb_CType_Int64: + return (upb_MessageValue){.int64_val = f->defaultval.sint}; + case kUpb_CType_UInt64: + return (upb_MessageValue){.uint64_val = f->defaultval.uint}; + case kUpb_CType_Enum: + case kUpb_CType_Int32: + return (upb_MessageValue){.int32_val = (int32_t)f->defaultval.sint}; + case kUpb_CType_UInt32: + return (upb_MessageValue){.uint32_val = (uint32_t)f->defaultval.uint}; + case kUpb_CType_Float: + return (upb_MessageValue){.float_val = f->defaultval.flt}; + case kUpb_CType_Double: + return (upb_MessageValue){.double_val = f->defaultval.dbl}; + case kUpb_CType_String: + case kUpb_CType_Bytes: { + str_t* str = f->defaultval.str; + if (str) { + return (upb_MessageValue){ + .str_val = (upb_StringView){.data = str->str, .size = str->len}}; + } else { + return (upb_MessageValue){ + .str_val = (upb_StringView){.data = NULL, .size = 0}}; + } + } + default: + UPB_UNREACHABLE(); + } -int upb_MessageDef_NestedMessageCount(const upb_MessageDef* m) { - return m->nested_msg_count; + return ret; } -int upb_MessageDef_NestedEnumCount(const upb_MessageDef* m) { - return m->nested_enum_count; +const upb_MessageDef* upb_FieldDef_MessageSubDef(const upb_FieldDef* f) { + return upb_FieldDef_CType(f) == kUpb_CType_Message ? f->sub.msgdef : NULL; } -int upb_MessageDef_NestedExtensionCount(const upb_MessageDef* m) { - return m->nested_ext_count; +const upb_EnumDef* upb_FieldDef_EnumSubDef(const upb_FieldDef* f) { + return upb_FieldDef_CType(f) == kUpb_CType_Enum ? f->sub.enumdef : NULL; } -const upb_MiniTable* upb_MessageDef_MiniTable(const upb_MessageDef* m) { - return m->layout; +const upb_MiniTableField* upb_FieldDef_MiniTable(const upb_FieldDef* f) { + if (upb_FieldDef_IsExtension(f)) { + const upb_FileDef* file = upb_FieldDef_File(f); + return (upb_MiniTableField*)_upb_FileDef_ExtensionMiniTable( + file, f->layout_index); + } else { + const upb_MiniTable* layout = upb_MessageDef_MiniTable(f->msgdef); + return &layout->fields[f->layout_index]; + } } -const upb_ExtensionRange* upb_MessageDef_ExtensionRange(const upb_MessageDef* m, - int i) { - UPB_ASSERT(0 <= i && i < m->ext_range_count); - return _upb_ExtensionRange_At(m->ext_ranges, i); +const upb_MiniTableExtension* _upb_FieldDef_ExtensionMiniTable( + const upb_FieldDef* f) { + UPB_ASSERT(upb_FieldDef_IsExtension(f)); + const upb_FileDef* file = upb_FieldDef_File(f); + return _upb_FileDef_ExtensionMiniTable(file, f->layout_index); } -const upb_MessageReservedRange* upb_MessageDef_ReservedRange( - const upb_MessageDef* m, int i) { - UPB_ASSERT(0 <= i && i < m->res_range_count); - return _upb_MessageReservedRange_At(m->res_ranges, i); +bool _upb_FieldDef_IsClosedEnum(const upb_FieldDef* f) { + if (f->type_ != kUpb_FieldType_Enum) return false; + return upb_EnumDef_IsClosed(f->sub.enumdef); } -upb_StringView upb_MessageDef_ReservedName(const upb_MessageDef* m, int i) { - UPB_ASSERT(0 <= i && i < m->res_name_count); - return m->res_names[i]; +bool _upb_FieldDef_IsProto3Optional(const upb_FieldDef* f) { + return f->is_proto3_optional; } -const upb_FieldDef* upb_MessageDef_Field(const upb_MessageDef* m, int i) { - UPB_ASSERT(0 <= i && i < m->field_count); - return _upb_FieldDef_At(m->fields, i); -} +int _upb_FieldDef_LayoutIndex(const upb_FieldDef* f) { return f->layout_index; } -const upb_OneofDef* upb_MessageDef_Oneof(const upb_MessageDef* m, int i) { - UPB_ASSERT(0 <= i && i < m->oneof_count); - return _upb_OneofDef_At(m->oneofs, i); -} +uint64_t _upb_FieldDef_Modifiers(const upb_FieldDef* f) { + uint64_t out = f->is_packed ? kUpb_FieldModifier_IsPacked : 0; -const upb_MessageDef* upb_MessageDef_NestedMessage(const upb_MessageDef* m, - int i) { - UPB_ASSERT(0 <= i && i < m->nested_msg_count); - return &m->nested_msgs[i]; -} + switch (f->label_) { + case kUpb_Label_Optional: + if (!upb_FieldDef_HasPresence(f)) { + out |= kUpb_FieldModifier_IsProto3Singular; + } + break; + case kUpb_Label_Repeated: + out |= kUpb_FieldModifier_IsRepeated; + break; + case kUpb_Label_Required: + out |= kUpb_FieldModifier_IsRequired; + break; + } -const upb_EnumDef* upb_MessageDef_NestedEnum(const upb_MessageDef* m, int i) { - UPB_ASSERT(0 <= i && i < m->nested_enum_count); - return _upb_EnumDef_At(m->nested_enums, i); + if (_upb_FieldDef_IsClosedEnum(f)) { + out |= kUpb_FieldModifier_IsClosedEnum; + } + return out; } -const upb_FieldDef* upb_MessageDef_NestedExtension(const upb_MessageDef* m, - int i) { - UPB_ASSERT(0 <= i && i < m->nested_ext_count); - return _upb_FieldDef_At(m->nested_exts, i); +bool upb_FieldDef_HasDefault(const upb_FieldDef* f) { return f->has_default; } +bool upb_FieldDef_HasPresence(const upb_FieldDef* f) { return f->has_presence; } + +bool upb_FieldDef_HasSubDef(const upb_FieldDef* f) { + return upb_FieldDef_IsSubMessage(f) || + upb_FieldDef_CType(f) == kUpb_CType_Enum; } -upb_WellKnown upb_MessageDef_WellKnownType(const upb_MessageDef* m) { - return m->well_known_type; +bool upb_FieldDef_IsMap(const upb_FieldDef* f) { + return upb_FieldDef_IsRepeated(f) && upb_FieldDef_IsSubMessage(f) && + upb_MessageDef_IsMapEntry(upb_FieldDef_MessageSubDef(f)); } -bool _upb_MessageDef_InMessageSet(const upb_MessageDef* m) { - return m->in_message_set; +bool upb_FieldDef_IsOptional(const upb_FieldDef* f) { + return upb_FieldDef_Label(f) == kUpb_Label_Optional; } -const upb_FieldDef* upb_MessageDef_FindFieldByName(const upb_MessageDef* m, - const char* name) { - return upb_MessageDef_FindFieldByNameWithSize(m, name, strlen(name)); +bool upb_FieldDef_IsPrimitive(const upb_FieldDef* f) { + return !upb_FieldDef_IsString(f) && !upb_FieldDef_IsSubMessage(f); } -const upb_OneofDef* upb_MessageDef_FindOneofByName(const upb_MessageDef* m, - const char* name) { - return upb_MessageDef_FindOneofByNameWithSize(m, name, strlen(name)); +bool upb_FieldDef_IsRepeated(const upb_FieldDef* f) { + return upb_FieldDef_Label(f) == kUpb_Label_Repeated; } -bool upb_MessageDef_IsMapEntry(const upb_MessageDef* m) { - return UPB_DESC(MessageOptions_map_entry)(m->opts); +bool upb_FieldDef_IsRequired(const upb_FieldDef* f) { + return upb_FieldDef_Label(f) == kUpb_Label_Required; } -bool upb_MessageDef_IsMessageSet(const upb_MessageDef* m) { - return UPB_DESC(MessageOptions_message_set_wire_format)(m->opts); +bool upb_FieldDef_IsString(const upb_FieldDef* f) { + return upb_FieldDef_CType(f) == kUpb_CType_String || + upb_FieldDef_CType(f) == kUpb_CType_Bytes; } -static upb_MiniTable* _upb_MessageDef_MakeMiniTable(upb_DefBuilder* ctx, - const upb_MessageDef* m) { - upb_StringView desc; - // Note: this will assign layout_index for fields, so upb_FieldDef_MiniTable() - // is safe to call only after this call. - bool ok = upb_MessageDef_MiniDescriptorEncode(m, ctx->tmp_arena, &desc); - if (!ok) _upb_DefBuilder_OomErr(ctx); - - void** scratch_data = _upb_DefPool_ScratchData(ctx->symtab); - size_t* scratch_size = _upb_DefPool_ScratchSize(ctx->symtab); - upb_MiniTable* ret = upb_MiniTable_BuildWithBuf( - desc.data, desc.size, ctx->platform, ctx->arena, scratch_data, - scratch_size, ctx->status); - if (!ret) _upb_DefBuilder_FailJmp(ctx); - - return ret; +bool upb_FieldDef_IsSubMessage(const upb_FieldDef* f) { + return upb_FieldDef_CType(f) == kUpb_CType_Message; } -void _upb_MessageDef_Resolve(upb_DefBuilder* ctx, upb_MessageDef* m) { - for (int i = 0; i < m->field_count; i++) { - upb_FieldDef* f = (upb_FieldDef*)upb_MessageDef_Field(m, i); - _upb_FieldDef_Resolve(ctx, m->full_name, f); - } - - m->in_message_set = false; - for (int i = 0; i < upb_MessageDef_NestedExtensionCount(m); i++) { - upb_FieldDef* ext = (upb_FieldDef*)upb_MessageDef_NestedExtension(m, i); - _upb_FieldDef_Resolve(ctx, m->full_name, ext); - if (upb_FieldDef_Type(ext) == kUpb_FieldType_Message && - upb_FieldDef_Label(ext) == kUpb_Label_Optional && - upb_FieldDef_MessageSubDef(ext) == m && - UPB_DESC(MessageOptions_message_set_wire_format)( - upb_MessageDef_Options(upb_FieldDef_ContainingType(ext)))) { - m->in_message_set = true; - } - } - - for (int i = 0; i < upb_MessageDef_NestedMessageCount(m); i++) { - upb_MessageDef* n = (upb_MessageDef*)upb_MessageDef_NestedMessage(m, i); - _upb_MessageDef_Resolve(ctx, n); - } +static bool between(int32_t x, int32_t low, int32_t high) { + return x >= low && x <= high; } -void _upb_MessageDef_InsertField(upb_DefBuilder* ctx, upb_MessageDef* m, - const upb_FieldDef* f) { - const int32_t field_number = upb_FieldDef_Number(f); - - if (field_number <= 0 || field_number > kUpb_MaxFieldNumber) { - _upb_DefBuilder_Errf(ctx, "invalid field number (%u)", field_number); - } - - const char* json_name = upb_FieldDef_JsonName(f); - const char* shortname = upb_FieldDef_Name(f); - const size_t shortnamelen = strlen(shortname); +bool upb_FieldDef_checklabel(int32_t label) { return between(label, 1, 3); } +bool upb_FieldDef_checktype(int32_t type) { return between(type, 1, 11); } +bool upb_FieldDef_checkintfmt(int32_t fmt) { return between(fmt, 1, 3); } - upb_value v = upb_value_constptr(f); +bool upb_FieldDef_checkdescriptortype(int32_t type) { + return between(type, 1, 18); +} - upb_value existing_v; - if (upb_strtable_lookup(&m->ntof, shortname, &existing_v)) { - _upb_DefBuilder_Errf(ctx, "duplicate field name (%s)", shortname); - } +static bool streql2(const char* a, size_t n, const char* b) { + return n == strlen(b) && memcmp(a, b, n) == 0; +} - const upb_value field_v = _upb_DefType_Pack(f, UPB_DEFTYPE_FIELD); - bool ok = - _upb_MessageDef_Insert(m, shortname, shortnamelen, field_v, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); +// Implement the transformation as described in the spec: +// 1. upper case all letters after an underscore. +// 2. remove all underscores. +static char* make_json_name(const char* name, size_t size, upb_Arena* a) { + char* out = upb_Arena_Malloc(a, size + 1); // +1 is to add a trailing '\0' + if (out == NULL) return NULL; - if (strcmp(shortname, json_name) != 0) { - if (upb_strtable_lookup(&m->ntof, json_name, &v)) { - _upb_DefBuilder_Errf(ctx, "duplicate json_name (%s)", json_name); + bool ucase_next = false; + char* des = out; + for (size_t i = 0; i < size; i++) { + if (name[i] == '_') { + ucase_next = true; + } else { + *des++ = ucase_next ? toupper(name[i]) : name[i]; + ucase_next = false; } - - const size_t json_size = strlen(json_name); - const upb_value json_v = _upb_DefType_Pack(f, UPB_DEFTYPE_FIELD_JSONNAME); - ok = _upb_MessageDef_Insert(m, json_name, json_size, json_v, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); - } - - if (upb_inttable_lookup(&m->itof, field_number, NULL)) { - _upb_DefBuilder_Errf(ctx, "duplicate field number (%u)", field_number); } + *des++ = '\0'; + return out; +} - ok = upb_inttable_insert(&m->itof, field_number, v, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); +static str_t* newstr(upb_DefBuilder* ctx, const char* data, size_t len) { + str_t* ret = _upb_DefBuilder_Alloc(ctx, sizeof(*ret) + len); + if (!ret) _upb_DefBuilder_OomErr(ctx); + ret->len = len; + if (len) memcpy(ret->str, data, len); + ret->str[len] = '\0'; + return ret; } -void _upb_MessageDef_CreateMiniTable(upb_DefBuilder* ctx, upb_MessageDef* m) { - if (ctx->layout == NULL) { - m->layout = _upb_MessageDef_MakeMiniTable(ctx, m); - } else { - UPB_ASSERT(ctx->msg_count < ctx->layout->msg_count); - m->layout = ctx->layout->msgs[ctx->msg_count++]; - UPB_ASSERT(m->field_count == m->layout->field_count); +static str_t* unescape(upb_DefBuilder* ctx, const upb_FieldDef* f, + const char* data, size_t len) { + // Size here is an upper bound; escape sequences could ultimately shrink it. + str_t* ret = _upb_DefBuilder_Alloc(ctx, sizeof(*ret) + len); + char* dst = &ret->str[0]; + const char* src = data; + const char* end = data + len; - // We don't need the result of this call, but it will assign layout_index - // for all the fields in O(n lg n) time. - _upb_FieldDefs_Sorted(m->fields, m->field_count, ctx->tmp_arena); + while (src < end) { + if (*src == '\\') { + src++; + *dst++ = _upb_DefBuilder_ParseEscape(ctx, f, &src, end); + } else { + *dst++ = *src++; + } } - for (int i = 0; i < m->nested_msg_count; i++) { - upb_MessageDef* nested = - (upb_MessageDef*)upb_MessageDef_NestedMessage(m, i); - _upb_MessageDef_CreateMiniTable(ctx, nested); - } + ret->len = dst - &ret->str[0]; + return ret; } -void _upb_MessageDef_LinkMiniTable(upb_DefBuilder* ctx, - const upb_MessageDef* m) { - for (int i = 0; i < upb_MessageDef_NestedExtensionCount(m); i++) { - const upb_FieldDef* ext = upb_MessageDef_NestedExtension(m, i); - _upb_FieldDef_BuildMiniTableExtension(ctx, ext); - } +static void parse_default(upb_DefBuilder* ctx, const char* str, size_t len, + upb_FieldDef* f) { + char* end; + char nullz[64]; + errno = 0; - for (int i = 0; i < m->nested_msg_count; i++) { - _upb_MessageDef_LinkMiniTable(ctx, upb_MessageDef_NestedMessage(m, i)); + switch (upb_FieldDef_CType(f)) { + case kUpb_CType_Int32: + case kUpb_CType_Int64: + case kUpb_CType_UInt32: + case kUpb_CType_UInt64: + case kUpb_CType_Double: + case kUpb_CType_Float: + // Standard C number parsing functions expect null-terminated strings. + if (len >= sizeof(nullz) - 1) { + _upb_DefBuilder_Errf(ctx, "Default too long: %.*s", (int)len, str); + } + memcpy(nullz, str, len); + nullz[len] = '\0'; + str = nullz; + break; + default: + break; } - if (ctx->layout) return; - - for (int i = 0; i < m->field_count; i++) { - const upb_FieldDef* f = upb_MessageDef_Field(m, i); - const upb_MessageDef* sub_m = upb_FieldDef_MessageSubDef(f); - const upb_EnumDef* sub_e = upb_FieldDef_EnumSubDef(f); - const int layout_index = _upb_FieldDef_LayoutIndex(f); - upb_MiniTable* mt = (upb_MiniTable*)upb_MessageDef_MiniTable(m); - - UPB_ASSERT(layout_index < m->field_count); - upb_MiniTableField* mt_f = - (upb_MiniTableField*)&m->layout->fields[layout_index]; - if (sub_m) { - if (!mt->subs) { - _upb_DefBuilder_Errf(ctx, "unexpected submsg for (%s)", m->full_name); + switch (upb_FieldDef_CType(f)) { + case kUpb_CType_Int32: { + long val = strtol(str, &end, 0); + if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end) { + goto invalid; } - UPB_ASSERT(mt_f); - UPB_ASSERT(sub_m->layout); - if (UPB_UNLIKELY(!upb_MiniTable_SetSubMessage(mt, mt_f, sub_m->layout))) { - _upb_DefBuilder_Errf(ctx, "invalid submsg for (%s)", m->full_name); + f->defaultval.sint = val; + break; + } + case kUpb_CType_Enum: { + const upb_EnumDef* e = f->sub.enumdef; + const upb_EnumValueDef* ev = + upb_EnumDef_FindValueByNameWithSize(e, str, len); + if (!ev) { + goto invalid; } - } else if (_upb_FieldDef_IsClosedEnum(f)) { - const upb_MiniTableEnum* mt_e = _upb_EnumDef_MiniTable(sub_e); - if (UPB_UNLIKELY(!upb_MiniTable_SetSubEnum(mt, mt_f, mt_e))) { - _upb_DefBuilder_Errf(ctx, "invalid subenum for (%s)", m->full_name); + f->defaultval.sint = upb_EnumValueDef_Number(ev); + break; + } + case kUpb_CType_Int64: { + long long val = strtoll(str, &end, 0); + if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || *end) { + goto invalid; } + f->defaultval.sint = val; + break; } - } - -#ifndef NDEBUG - for (int i = 0; i < m->field_count; i++) { - const upb_FieldDef* f = upb_MessageDef_Field(m, i); - const int layout_index = _upb_FieldDef_LayoutIndex(f); - UPB_ASSERT(layout_index < m->layout->field_count); - const upb_MiniTableField* mt_f = &m->layout->fields[layout_index]; - UPB_ASSERT(upb_FieldDef_Type(f) == upb_MiniTableField_Type(mt_f)); - UPB_ASSERT(upb_FieldDef_CType(f) == upb_MiniTableField_CType(mt_f)); - UPB_ASSERT(upb_FieldDef_HasPresence(f) == - upb_MiniTableField_HasPresence(mt_f)); - } -#endif -} - -static uint64_t _upb_MessageDef_Modifiers(const upb_MessageDef* m) { - uint64_t out = 0; - if (upb_FileDef_Syntax(m->file) == kUpb_Syntax_Proto3) { - out |= kUpb_MessageModifier_ValidateUtf8; - out |= kUpb_MessageModifier_DefaultIsPacked; - } - if (m->ext_range_count) { - out |= kUpb_MessageModifier_IsExtendable; - } - return out; -} - -static bool _upb_MessageDef_EncodeMap(upb_DescState* s, const upb_MessageDef* m, - upb_Arena* a) { - if (m->field_count != 2) return false; - - const upb_FieldDef* key_field = upb_MessageDef_Field(m, 0); - const upb_FieldDef* val_field = upb_MessageDef_Field(m, 1); - if (key_field == NULL || val_field == NULL) return false; + case kUpb_CType_UInt32: { + unsigned long val = strtoul(str, &end, 0); + if (val > UINT32_MAX || errno == ERANGE || *end) { + goto invalid; + } + f->defaultval.uint = val; + break; + } + case kUpb_CType_UInt64: { + unsigned long long val = strtoull(str, &end, 0); + if (val > UINT64_MAX || errno == ERANGE || *end) { + goto invalid; + } + f->defaultval.uint = val; + break; + } + case kUpb_CType_Double: { + double val = strtod(str, &end); + if (errno == ERANGE || *end) { + goto invalid; + } + f->defaultval.dbl = val; + break; + } + case kUpb_CType_Float: { + float val = strtof(str, &end); + if (errno == ERANGE || *end) { + goto invalid; + } + f->defaultval.flt = val; + break; + } + case kUpb_CType_Bool: { + if (streql2(str, len, "false")) { + f->defaultval.boolean = false; + } else if (streql2(str, len, "true")) { + f->defaultval.boolean = true; + } else { + goto invalid; + } + break; + } + case kUpb_CType_String: + f->defaultval.str = newstr(ctx, str, len); + break; + case kUpb_CType_Bytes: + f->defaultval.str = unescape(ctx, f, str, len); + break; + case kUpb_CType_Message: + /* Should not have a default value. */ + _upb_DefBuilder_Errf(ctx, "Message should not have a default (%s)", + upb_FieldDef_FullName(f)); + } - UPB_ASSERT(_upb_FieldDef_LayoutIndex(key_field) == 0); - UPB_ASSERT(_upb_FieldDef_LayoutIndex(val_field) == 1); + return; - s->ptr = upb_MtDataEncoder_EncodeMap( - &s->e, s->ptr, upb_FieldDef_Type(key_field), upb_FieldDef_Type(val_field), - _upb_FieldDef_Modifiers(key_field), _upb_FieldDef_Modifiers(val_field)); - return true; +invalid: + _upb_DefBuilder_Errf(ctx, "Invalid default '%.*s' for field %s of type %d", + (int)len, str, upb_FieldDef_FullName(f), + (int)upb_FieldDef_Type(f)); } -static bool _upb_MessageDef_EncodeMessage(upb_DescState* s, - const upb_MessageDef* m, - upb_Arena* a) { - const upb_FieldDef** sorted = NULL; - if (!m->is_sorted) { - sorted = _upb_FieldDefs_Sorted(m->fields, m->field_count, a); - if (!sorted) return false; +static void set_default_default(upb_DefBuilder* ctx, upb_FieldDef* f) { + switch (upb_FieldDef_CType(f)) { + case kUpb_CType_Int32: + case kUpb_CType_Int64: + f->defaultval.sint = 0; + break; + case kUpb_CType_UInt64: + case kUpb_CType_UInt32: + f->defaultval.uint = 0; + break; + case kUpb_CType_Double: + case kUpb_CType_Float: + f->defaultval.dbl = 0; + break; + case kUpb_CType_String: + case kUpb_CType_Bytes: + f->defaultval.str = newstr(ctx, NULL, 0); + break; + case kUpb_CType_Bool: + f->defaultval.boolean = false; + break; + case kUpb_CType_Enum: { + const upb_EnumValueDef* v = upb_EnumDef_Value(f->sub.enumdef, 0); + f->defaultval.sint = upb_EnumValueDef_Number(v); + break; + } + case kUpb_CType_Message: + break; } +} - s->ptr = upb_MtDataEncoder_StartMessage(&s->e, s->ptr, - _upb_MessageDef_Modifiers(m)); - - for (int i = 0; i < m->field_count; i++) { - const upb_FieldDef* f = sorted ? sorted[i] : upb_MessageDef_Field(m, i); - const upb_FieldType type = upb_FieldDef_Type(f); - const int number = upb_FieldDef_Number(f); - const uint64_t modifiers = _upb_FieldDef_Modifiers(f); +static void _upb_FieldDef_Create(upb_DefBuilder* ctx, const char* prefix, + const UPB_DESC(FieldDescriptorProto) * + field_proto, + upb_MessageDef* m, upb_FieldDef* f) { + // Must happen before _upb_DefBuilder_Add() + f->file = _upb_DefBuilder_File(ctx); - if (!_upb_DescState_Grow(s, a)) return false; - s->ptr = upb_MtDataEncoder_PutField(&s->e, s->ptr, type, number, modifiers); + if (!UPB_DESC(FieldDescriptorProto_has_name)(field_proto)) { + _upb_DefBuilder_Errf(ctx, "field has no name"); } - for (int i = 0; i < m->real_oneof_count; i++) { - if (!_upb_DescState_Grow(s, a)) return false; - s->ptr = upb_MtDataEncoder_StartOneof(&s->e, s->ptr); + const upb_StringView name = UPB_DESC(FieldDescriptorProto_name)(field_proto); - const upb_OneofDef* o = upb_MessageDef_Oneof(m, i); - const int field_count = upb_OneofDef_FieldCount(o); - for (int j = 0; j < field_count; j++) { - const int number = upb_FieldDef_Number(upb_OneofDef_Field(o, j)); + f->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); + f->label_ = (int)UPB_DESC(FieldDescriptorProto_label)(field_proto); + f->number_ = UPB_DESC(FieldDescriptorProto_number)(field_proto); + f->is_proto3_optional = + UPB_DESC(FieldDescriptorProto_proto3_optional)(field_proto); + f->msgdef = m; + f->scope.oneof = NULL; - if (!_upb_DescState_Grow(s, a)) return false; - s->ptr = upb_MtDataEncoder_PutOneofField(&s->e, s->ptr, number); - } + f->has_json_name = UPB_DESC(FieldDescriptorProto_has_json_name)(field_proto); + if (f->has_json_name) { + const upb_StringView sv = + UPB_DESC(FieldDescriptorProto_json_name)(field_proto); + f->json_name = upb_strdup2(sv.data, sv.size, ctx->arena); + } else { + f->json_name = make_json_name(name.data, name.size, ctx->arena); } + if (!f->json_name) _upb_DefBuilder_OomErr(ctx); - return true; -} - -static bool _upb_MessageDef_EncodeMessageSet(upb_DescState* s, - const upb_MessageDef* m, - upb_Arena* a) { - s->ptr = upb_MtDataEncoder_EncodeMessageSet(&s->e, s->ptr); - - return true; -} + const bool has_type = UPB_DESC(FieldDescriptorProto_has_type)(field_proto); + const bool has_type_name = + UPB_DESC(FieldDescriptorProto_has_type_name)(field_proto); -bool upb_MessageDef_MiniDescriptorEncode(const upb_MessageDef* m, upb_Arena* a, - upb_StringView* out) { - upb_DescState s; - _upb_DescState_Init(&s); + f->type_ = (int)UPB_DESC(FieldDescriptorProto_type)(field_proto); - if (!_upb_DescState_Grow(&s, a)) return false; + if (has_type) { + switch (f->type_) { + case kUpb_FieldType_Message: + case kUpb_FieldType_Group: + case kUpb_FieldType_Enum: + if (!has_type_name) { + _upb_DefBuilder_Errf(ctx, "field of type %d requires type name (%s)", + (int)f->type_, f->full_name); + } + break; + default: + if (has_type_name) { + _upb_DefBuilder_Errf( + ctx, "invalid type for field with type_name set (%s, %d)", + f->full_name, (int)f->type_); + } + } + } - if (upb_MessageDef_IsMapEntry(m)) { - if (!_upb_MessageDef_EncodeMap(&s, m, a)) return false; - } else if (UPB_DESC(MessageOptions_message_set_wire_format)(m->opts)) { - if (!_upb_MessageDef_EncodeMessageSet(&s, m, a)) return false; + if (!has_type && has_type_name) { + f->type_ = + UPB_FIELD_TYPE_UNSPECIFIED; // We'll assign this in resolve_subdef() } else { - if (!_upb_MessageDef_EncodeMessage(&s, m, a)) return false; + if (f->type_ < kUpb_FieldType_Double || f->type_ > kUpb_FieldType_SInt64) { + _upb_DefBuilder_Errf(ctx, "invalid type for field %s (%d)", f->full_name, + f->type_); + } } - if (!_upb_DescState_Grow(&s, a)) return false; - *s.ptr = '\0'; + if (f->label_ < kUpb_Label_Optional || f->label_ > kUpb_Label_Repeated) { + _upb_DefBuilder_Errf(ctx, "invalid label for field %s (%d)", f->full_name, + f->label_); + } - out->data = s.buf; - out->size = s.ptr - s.buf; - return true; -} + /* We can't resolve the subdef or (in the case of extensions) the containing + * message yet, because it may not have been defined yet. We stash a pointer + * to the field_proto until later when we can properly resolve it. */ + f->sub.unresolved = field_proto; -static upb_StringView* _upb_ReservedNames_New(upb_DefBuilder* ctx, int n, - const upb_StringView* protos) { - upb_StringView* sv = _upb_DefBuilder_Alloc(ctx, sizeof(upb_StringView) * n); - for (size_t i = 0; i < n; i++) { - sv[i].data = - upb_strdup2(protos[i].data, protos[i].size, _upb_DefBuilder_Arena(ctx)); - sv[i].size = protos[i].size; + if (f->label_ == kUpb_Label_Required && + upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto3) { + _upb_DefBuilder_Errf(ctx, "proto3 fields cannot be required (%s)", + f->full_name); } - return sv; -} -static void create_msgdef(upb_DefBuilder* ctx, const char* prefix, - const UPB_DESC(DescriptorProto) * msg_proto, - const upb_MessageDef* containing_type, - upb_MessageDef* m) { - const UPB_DESC(OneofDescriptorProto)* const* oneofs; - const UPB_DESC(FieldDescriptorProto)* const* fields; - const UPB_DESC(DescriptorProto_ExtensionRange)* const* ext_ranges; - const UPB_DESC(DescriptorProto_ReservedRange)* const* res_ranges; - const upb_StringView* res_names; - size_t n_oneof, n_field, n_enum, n_ext, n_msg; - size_t n_ext_range, n_res_range, n_res_name; - upb_StringView name; + if (UPB_DESC(FieldDescriptorProto_has_oneof_index)(field_proto)) { + int oneof_index = UPB_DESC(FieldDescriptorProto_oneof_index)(field_proto); - // Must happen before _upb_DefBuilder_Add() - m->file = _upb_DefBuilder_File(ctx); + if (upb_FieldDef_Label(f) != kUpb_Label_Optional) { + _upb_DefBuilder_Errf(ctx, "fields in oneof must have OPTIONAL label (%s)", + f->full_name); + } - m->containing_type = containing_type; - m->is_sorted = true; + if (!m) { + _upb_DefBuilder_Errf(ctx, "oneof field (%s) has no containing msg", + f->full_name); + } - name = UPB_DESC(DescriptorProto_name)(msg_proto); + if (oneof_index >= upb_MessageDef_OneofCount(m)) { + _upb_DefBuilder_Errf(ctx, "oneof_index out of range (%s)", f->full_name); + } - m->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); - _upb_DefBuilder_Add(ctx, m->full_name, _upb_DefType_Pack(m, UPB_DEFTYPE_MSG)); - - oneofs = UPB_DESC(DescriptorProto_oneof_decl)(msg_proto, &n_oneof); - fields = UPB_DESC(DescriptorProto_field)(msg_proto, &n_field); - ext_ranges = - UPB_DESC(DescriptorProto_extension_range)(msg_proto, &n_ext_range); - res_ranges = - UPB_DESC(DescriptorProto_reserved_range)(msg_proto, &n_res_range); - res_names = UPB_DESC(DescriptorProto_reserved_name)(msg_proto, &n_res_name); - - bool ok = upb_inttable_init(&m->itof, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); - - ok = upb_strtable_init(&m->ntof, n_oneof + n_field, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); - - UPB_DEF_SET_OPTIONS(m->opts, DescriptorProto, MessageOptions, msg_proto); + upb_OneofDef* oneof = (upb_OneofDef*)upb_MessageDef_Oneof(m, oneof_index); + f->scope.oneof = oneof; - m->oneof_count = n_oneof; - m->oneofs = _upb_OneofDefs_New(ctx, n_oneof, oneofs, m); + _upb_OneofDef_Insert(ctx, oneof, f, name.data, name.size); + } - m->field_count = n_field; - m->fields = - _upb_FieldDefs_New(ctx, n_field, fields, m->full_name, m, &m->is_sorted); + UPB_DEF_SET_OPTIONS(f->opts, FieldDescriptorProto, FieldOptions, field_proto); - // Message Sets may not contain fields. - if (UPB_UNLIKELY(UPB_DESC(MessageOptions_message_set_wire_format)(m->opts))) { - if (UPB_UNLIKELY(n_field > 0)) { - _upb_DefBuilder_Errf(ctx, "invalid message set (%s)", m->full_name); - } + if (UPB_DESC(FieldOptions_has_packed)(f->opts)) { + f->is_packed = UPB_DESC(FieldOptions_packed)(f->opts); + } else { + // Repeated fields default to packed for proto3 only. + f->is_packed = has_type && upb_FieldDef_IsPrimitive(f) && + f->label_ == kUpb_Label_Repeated && + upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto3; } - m->ext_range_count = n_ext_range; - m->ext_ranges = _upb_ExtensionRanges_New(ctx, n_ext_range, ext_ranges, m); + f->has_presence = + (!upb_FieldDef_IsRepeated(f)) && + (f->type_ == kUpb_FieldType_Message || f->type_ == kUpb_FieldType_Group || + upb_FieldDef_ContainingOneof(f) || + (upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto2)); +} - m->res_range_count = n_res_range; - m->res_ranges = - _upb_MessageReservedRanges_New(ctx, n_res_range, res_ranges, m); +static void _upb_FieldDef_CreateExt(upb_DefBuilder* ctx, const char* prefix, + const UPB_DESC(FieldDescriptorProto) * + field_proto, + upb_MessageDef* m, upb_FieldDef* f) { + f->is_extension = true; + _upb_FieldDef_Create(ctx, prefix, field_proto, m, f); - m->res_name_count = n_res_name; - m->res_names = _upb_ReservedNames_New(ctx, n_res_name, res_names); + if (UPB_DESC(FieldDescriptorProto_has_oneof_index)(field_proto)) { + _upb_DefBuilder_Errf(ctx, "oneof_index provided for extension field (%s)", + f->full_name); + } - const size_t synthetic_count = _upb_OneofDefs_Finalize(ctx, m); - m->real_oneof_count = m->oneof_count - synthetic_count; + f->scope.extension_scope = m; + _upb_DefBuilder_Add(ctx, f->full_name, _upb_DefType_Pack(f, UPB_DEFTYPE_EXT)); + f->layout_index = ctx->ext_count++; - assign_msg_wellknowntype(m); - upb_inttable_compact(&m->itof, ctx->arena); + if (ctx->layout) { + UPB_ASSERT(_upb_FieldDef_ExtensionMiniTable(f)->field.number == f->number_); + } +} - const UPB_DESC(EnumDescriptorProto)* const* enums = - UPB_DESC(DescriptorProto_enum_type)(msg_proto, &n_enum); - m->nested_enum_count = n_enum; - m->nested_enums = _upb_EnumDefs_New(ctx, n_enum, enums, m); +static void _upb_FieldDef_CreateNotExt(upb_DefBuilder* ctx, const char* prefix, + const UPB_DESC(FieldDescriptorProto) * + field_proto, + upb_MessageDef* m, upb_FieldDef* f) { + f->is_extension = false; + _upb_FieldDef_Create(ctx, prefix, field_proto, m, f); - const UPB_DESC(FieldDescriptorProto)* const* exts = - UPB_DESC(DescriptorProto_extension)(msg_proto, &n_ext); - m->nested_ext_count = n_ext; - m->nested_exts = _upb_Extensions_New(ctx, n_ext, exts, m->full_name, m); + if (!UPB_DESC(FieldDescriptorProto_has_oneof_index)(field_proto)) { + if (f->is_proto3_optional) { + _upb_DefBuilder_Errf( + ctx, + "non-extension field (%s) with proto3_optional was not in a oneof", + f->full_name); + } + } - const UPB_DESC(DescriptorProto)* const* msgs = - UPB_DESC(DescriptorProto_nested_type)(msg_proto, &n_msg); - m->nested_msg_count = n_msg; - m->nested_msgs = _upb_MessageDefs_New(ctx, n_msg, msgs, m); + _upb_MessageDef_InsertField(ctx, m, f); } -// Allocate and initialize an array of |n| message defs. -upb_MessageDef* _upb_MessageDefs_New( - upb_DefBuilder* ctx, int n, const UPB_DESC(DescriptorProto) * const* protos, - const upb_MessageDef* containing_type) { - _upb_DefType_CheckPadding(sizeof(upb_MessageDef)); - - const char* name = containing_type ? containing_type->full_name - : _upb_FileDef_RawPackage(ctx->file); +upb_FieldDef* _upb_Extensions_New( + upb_DefBuilder* ctx, int n, + const UPB_DESC(FieldDescriptorProto) * const* protos, const char* prefix, + upb_MessageDef* m) { + _upb_DefType_CheckPadding(sizeof(upb_FieldDef)); + upb_FieldDef* defs = + (upb_FieldDef*)_upb_DefBuilder_Alloc(ctx, sizeof(upb_FieldDef) * n); - upb_MessageDef* m = _upb_DefBuilder_Alloc(ctx, sizeof(upb_MessageDef) * n); for (int i = 0; i < n; i++) { - create_msgdef(ctx, name, protos[i], containing_type, &m[i]); + upb_FieldDef* f = &defs[i]; + + _upb_FieldDef_CreateExt(ctx, prefix, protos[i], m, f); + f->index_ = i; } - return m; + + return defs; } +upb_FieldDef* _upb_FieldDefs_New( + upb_DefBuilder* ctx, int n, + const UPB_DESC(FieldDescriptorProto) * const* protos, const char* prefix, + upb_MessageDef* m, bool* is_sorted) { + _upb_DefType_CheckPadding(sizeof(upb_FieldDef)); + upb_FieldDef* defs = + (upb_FieldDef*)_upb_DefBuilder_Alloc(ctx, sizeof(upb_FieldDef) * n); + + uint32_t previous = 0; + for (int i = 0; i < n; i++) { + upb_FieldDef* f = &defs[i]; -// Must be last. + _upb_FieldDef_CreateNotExt(ctx, prefix, protos[i], m, f); + f->index_ = i; + if (!ctx->layout) { + // Speculate that the def fields are sorted. We will always sort the + // MiniTable fields, so if defs are sorted then indices will match. + // + // If this is incorrect, we will overwrite later. + f->layout_index = i; + } -struct upb_MessageReservedRange { - int32_t start; - int32_t end; -}; + const uint32_t current = f->number_; + if (previous > current) *is_sorted = false; + previous = current; + } -upb_MessageReservedRange* _upb_MessageReservedRange_At( - const upb_MessageReservedRange* r, int i) { - return (upb_MessageReservedRange*)&r[i]; + return defs; } -int32_t upb_MessageReservedRange_Start(const upb_MessageReservedRange* r) { - return r->start; +static void resolve_subdef(upb_DefBuilder* ctx, const char* prefix, + upb_FieldDef* f) { + const UPB_DESC(FieldDescriptorProto)* field_proto = f->sub.unresolved; + upb_StringView name = UPB_DESC(FieldDescriptorProto_type_name)(field_proto); + bool has_name = UPB_DESC(FieldDescriptorProto_has_type_name)(field_proto); + switch ((int)f->type_) { + case UPB_FIELD_TYPE_UNSPECIFIED: { + // Type was not specified and must be inferred. + UPB_ASSERT(has_name); + upb_deftype_t type; + const void* def = + _upb_DefBuilder_ResolveAny(ctx, f->full_name, prefix, name, &type); + switch (type) { + case UPB_DEFTYPE_ENUM: + f->sub.enumdef = def; + f->type_ = kUpb_FieldType_Enum; + if (!UPB_DESC(FieldOptions_has_packed)(f->opts)) { + f->is_packed = f->label_ == kUpb_Label_Repeated && + upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto3; + } + break; + case UPB_DEFTYPE_MSG: + f->sub.msgdef = def; + f->type_ = kUpb_FieldType_Message; // It appears there is no way of + // this being a group. + f->has_presence = !upb_FieldDef_IsRepeated(f); + break; + default: + _upb_DefBuilder_Errf(ctx, "Couldn't resolve type name for field %s", + f->full_name); + } + break; + } + case kUpb_FieldType_Message: + case kUpb_FieldType_Group: + UPB_ASSERT(has_name); + f->sub.msgdef = _upb_DefBuilder_Resolve(ctx, f->full_name, prefix, name, + UPB_DEFTYPE_MSG); + break; + case kUpb_FieldType_Enum: + UPB_ASSERT(has_name); + f->sub.enumdef = _upb_DefBuilder_Resolve(ctx, f->full_name, prefix, name, + UPB_DEFTYPE_ENUM); + break; + default: + // No resolution necessary. + break; + } } -int32_t upb_MessageReservedRange_End(const upb_MessageReservedRange* r) { - return r->end; + +static int _upb_FieldDef_Compare(const void* p1, const void* p2) { + const uint32_t v1 = (*(upb_FieldDef**)p1)->number_; + const uint32_t v2 = (*(upb_FieldDef**)p2)->number_; + return (v1 < v2) ? -1 : (v1 > v2); } -upb_MessageReservedRange* _upb_MessageReservedRanges_New( - upb_DefBuilder* ctx, int n, - const UPB_DESC(DescriptorProto_ReservedRange) * const* protos, - const upb_MessageDef* m) { - upb_MessageReservedRange* r = - _upb_DefBuilder_Alloc(ctx, sizeof(upb_MessageReservedRange) * n); +// _upb_FieldDefs_Sorted() is mostly a pure function of its inputs, but has one +// critical side effect that we depend on: it sets layout_index appropriately +// for non-sorted lists of fields. +const upb_FieldDef** _upb_FieldDefs_Sorted(const upb_FieldDef* f, int n, + upb_Arena* a) { + // TODO(salo): Replace this arena alloc with a persistent scratch buffer. + upb_FieldDef** out = (upb_FieldDef**)upb_Arena_Malloc(a, n * sizeof(void*)); + if (!out) return NULL; for (int i = 0; i < n; i++) { - const int32_t start = - UPB_DESC(DescriptorProto_ReservedRange_start)(protos[i]); - const int32_t end = UPB_DESC(DescriptorProto_ReservedRange_end)(protos[i]); - const int32_t max = kUpb_MaxFieldNumber + 1; - - // A full validation would also check that each range is disjoint, and that - // none of the fields overlap with the extension ranges, but we are just - // sanity checking here. - if (start < 1 || end <= start || end > max) { - _upb_DefBuilder_Errf(ctx, - "Reserved range (%d, %d) is invalid, message=%s\n", - (int)start, (int)end, upb_MessageDef_FullName(m)); - } - - r[i].start = start; - r[i].end = end; + out[i] = (upb_FieldDef*)&f[i]; } + qsort(out, n, sizeof(void*), _upb_FieldDef_Compare); - return r; + for (int i = 0; i < n; i++) { + out[i]->layout_index = i; + } + return (const upb_FieldDef**)out; } +bool upb_FieldDef_MiniDescriptorEncode(const upb_FieldDef* f, upb_Arena* a, + upb_StringView* out) { + UPB_ASSERT(f->is_extension); -// Must be last. + upb_DescState s; + _upb_DescState_Init(&s); -struct upb_MethodDef { - const UPB_DESC(MethodOptions) * opts; - upb_ServiceDef* service; - const char* full_name; - const upb_MessageDef* input_type; - const upb_MessageDef* output_type; - int index; - bool client_streaming; - bool server_streaming; -}; - -upb_MethodDef* _upb_MethodDef_At(const upb_MethodDef* m, int i) { - return (upb_MethodDef*)&m[i]; -} + const int number = upb_FieldDef_Number(f); + const uint64_t modifiers = _upb_FieldDef_Modifiers(f); -const upb_ServiceDef* upb_MethodDef_Service(const upb_MethodDef* m) { - return m->service; -} + if (!_upb_DescState_Grow(&s, a)) return false; + s.ptr = upb_MtDataEncoder_EncodeExtension(&s.e, s.ptr, f->type_, number, + modifiers); + *s.ptr = '\0'; -const UPB_DESC(MethodOptions) * upb_MethodDef_Options(const upb_MethodDef* m) { - return m->opts; + out->data = s.buf; + out->size = s.ptr - s.buf; + return true; } -bool upb_MethodDef_HasOptions(const upb_MethodDef* m) { - return m->opts != (void*)kUpbDefOptDefault; -} +static void resolve_extension(upb_DefBuilder* ctx, const char* prefix, + upb_FieldDef* f, + const UPB_DESC(FieldDescriptorProto) * + field_proto) { + if (!UPB_DESC(FieldDescriptorProto_has_extendee)(field_proto)) { + _upb_DefBuilder_Errf(ctx, "extension for field '%s' had no extendee", + f->full_name); + } -const char* upb_MethodDef_FullName(const upb_MethodDef* m) { - return m->full_name; -} + upb_StringView name = UPB_DESC(FieldDescriptorProto_extendee)(field_proto); + const upb_MessageDef* m = + _upb_DefBuilder_Resolve(ctx, f->full_name, prefix, name, UPB_DEFTYPE_MSG); + f->msgdef = m; -const char* upb_MethodDef_Name(const upb_MethodDef* m) { - return _upb_DefBuilder_FullToShort(m->full_name); + if (!_upb_MessageDef_IsValidExtensionNumber(m, f->number_)) { + _upb_DefBuilder_Errf( + ctx, + "field number %u in extension %s has no extension range in message %s", + (unsigned)f->number_, f->full_name, upb_MessageDef_FullName(m)); + } } -int upb_MethodDef_Index(const upb_MethodDef* m) { return m->index; } +void _upb_FieldDef_BuildMiniTableExtension(upb_DefBuilder* ctx, + const upb_FieldDef* f) { + const upb_MiniTableExtension* ext = _upb_FieldDef_ExtensionMiniTable(f); -const upb_MessageDef* upb_MethodDef_InputType(const upb_MethodDef* m) { - return m->input_type; -} + if (ctx->layout) { + UPB_ASSERT(upb_FieldDef_Number(f) == ext->field.number); + } else { + upb_StringView desc; + if (!upb_FieldDef_MiniDescriptorEncode(f, ctx->tmp_arena, &desc)) { + _upb_DefBuilder_OomErr(ctx); + } -const upb_MessageDef* upb_MethodDef_OutputType(const upb_MethodDef* m) { - return m->output_type; -} + upb_MiniTableExtension* mut_ext = (upb_MiniTableExtension*)ext; + upb_MiniTableSub sub = {NULL}; + if (upb_FieldDef_IsSubMessage(f)) { + sub.submsg = upb_MessageDef_MiniTable(f->sub.msgdef); + } else if (_upb_FieldDef_IsClosedEnum(f)) { + sub.subenum = _upb_EnumDef_MiniTable(f->sub.enumdef); + } + bool ok2 = upb_MiniTableExtension_Init(desc.data, desc.size, mut_ext, + upb_MessageDef_MiniTable(f->msgdef), + sub, ctx->status); + if (!ok2) _upb_DefBuilder_Errf(ctx, "Could not build extension mini table"); + } -bool upb_MethodDef_ClientStreaming(const upb_MethodDef* m) { - return m->client_streaming; + bool ok = _upb_DefPool_InsertExt(ctx->symtab, ext, f); + if (!ok) _upb_DefBuilder_OomErr(ctx); } -bool upb_MethodDef_ServerStreaming(const upb_MethodDef* m) { - return m->server_streaming; -} +static void resolve_default(upb_DefBuilder* ctx, upb_FieldDef* f, + const UPB_DESC(FieldDescriptorProto) * + field_proto) { + // Have to delay resolving of the default value until now because of the enum + // case, since enum defaults are specified with a label. + if (UPB_DESC(FieldDescriptorProto_has_default_value)(field_proto)) { + upb_StringView defaultval = + UPB_DESC(FieldDescriptorProto_default_value)(field_proto); -static void create_method(upb_DefBuilder* ctx, - const UPB_DESC(MethodDescriptorProto) * method_proto, - upb_ServiceDef* s, upb_MethodDef* m) { - upb_StringView name = UPB_DESC(MethodDescriptorProto_name)(method_proto); + if (upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto3) { + _upb_DefBuilder_Errf(ctx, + "proto3 fields cannot have explicit defaults (%s)", + f->full_name); + } - m->service = s; - m->full_name = - _upb_DefBuilder_MakeFullName(ctx, upb_ServiceDef_FullName(s), name); - m->client_streaming = - UPB_DESC(MethodDescriptorProto_client_streaming)(method_proto); - m->server_streaming = - UPB_DESC(MethodDescriptorProto_server_streaming)(method_proto); - m->input_type = _upb_DefBuilder_Resolve( - ctx, m->full_name, m->full_name, - UPB_DESC(MethodDescriptorProto_input_type)(method_proto), - UPB_DEFTYPE_MSG); - m->output_type = _upb_DefBuilder_Resolve( - ctx, m->full_name, m->full_name, - UPB_DESC(MethodDescriptorProto_output_type)(method_proto), - UPB_DEFTYPE_MSG); + if (upb_FieldDef_IsSubMessage(f)) { + _upb_DefBuilder_Errf(ctx, + "message fields cannot have explicit defaults (%s)", + f->full_name); + } - UPB_DEF_SET_OPTIONS(m->opts, MethodDescriptorProto, MethodOptions, - method_proto); + parse_default(ctx, defaultval.data, defaultval.size, f); + f->has_default = true; + } else { + set_default_default(ctx, f); + f->has_default = false; + } } -// Allocate and initialize an array of |n| method defs belonging to |s|. -upb_MethodDef* _upb_MethodDefs_New( - upb_DefBuilder* ctx, int n, - const UPB_DESC(MethodDescriptorProto) * const* protos, upb_ServiceDef* s) { - upb_MethodDef* m = _upb_DefBuilder_Alloc(ctx, sizeof(upb_MethodDef) * n); - for (int i = 0; i < n; i++) { - create_method(ctx, protos[i], s, &m[i]); - m[i].index = i; +void _upb_FieldDef_Resolve(upb_DefBuilder* ctx, const char* prefix, + upb_FieldDef* f) { + // We have to stash this away since resolve_subdef() may overwrite it. + const UPB_DESC(FieldDescriptorProto)* field_proto = f->sub.unresolved; + + resolve_subdef(ctx, prefix, f); + resolve_default(ctx, f, field_proto); + + if (f->is_extension) { + resolve_extension(ctx, prefix, f, field_proto); } - return m; } -#include -#include -#include // Must be last. -struct upb_OneofDef { - const UPB_DESC(OneofOptions) * opts; - const upb_MessageDef* parent; - const char* full_name; - int field_count; - bool synthetic; - const upb_FieldDef** fields; - upb_strtable ntof; // lookup a field by name - upb_inttable itof; // lookup a field by number (index) -#if UINTPTR_MAX == 0xffffffff - uint32_t padding; // Increase size to a multiple of 8. -#endif +struct upb_FileDef { + const UPB_DESC(FileOptions) * opts; + const char* name; + const char* package; + const char* edition; + + const upb_FileDef** deps; + const int32_t* public_deps; + const int32_t* weak_deps; + const upb_MessageDef* top_lvl_msgs; + const upb_EnumDef* top_lvl_enums; + const upb_FieldDef* top_lvl_exts; + const upb_ServiceDef* services; + const upb_MiniTableExtension** ext_layouts; + const upb_DefPool* symtab; + + int dep_count; + int public_dep_count; + int weak_dep_count; + int top_lvl_msg_count; + int top_lvl_enum_count; + int top_lvl_ext_count; + int service_count; + int ext_count; // All exts in the file. + upb_Syntax syntax; }; -upb_OneofDef* _upb_OneofDef_At(const upb_OneofDef* o, int i) { - return (upb_OneofDef*)&o[i]; +const UPB_DESC(FileOptions) * upb_FileDef_Options(const upb_FileDef* f) { + return f->opts; } -const UPB_DESC(OneofOptions) * upb_OneofDef_Options(const upb_OneofDef* o) { - return o->opts; +bool upb_FileDef_HasOptions(const upb_FileDef* f) { + return f->opts != (void*)kUpbDefOptDefault; } -bool upb_OneofDef_HasOptions(const upb_OneofDef* o) { - return o->opts != (void*)kUpbDefOptDefault; -} +const char* upb_FileDef_Name(const upb_FileDef* f) { return f->name; } -const char* upb_OneofDef_FullName(const upb_OneofDef* o) { - return o->full_name; +const char* upb_FileDef_Package(const upb_FileDef* f) { + return f->package ? f->package : ""; } -const char* upb_OneofDef_Name(const upb_OneofDef* o) { - return _upb_DefBuilder_FullToShort(o->full_name); +const char* upb_FileDef_Edition(const upb_FileDef* f) { + return f->edition ? f->edition : ""; } -const upb_MessageDef* upb_OneofDef_ContainingType(const upb_OneofDef* o) { - return o->parent; -} +const char* _upb_FileDef_RawPackage(const upb_FileDef* f) { return f->package; } -int upb_OneofDef_FieldCount(const upb_OneofDef* o) { return o->field_count; } +upb_Syntax upb_FileDef_Syntax(const upb_FileDef* f) { return f->syntax; } -const upb_FieldDef* upb_OneofDef_Field(const upb_OneofDef* o, int i) { - UPB_ASSERT(i < o->field_count); - return o->fields[i]; +int upb_FileDef_TopLevelMessageCount(const upb_FileDef* f) { + return f->top_lvl_msg_count; } -int upb_OneofDef_numfields(const upb_OneofDef* o) { return o->field_count; } +int upb_FileDef_DependencyCount(const upb_FileDef* f) { return f->dep_count; } -uint32_t upb_OneofDef_Index(const upb_OneofDef* o) { - // Compute index in our parent's array. - return o - upb_MessageDef_Oneof(o->parent, 0); +int upb_FileDef_PublicDependencyCount(const upb_FileDef* f) { + return f->public_dep_count; } -bool upb_OneofDef_IsSynthetic(const upb_OneofDef* o) { return o->synthetic; } - -const upb_FieldDef* upb_OneofDef_LookupNameWithSize(const upb_OneofDef* o, - const char* name, - size_t size) { - upb_value val; - return upb_strtable_lookup2(&o->ntof, name, size, &val) - ? upb_value_getptr(val) - : NULL; +int upb_FileDef_WeakDependencyCount(const upb_FileDef* f) { + return f->weak_dep_count; } -const upb_FieldDef* upb_OneofDef_LookupName(const upb_OneofDef* o, - const char* name) { - return upb_OneofDef_LookupNameWithSize(o, name, strlen(name)); +const int32_t* _upb_FileDef_PublicDependencyIndexes(const upb_FileDef* f) { + return f->public_deps; } -const upb_FieldDef* upb_OneofDef_LookupNumber(const upb_OneofDef* o, - uint32_t num) { - upb_value val; - return upb_inttable_lookup(&o->itof, num, &val) ? upb_value_getptr(val) - : NULL; +const int32_t* _upb_FileDef_WeakDependencyIndexes(const upb_FileDef* f) { + return f->weak_deps; } -void _upb_OneofDef_Insert(upb_DefBuilder* ctx, upb_OneofDef* o, - const upb_FieldDef* f, const char* name, - size_t size) { - o->field_count++; - if (_upb_FieldDef_IsProto3Optional(f)) o->synthetic = true; - - const int number = upb_FieldDef_Number(f); - const upb_value v = upb_value_constptr(f); - - // TODO(salo): This lookup is unfortunate because we also perform it when - // inserting into the message's table. Unfortunately that step occurs after - // this one and moving things around could be tricky so let's leave it for - // a future refactoring. - const bool number_exists = upb_inttable_lookup(&o->itof, number, NULL); - if (UPB_UNLIKELY(number_exists)) { - _upb_DefBuilder_Errf(ctx, "oneof fields have the same number (%d)", number); - } - - // TODO(salo): More redundant work happening here. - const bool name_exists = upb_strtable_lookup2(&o->ntof, name, size, NULL); - if (UPB_UNLIKELY(name_exists)) { - _upb_DefBuilder_Errf(ctx, "oneof fields have the same name (%.*s)", - (int)size, name); - } - - const bool ok = upb_inttable_insert(&o->itof, number, v, ctx->arena) && - upb_strtable_insert(&o->ntof, name, size, v, ctx->arena); - if (UPB_UNLIKELY(!ok)) { - _upb_DefBuilder_OomErr(ctx); - } +int upb_FileDef_TopLevelEnumCount(const upb_FileDef* f) { + return f->top_lvl_enum_count; } -// Returns the synthetic count. -size_t _upb_OneofDefs_Finalize(upb_DefBuilder* ctx, upb_MessageDef* m) { - int synthetic_count = 0; - - for (int i = 0; i < upb_MessageDef_OneofCount(m); i++) { - upb_OneofDef* o = (upb_OneofDef*)upb_MessageDef_Oneof(m, i); - - if (o->synthetic && o->field_count != 1) { - _upb_DefBuilder_Errf(ctx, - "Synthetic oneofs must have one field, not %d: %s", - o->field_count, upb_OneofDef_Name(o)); - } - - if (o->synthetic) { - synthetic_count++; - } else if (synthetic_count != 0) { - _upb_DefBuilder_Errf( - ctx, "Synthetic oneofs must be after all other oneofs: %s", - upb_OneofDef_Name(o)); - } - - o->fields = - _upb_DefBuilder_Alloc(ctx, sizeof(upb_FieldDef*) * o->field_count); - o->field_count = 0; - } - - for (int i = 0; i < upb_MessageDef_FieldCount(m); i++) { - const upb_FieldDef* f = upb_MessageDef_Field(m, i); - upb_OneofDef* o = (upb_OneofDef*)upb_FieldDef_ContainingOneof(f); - if (o) { - o->fields[o->field_count++] = f; - } - } - - return synthetic_count; +int upb_FileDef_TopLevelExtensionCount(const upb_FileDef* f) { + return f->top_lvl_ext_count; } -static void create_oneofdef(upb_DefBuilder* ctx, upb_MessageDef* m, - const UPB_DESC(OneofDescriptorProto) * oneof_proto, - const upb_OneofDef* _o) { - upb_OneofDef* o = (upb_OneofDef*)_o; - upb_StringView name = UPB_DESC(OneofDescriptorProto_name)(oneof_proto); - - o->parent = m; - o->full_name = - _upb_DefBuilder_MakeFullName(ctx, upb_MessageDef_FullName(m), name); - o->field_count = 0; - o->synthetic = false; - - UPB_DEF_SET_OPTIONS(o->opts, OneofDescriptorProto, OneofOptions, oneof_proto); - - if (upb_MessageDef_FindByNameWithSize(m, name.data, name.size, NULL, NULL)) { - _upb_DefBuilder_Errf(ctx, "duplicate oneof name (%s)", o->full_name); - } - - upb_value v = _upb_DefType_Pack(o, UPB_DEFTYPE_ONEOF); - bool ok = _upb_MessageDef_Insert(m, name.data, name.size, v, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); - - ok = upb_inttable_init(&o->itof, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); +int upb_FileDef_ServiceCount(const upb_FileDef* f) { return f->service_count; } - ok = upb_strtable_init(&o->ntof, 4, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); +const upb_FileDef* upb_FileDef_Dependency(const upb_FileDef* f, int i) { + UPB_ASSERT(0 <= i && i < f->dep_count); + return f->deps[i]; } -// Allocate and initialize an array of |n| oneof defs. -upb_OneofDef* _upb_OneofDefs_New( - upb_DefBuilder* ctx, int n, - const UPB_DESC(OneofDescriptorProto) * const* protos, upb_MessageDef* m) { - _upb_DefType_CheckPadding(sizeof(upb_OneofDef)); - - upb_OneofDef* o = _upb_DefBuilder_Alloc(ctx, sizeof(upb_OneofDef) * n); - for (int i = 0; i < n; i++) { - create_oneofdef(ctx, m, protos[i], &o[i]); - } - return o; +const upb_FileDef* upb_FileDef_PublicDependency(const upb_FileDef* f, int i) { + UPB_ASSERT(0 <= i && i < f->public_dep_count); + return f->deps[f->public_deps[i]]; } - -// Must be last. - -struct upb_ServiceDef { - const UPB_DESC(ServiceOptions) * opts; - const upb_FileDef* file; - const char* full_name; - upb_MethodDef* methods; - int method_count; - int index; -}; - -upb_ServiceDef* _upb_ServiceDef_At(const upb_ServiceDef* s, int index) { - return (upb_ServiceDef*)&s[index]; +const upb_FileDef* upb_FileDef_WeakDependency(const upb_FileDef* f, int i) { + UPB_ASSERT(0 <= i && i < f->public_dep_count); + return f->deps[f->weak_deps[i]]; } -const UPB_DESC(ServiceOptions) * - upb_ServiceDef_Options(const upb_ServiceDef* s) { - return s->opts; +const upb_MessageDef* upb_FileDef_TopLevelMessage(const upb_FileDef* f, int i) { + UPB_ASSERT(0 <= i && i < f->top_lvl_msg_count); + return _upb_MessageDef_At(f->top_lvl_msgs, i); } -bool upb_ServiceDef_HasOptions(const upb_ServiceDef* s) { - return s->opts != (void*)kUpbDefOptDefault; +const upb_EnumDef* upb_FileDef_TopLevelEnum(const upb_FileDef* f, int i) { + UPB_ASSERT(0 <= i && i < f->top_lvl_enum_count); + return _upb_EnumDef_At(f->top_lvl_enums, i); } -const char* upb_ServiceDef_FullName(const upb_ServiceDef* s) { - return s->full_name; +const upb_FieldDef* upb_FileDef_TopLevelExtension(const upb_FileDef* f, int i) { + UPB_ASSERT(0 <= i && i < f->top_lvl_ext_count); + return _upb_FieldDef_At(f->top_lvl_exts, i); } -const char* upb_ServiceDef_Name(const upb_ServiceDef* s) { - return _upb_DefBuilder_FullToShort(s->full_name); +const upb_ServiceDef* upb_FileDef_Service(const upb_FileDef* f, int i) { + UPB_ASSERT(0 <= i && i < f->service_count); + return _upb_ServiceDef_At(f->services, i); } -int upb_ServiceDef_Index(const upb_ServiceDef* s) { return s->index; } - -const upb_FileDef* upb_ServiceDef_File(const upb_ServiceDef* s) { - return s->file; -} +const upb_DefPool* upb_FileDef_Pool(const upb_FileDef* f) { return f->symtab; } -int upb_ServiceDef_MethodCount(const upb_ServiceDef* s) { - return s->method_count; +const upb_MiniTableExtension* _upb_FileDef_ExtensionMiniTable( + const upb_FileDef* f, int i) { + return f->ext_layouts[i]; } -const upb_MethodDef* upb_ServiceDef_Method(const upb_ServiceDef* s, int i) { - return (i < 0 || i >= s->method_count) ? NULL - : _upb_MethodDef_At(s->methods, i); +static char* strviewdup(upb_DefBuilder* ctx, upb_StringView view) { + char* ret = upb_strdup2(view.data, view.size, _upb_DefBuilder_Arena(ctx)); + if (!ret) _upb_DefBuilder_OomErr(ctx); + return ret; } -const upb_MethodDef* upb_ServiceDef_FindMethodByName(const upb_ServiceDef* s, - const char* name) { - for (int i = 0; i < s->method_count; i++) { - const upb_MethodDef* m = _upb_MethodDef_At(s->methods, i); - if (strcmp(name, upb_MethodDef_Name(m)) == 0) { - return m; - } - } - return NULL; +static bool streql_view(upb_StringView view, const char* b) { + return view.size == strlen(b) && memcmp(view.data, b, view.size) == 0; } -static void create_service(upb_DefBuilder* ctx, - const UPB_DESC(ServiceDescriptorProto) * svc_proto, - upb_ServiceDef* s) { - upb_StringView name; +static int count_exts_in_msg(const UPB_DESC(DescriptorProto) * msg_proto) { size_t n; + UPB_DESC(DescriptorProto_extension)(msg_proto, &n); + int ext_count = n; - // Must happen before _upb_DefBuilder_Add() - s->file = _upb_DefBuilder_File(ctx); + const UPB_DESC(DescriptorProto)* const* nested_msgs = + UPB_DESC(DescriptorProto_nested_type)(msg_proto, &n); + for (size_t i = 0; i < n; i++) { + ext_count += count_exts_in_msg(nested_msgs[i]); + } - name = UPB_DESC(ServiceDescriptorProto_name)(svc_proto); - const char* package = _upb_FileDef_RawPackage(s->file); - s->full_name = _upb_DefBuilder_MakeFullName(ctx, package, name); - _upb_DefBuilder_Add(ctx, s->full_name, - _upb_DefType_Pack(s, UPB_DEFTYPE_SERVICE)); + return ext_count; +} - const UPB_DESC(MethodDescriptorProto)* const* methods = - UPB_DESC(ServiceDescriptorProto_method)(svc_proto, &n); - s->method_count = n; - s->methods = _upb_MethodDefs_New(ctx, n, methods, s); +// Allocate and initialize one file def, and add it to the context object. +void _upb_FileDef_Create(upb_DefBuilder* ctx, + const UPB_DESC(FileDescriptorProto) * file_proto) { + upb_FileDef* file = _upb_DefBuilder_Alloc(ctx, sizeof(upb_FileDef)); + ctx->file = file; - UPB_DEF_SET_OPTIONS(s->opts, ServiceDescriptorProto, ServiceOptions, - svc_proto); -} + const UPB_DESC(DescriptorProto)* const* msgs; + const UPB_DESC(EnumDescriptorProto)* const* enums; + const UPB_DESC(FieldDescriptorProto)* const* exts; + const UPB_DESC(ServiceDescriptorProto)* const* services; + const upb_StringView* strs; + const int32_t* public_deps; + const int32_t* weak_deps; + size_t n; -upb_ServiceDef* _upb_ServiceDefs_New( - upb_DefBuilder* ctx, int n, - const UPB_DESC(ServiceDescriptorProto) * const* protos) { - _upb_DefType_CheckPadding(sizeof(upb_ServiceDef)); + file->symtab = ctx->symtab; - upb_ServiceDef* s = _upb_DefBuilder_Alloc(ctx, sizeof(upb_ServiceDef) * n); - for (int i = 0; i < n; i++) { - create_service(ctx, protos[i], &s[i]); - s[i].index = i; + // Count all extensions in the file, to build a flat array of layouts. + UPB_DESC(FileDescriptorProto_extension)(file_proto, &n); + int ext_count = n; + msgs = UPB_DESC(FileDescriptorProto_message_type)(file_proto, &n); + for (size_t i = 0; i < n; i++) { + ext_count += count_exts_in_msg(msgs[i]); } - return s; -} + file->ext_count = ext_count; + if (ctx->layout) { + // We are using the ext layouts that were passed in. + file->ext_layouts = ctx->layout->exts; + if (ctx->layout->ext_count != file->ext_count) { + _upb_DefBuilder_Errf(ctx, + "Extension count did not match layout (%d vs %d)", + ctx->layout->ext_count, file->ext_count); + } + } else { + // We are building ext layouts from scratch. + file->ext_layouts = _upb_DefBuilder_Alloc( + ctx, sizeof(*file->ext_layouts) * file->ext_count); + upb_MiniTableExtension* ext = + _upb_DefBuilder_Alloc(ctx, sizeof(*ext) * file->ext_count); + for (int i = 0; i < file->ext_count; i++) { + file->ext_layouts[i] = &ext[i]; + } + } -#include + upb_StringView name = UPB_DESC(FileDescriptorProto_name)(file_proto); + file->name = strviewdup(ctx, name); + if (strlen(file->name) != name.size) { + _upb_DefBuilder_Errf(ctx, "File name contained embedded NULL"); + } + upb_StringView package = UPB_DESC(FileDescriptorProto_package)(file_proto); -// Must be last. + if (package.size) { + _upb_DefBuilder_CheckIdentFull(ctx, package); + file->package = strviewdup(ctx, package); + } else { + file->package = NULL; + } -// A few fake field types for our tables. -enum { - kUpb_FakeFieldType_FieldNotFound = 0, - kUpb_FakeFieldType_MessageSetItem = 19, -}; + upb_StringView edition = UPB_DESC(FileDescriptorProto_edition)(file_proto); -// DecodeOp: an action to be performed for a wire-type/field-type combination. -enum { - // Special ops: we don't write data to regular fields for these. - kUpb_DecodeOp_UnknownField = -1, - kUpb_DecodeOp_MessageSetItem = -2, + if (edition.size == 0) { + file->edition = NULL; + } else { + // TODO(b/267770604): How should we validate this? + file->edition = strviewdup(ctx, edition); + if (strlen(file->edition) != edition.size) { + _upb_DefBuilder_Errf(ctx, "Edition name contained embedded NULL"); + } + } - // Scalar-only ops. - kUpb_DecodeOp_Scalar1Byte = 0, - kUpb_DecodeOp_Scalar4Byte = 2, - kUpb_DecodeOp_Scalar8Byte = 3, - kUpb_DecodeOp_Enum = 1, + if (UPB_DESC(FileDescriptorProto_has_syntax)(file_proto)) { + upb_StringView syntax = UPB_DESC(FileDescriptorProto_syntax)(file_proto); - // Scalar/repeated ops. - kUpb_DecodeOp_String = 4, - kUpb_DecodeOp_Bytes = 5, - kUpb_DecodeOp_SubMessage = 6, + if (streql_view(syntax, "proto2")) { + file->syntax = kUpb_Syntax_Proto2; + } else if (streql_view(syntax, "proto3")) { + file->syntax = kUpb_Syntax_Proto3; + } else { + _upb_DefBuilder_Errf(ctx, "Invalid syntax '" UPB_STRINGVIEW_FORMAT "'", + UPB_STRINGVIEW_ARGS(syntax)); + } + } else { + file->syntax = kUpb_Syntax_Proto2; + } - // Repeated-only ops (also see macros below). - kUpb_DecodeOp_PackedEnum = 13, -}; + // Read options. + UPB_DEF_SET_OPTIONS(file->opts, FileDescriptorProto, FileOptions, file_proto); -// For packed fields it is helpful to be able to recover the lg2 of the data -// size from the op. -#define OP_FIXPCK_LG2(n) (n + 5) /* n in [2, 3] => op in [7, 8] */ -#define OP_VARPCK_LG2(n) (n + 9) /* n in [0, 2, 3] => op in [9, 11, 12] */ + // Verify dependencies. + strs = UPB_DESC(FileDescriptorProto_dependency)(file_proto, &n); + file->dep_count = n; + file->deps = _upb_DefBuilder_Alloc(ctx, sizeof(*file->deps) * n); -typedef union { - bool bool_val; - uint32_t uint32_val; - uint64_t uint64_val; - uint32_t size; -} wireval; + for (size_t i = 0; i < n; i++) { + upb_StringView str = strs[i]; + file->deps[i] = + upb_DefPool_FindFileByNameWithSize(ctx->symtab, str.data, str.size); + if (!file->deps[i]) { + _upb_DefBuilder_Errf(ctx, + "Depends on file '" UPB_STRINGVIEW_FORMAT + "', but it has not been loaded", + UPB_STRINGVIEW_ARGS(str)); + } + } -static const char* _upb_Decoder_DecodeMessage(upb_Decoder* d, const char* ptr, - upb_Message* msg, - const upb_MiniTable* layout); + public_deps = UPB_DESC(FileDescriptorProto_public_dependency)(file_proto, &n); + file->public_dep_count = n; + file->public_deps = + _upb_DefBuilder_Alloc(ctx, sizeof(*file->public_deps) * n); + int32_t* mutable_public_deps = (int32_t*)file->public_deps; + for (size_t i = 0; i < n; i++) { + if (public_deps[i] >= file->dep_count) { + _upb_DefBuilder_Errf(ctx, "public_dep %d is out of range", + (int)public_deps[i]); + } + mutable_public_deps[i] = public_deps[i]; + } -UPB_NORETURN static void* _upb_Decoder_ErrorJmp(upb_Decoder* d, - upb_DecodeStatus status) { - assert(status != kUpb_DecodeStatus_Ok); - d->status = status; - UPB_LONGJMP(d->err, 1); -} + weak_deps = UPB_DESC(FileDescriptorProto_weak_dependency)(file_proto, &n); + file->weak_dep_count = n; + file->weak_deps = _upb_DefBuilder_Alloc(ctx, sizeof(*file->weak_deps) * n); + int32_t* mutable_weak_deps = (int32_t*)file->weak_deps; + for (size_t i = 0; i < n; i++) { + if (weak_deps[i] >= file->dep_count) { + _upb_DefBuilder_Errf(ctx, "weak_dep %d is out of range", + (int)weak_deps[i]); + } + mutable_weak_deps[i] = weak_deps[i]; + } -const char* _upb_FastDecoder_ErrorJmp(upb_Decoder* d, int status) { - assert(status != kUpb_DecodeStatus_Ok); - d->status = status; - UPB_LONGJMP(d->err, 1); - return NULL; -} + // Create enums. + enums = UPB_DESC(FileDescriptorProto_enum_type)(file_proto, &n); + file->top_lvl_enum_count = n; + file->top_lvl_enums = _upb_EnumDefs_New(ctx, n, enums, NULL); -static void _upb_Decoder_VerifyUtf8(upb_Decoder* d, const char* buf, int len) { - if (!_upb_Decoder_VerifyUtf8Inline(buf, len)) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); - } -} + // Create extensions. + exts = UPB_DESC(FileDescriptorProto_extension)(file_proto, &n); + file->top_lvl_ext_count = n; + file->top_lvl_exts = _upb_Extensions_New(ctx, n, exts, file->package, NULL); -static bool _upb_Decoder_Reserve(upb_Decoder* d, upb_Array* arr, size_t elem) { - bool need_realloc = arr->capacity - arr->size < elem; - if (need_realloc && !_upb_array_realloc(arr, arr->size + elem, &d->arena)) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + // Create messages. + msgs = UPB_DESC(FileDescriptorProto_message_type)(file_proto, &n); + file->top_lvl_msg_count = n; + file->top_lvl_msgs = _upb_MessageDefs_New(ctx, n, msgs, NULL); + + // Create services. + services = UPB_DESC(FileDescriptorProto_service)(file_proto, &n); + file->service_count = n; + file->services = _upb_ServiceDefs_New(ctx, n, services); + + // Now that all names are in the table, build layouts and resolve refs. + + for (int i = 0; i < file->top_lvl_msg_count; i++) { + upb_MessageDef* m = (upb_MessageDef*)upb_FileDef_TopLevelMessage(file, i); + _upb_MessageDef_Resolve(ctx, m); } - return need_realloc; -} -typedef struct { - const char* ptr; - uint64_t val; -} _upb_DecodeLongVarintReturn; + for (int i = 0; i < file->top_lvl_ext_count; i++) { + upb_FieldDef* f = (upb_FieldDef*)upb_FileDef_TopLevelExtension(file, i); + _upb_FieldDef_Resolve(ctx, file->package, f); + } -UPB_NOINLINE -static _upb_DecodeLongVarintReturn _upb_Decoder_DecodeLongVarint( - const char* ptr, uint64_t val) { - _upb_DecodeLongVarintReturn ret = {NULL, 0}; - uint64_t byte; - int i; - for (i = 1; i < 10; i++) { - byte = (uint8_t)ptr[i]; - val += (byte - 1) << (i * 7); - if (!(byte & 0x80)) { - ret.ptr = ptr + i + 1; - ret.val = val; - return ret; - } + for (int i = 0; i < file->top_lvl_msg_count; i++) { + upb_MessageDef* m = (upb_MessageDef*)upb_FileDef_TopLevelMessage(file, i); + _upb_MessageDef_CreateMiniTable(ctx, (upb_MessageDef*)m); } - return ret; -} -UPB_FORCEINLINE -static const char* _upb_Decoder_DecodeVarint(upb_Decoder* d, const char* ptr, - uint64_t* val) { - uint64_t byte = (uint8_t)*ptr; - if (UPB_LIKELY((byte & 0x80) == 0)) { - *val = byte; - return ptr + 1; - } else { - _upb_DecodeLongVarintReturn res = _upb_Decoder_DecodeLongVarint(ptr, byte); - if (!res.ptr) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); - *val = res.val; - return res.ptr; + for (int i = 0; i < file->top_lvl_ext_count; i++) { + upb_FieldDef* f = (upb_FieldDef*)upb_FileDef_TopLevelExtension(file, i); + _upb_FieldDef_BuildMiniTableExtension(ctx, f); } -} -UPB_FORCEINLINE -static const char* _upb_Decoder_DecodeTag(upb_Decoder* d, const char* ptr, - uint32_t* val) { - uint64_t byte = (uint8_t)*ptr; - if (UPB_LIKELY((byte & 0x80) == 0)) { - *val = byte; - return ptr + 1; - } else { - const char* start = ptr; - _upb_DecodeLongVarintReturn res = _upb_Decoder_DecodeLongVarint(ptr, byte); - if (!res.ptr || res.ptr - start > 5 || res.val > UINT32_MAX) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); - } - *val = res.val; - return res.ptr; + for (int i = 0; i < file->top_lvl_msg_count; i++) { + upb_MessageDef* m = (upb_MessageDef*)upb_FileDef_TopLevelMessage(file, i); + _upb_MessageDef_LinkMiniTable(ctx, m); } -} -UPB_FORCEINLINE -static const char* upb_Decoder_DecodeSize(upb_Decoder* d, const char* ptr, - uint32_t* size) { - uint64_t size64; - ptr = _upb_Decoder_DecodeVarint(d, ptr, &size64); - if (size64 >= INT32_MAX || - !upb_EpsCopyInputStream_CheckSize(&d->input, ptr, (int)size64)) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); + if (file->ext_count) { + bool ok = upb_ExtensionRegistry_AddArray( + _upb_DefPool_ExtReg(ctx->symtab), file->ext_layouts, file->ext_count); + if (!ok) _upb_DefBuilder_OomErr(ctx); } - *size = size64; - return ptr; } -static void _upb_Decoder_MungeInt32(wireval* val) { - if (!_upb_IsLittleEndian()) { - /* The next stage will memcpy(dst, &val, 4) */ - val->uint32_val = val->uint64_val; - } + +#include + + +// Must be last. + +bool upb_Message_HasFieldByDef(const upb_Message* msg, const upb_FieldDef* f) { + UPB_ASSERT(upb_FieldDef_HasPresence(f)); + return upb_Message_HasField(msg, upb_FieldDef_MiniTable(f)); } -static void _upb_Decoder_Munge(int type, wireval* val) { - switch (type) { - case kUpb_FieldType_Bool: - val->bool_val = val->uint64_val != 0; - break; - case kUpb_FieldType_SInt32: { - uint32_t n = val->uint64_val; - val->uint32_val = (n >> 1) ^ -(int32_t)(n & 1); - break; - } - case kUpb_FieldType_SInt64: { - uint64_t n = val->uint64_val; - val->uint64_val = (n >> 1) ^ -(int64_t)(n & 1); - break; - } - case kUpb_FieldType_Int32: - case kUpb_FieldType_UInt32: - case kUpb_FieldType_Enum: - _upb_Decoder_MungeInt32(val); - break; +const upb_FieldDef* upb_Message_WhichOneof(const upb_Message* msg, + const upb_OneofDef* o) { + const upb_FieldDef* f = upb_OneofDef_Field(o, 0); + if (upb_OneofDef_IsSynthetic(o)) { + UPB_ASSERT(upb_OneofDef_FieldCount(o) == 1); + return upb_Message_HasFieldByDef(msg, f) ? f : NULL; + } else { + const upb_MiniTableField* field = upb_FieldDef_MiniTable(f); + uint32_t oneof_case = upb_Message_WhichOneofFieldNumber(msg, field); + f = oneof_case ? upb_OneofDef_LookupNumber(o, oneof_case) : NULL; + UPB_ASSERT((f != NULL) == (oneof_case != 0)); + return f; } } -static upb_Message* _upb_Decoder_NewSubMessage(upb_Decoder* d, - const upb_MiniTableSub* subs, - const upb_MiniTableField* field, - upb_TaggedMessagePtr* target) { - const upb_MiniTable* subl = subs[field->UPB_PRIVATE(submsg_index)].submsg; - UPB_ASSERT(subl); - upb_Message* msg = _upb_Message_New(subl, &d->arena); - if (!msg) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - - // Extensions should not be unlinked. A message extension should not be - // registered until its sub-message type is available to be linked. - bool is_empty = subl == &_kUpb_MiniTable_Empty; - bool is_extension = field->mode & kUpb_LabelFlags_IsExtension; - UPB_ASSERT(!(is_empty && is_extension)); +upb_MessageValue upb_Message_GetFieldByDef(const upb_Message* msg, + const upb_FieldDef* f) { + upb_MessageValue default_val = upb_FieldDef_Default(f); + upb_MessageValue ret; + _upb_Message_GetField(msg, upb_FieldDef_MiniTable(f), &default_val, &ret); + return ret; +} - if (is_empty && !(d->options & kUpb_DecodeOption_ExperimentalAllowUnlinked)) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_UnlinkedSubMessage); +upb_MutableMessageValue upb_Message_Mutable(upb_Message* msg, + const upb_FieldDef* f, + upb_Arena* a) { + UPB_ASSERT(upb_FieldDef_IsSubMessage(f) || upb_FieldDef_IsRepeated(f)); + if (upb_FieldDef_HasPresence(f) && !upb_Message_HasFieldByDef(msg, f)) { + // We need to skip the upb_Message_GetFieldByDef() call in this case. + goto make; } - upb_TaggedMessagePtr tagged = _upb_TaggedMessagePtr_Pack(msg, is_empty); - memcpy(target, &tagged, sizeof(tagged)); - return msg; -} - -static upb_Message* _upb_Decoder_ReuseSubMessage( - upb_Decoder* d, const upb_MiniTableSub* subs, - const upb_MiniTableField* field, upb_TaggedMessagePtr* target) { - upb_TaggedMessagePtr tagged = *target; - const upb_MiniTable* subl = subs[field->UPB_PRIVATE(submsg_index)].submsg; - UPB_ASSERT(subl); - if (!upb_TaggedMessagePtr_IsEmpty(tagged) || subl == &_kUpb_MiniTable_Empty) { - return _upb_TaggedMessagePtr_GetMessage(tagged); + upb_MessageValue val = upb_Message_GetFieldByDef(msg, f); + if (val.array_val) { + return (upb_MutableMessageValue){.array = (upb_Array*)val.array_val}; } - // We found an empty message from a previous parse that was performed before - // this field was linked. But it is linked now, so we want to allocate a new - // message of the correct type and promote data into it before continuing. - upb_Message* existing = _upb_TaggedMessagePtr_GetEmptyMessage(tagged); - upb_Message* promoted = _upb_Decoder_NewSubMessage(d, subs, field, target); - size_t size; - const char* unknown = upb_Message_GetUnknown(existing, &size); - upb_DecodeStatus status = upb_Decode(unknown, size, promoted, subl, d->extreg, - d->options, &d->arena); - if (status != kUpb_DecodeStatus_Ok) _upb_Decoder_ErrorJmp(d, status); - return promoted; -} + upb_MutableMessageValue ret; +make: + if (!a) return (upb_MutableMessageValue){.array = NULL}; + if (upb_FieldDef_IsMap(f)) { + const upb_MessageDef* entry = upb_FieldDef_MessageSubDef(f); + const upb_FieldDef* key = + upb_MessageDef_FindFieldByNumber(entry, kUpb_MapEntry_KeyFieldNumber); + const upb_FieldDef* value = + upb_MessageDef_FindFieldByNumber(entry, kUpb_MapEntry_ValueFieldNumber); + ret.map = + upb_Map_New(a, upb_FieldDef_CType(key), upb_FieldDef_CType(value)); + } else if (upb_FieldDef_IsRepeated(f)) { + ret.array = upb_Array_New(a, upb_FieldDef_CType(f)); + } else { + UPB_ASSERT(upb_FieldDef_IsSubMessage(f)); + const upb_MessageDef* m = upb_FieldDef_MessageSubDef(f); + ret.msg = upb_Message_New(upb_MessageDef_MiniTable(m), a); + } -static const char* _upb_Decoder_ReadString(upb_Decoder* d, const char* ptr, - int size, upb_StringView* str) { - const char* str_ptr = ptr; - ptr = upb_EpsCopyInputStream_ReadString(&d->input, &str_ptr, size, &d->arena); - if (!ptr) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - str->data = str_ptr; - str->size = size; - return ptr; -} + val.array_val = ret.array; + upb_Message_SetFieldByDef(msg, f, val, a); -UPB_FORCEINLINE -static const char* _upb_Decoder_RecurseSubMessage(upb_Decoder* d, - const char* ptr, - upb_Message* submsg, - const upb_MiniTable* subl, - uint32_t expected_end_group) { - if (--d->depth < 0) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_MaxDepthExceeded); - } - ptr = _upb_Decoder_DecodeMessage(d, ptr, submsg, subl); - d->depth++; - if (d->end_group != expected_end_group) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); - } - return ptr; + return ret; } -UPB_FORCEINLINE -static const char* _upb_Decoder_DecodeSubMessage( - upb_Decoder* d, const char* ptr, upb_Message* submsg, - const upb_MiniTableSub* subs, const upb_MiniTableField* field, int size) { - int saved_delta = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, size); - const upb_MiniTable* subl = subs[field->UPB_PRIVATE(submsg_index)].submsg; - UPB_ASSERT(subl); - ptr = _upb_Decoder_RecurseSubMessage(d, ptr, submsg, subl, DECODE_NOGROUP); - upb_EpsCopyInputStream_PopLimit(&d->input, ptr, saved_delta); - return ptr; +bool upb_Message_SetFieldByDef(upb_Message* msg, const upb_FieldDef* f, + upb_MessageValue val, upb_Arena* a) { + return _upb_Message_SetField(msg, upb_FieldDef_MiniTable(f), &val, a); } -UPB_FORCEINLINE -static const char* _upb_Decoder_DecodeGroup(upb_Decoder* d, const char* ptr, - upb_Message* submsg, - const upb_MiniTable* subl, - uint32_t number) { - if (_upb_Decoder_IsDone(d, &ptr)) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); - } - ptr = _upb_Decoder_RecurseSubMessage(d, ptr, submsg, subl, number); - d->end_group = DECODE_NOGROUP; - return ptr; +void upb_Message_ClearFieldByDef(upb_Message* msg, const upb_FieldDef* f) { + upb_Message_ClearField(msg, upb_FieldDef_MiniTable(f)); } -UPB_FORCEINLINE -static const char* _upb_Decoder_DecodeUnknownGroup(upb_Decoder* d, - const char* ptr, - uint32_t number) { - return _upb_Decoder_DecodeGroup(d, ptr, NULL, NULL, number); +void upb_Message_ClearByDef(upb_Message* msg, const upb_MessageDef* m) { + upb_Message_Clear(msg, upb_MessageDef_MiniTable(m)); } -UPB_FORCEINLINE -static const char* _upb_Decoder_DecodeKnownGroup( - upb_Decoder* d, const char* ptr, upb_Message* submsg, - const upb_MiniTableSub* subs, const upb_MiniTableField* field) { - const upb_MiniTable* subl = subs[field->UPB_PRIVATE(submsg_index)].submsg; - UPB_ASSERT(subl); - return _upb_Decoder_DecodeGroup(d, ptr, submsg, subl, field->number); -} +bool upb_Message_Next(const upb_Message* msg, const upb_MessageDef* m, + const upb_DefPool* ext_pool, const upb_FieldDef** out_f, + upb_MessageValue* out_val, size_t* iter) { + size_t i = *iter; + size_t n = upb_MessageDef_FieldCount(m); + UPB_UNUSED(ext_pool); -static char* upb_Decoder_EncodeVarint32(uint32_t val, char* ptr) { - do { - uint8_t byte = val & 0x7fU; - val >>= 7; - if (val) byte |= 0x80U; - *(ptr++) = byte; - } while (val); - return ptr; -} + // Iterate over normal fields, returning the first one that is set. + while (++i < n) { + const upb_FieldDef* f = upb_MessageDef_Field(m, i); + const upb_MiniTableField* field = upb_FieldDef_MiniTable(f); + upb_MessageValue val = upb_Message_GetFieldByDef(msg, f); -static void _upb_Decoder_AddUnknownVarints(upb_Decoder* d, upb_Message* msg, - uint32_t val1, uint32_t val2) { - char buf[20]; - char* end = buf; - end = upb_Decoder_EncodeVarint32(val1, end); - end = upb_Decoder_EncodeVarint32(val2, end); + // Skip field if unset or empty. + if (upb_MiniTableField_HasPresence(field)) { + if (!upb_Message_HasFieldByDef(msg, f)) continue; + } else { + switch (upb_FieldMode_Get(field)) { + case kUpb_FieldMode_Map: + if (!val.map_val || upb_Map_Size(val.map_val) == 0) continue; + break; + case kUpb_FieldMode_Array: + if (!val.array_val || upb_Array_Size(val.array_val) == 0) continue; + break; + case kUpb_FieldMode_Scalar: + if (!_upb_MiniTable_ValueIsNonZero(&val, field)) continue; + break; + } + } - if (!_upb_Message_AddUnknown(msg, buf, end - buf, &d->arena)) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + *out_val = val; + *out_f = f; + *iter = i; + return true; } -} -UPB_NOINLINE -static bool _upb_Decoder_CheckEnumSlow(upb_Decoder* d, const char* ptr, - upb_Message* msg, - const upb_MiniTableEnum* e, - const upb_MiniTableField* field, - uint32_t v) { - if (_upb_MiniTable_CheckEnumValueSlow(e, v)) return true; + if (ext_pool) { + // Return any extensions that are set. + size_t count; + const upb_Message_Extension* ext = _upb_Message_Getexts(msg, &count); + if (i - n < count) { + ext += count - 1 - (i - n); + memcpy(out_val, &ext->data, sizeof(*out_val)); + *out_f = upb_DefPool_FindExtensionByMiniTable(ext_pool, ext->ext); + *iter = i; + return true; + } + } - // Unrecognized enum goes into unknown fields. - // For packed fields the tag could be arbitrarily far in the past, so we - // just re-encode the tag and value here. - uint32_t tag = ((uint32_t)field->number << 3) | kUpb_WireType_Varint; - upb_Message* unknown_msg = - field->mode & kUpb_LabelFlags_IsExtension ? d->unknown_msg : msg; - _upb_Decoder_AddUnknownVarints(d, unknown_msg, tag, v); + *iter = i; return false; } -UPB_FORCEINLINE -static bool _upb_Decoder_CheckEnum(upb_Decoder* d, const char* ptr, - upb_Message* msg, const upb_MiniTableEnum* e, - const upb_MiniTableField* field, - wireval* val) { - uint32_t v = val->uint32_val; +bool _upb_Message_DiscardUnknown(upb_Message* msg, const upb_MessageDef* m, + int depth) { + size_t iter = kUpb_Message_Begin; + const upb_FieldDef* f; + upb_MessageValue val; + bool ret = true; - _kUpb_FastEnumCheck_Status status = _upb_MiniTable_CheckEnumValueFast(e, v); - if (UPB_LIKELY(status == _kUpb_FastEnumCheck_ValueIsInEnum)) return true; - return _upb_Decoder_CheckEnumSlow(d, ptr, msg, e, field, v); -} + if (--depth == 0) return false; -UPB_NOINLINE -static const char* _upb_Decoder_DecodeEnumArray(upb_Decoder* d, const char* ptr, - upb_Message* msg, - upb_Array* arr, - const upb_MiniTableSub* subs, - const upb_MiniTableField* field, - wireval* val) { - const upb_MiniTableEnum* e = subs[field->UPB_PRIVATE(submsg_index)].subenum; - if (!_upb_Decoder_CheckEnum(d, ptr, msg, e, field, val)) return ptr; - void* mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->size * 4, void); - arr->size++; - memcpy(mem, val, 4); - return ptr; -} + _upb_Message_DiscardUnknown_shallow(msg); -UPB_FORCEINLINE -static const char* _upb_Decoder_DecodeFixedPacked( - upb_Decoder* d, const char* ptr, upb_Array* arr, wireval* val, - const upb_MiniTableField* field, int lg2) { - int mask = (1 << lg2) - 1; - size_t count = val->size >> lg2; - if ((val->size & mask) != 0) { - // Length isn't a round multiple of elem size. - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); - } - _upb_Decoder_Reserve(d, arr, count); - void* mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << lg2, void); - arr->size += count; - // Note: if/when the decoder supports multi-buffer input, we will need to - // handle buffer seams here. - if (_upb_IsLittleEndian()) { - ptr = upb_EpsCopyInputStream_Copy(&d->input, ptr, mem, val->size); - } else { - int delta = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, val->size); - char* dst = mem; - while (!_upb_Decoder_IsDone(d, &ptr)) { - if (lg2 == 2) { - ptr = upb_WireReader_ReadFixed32(ptr, dst); - dst += 4; - } else { - UPB_ASSERT(lg2 == 3); - ptr = upb_WireReader_ReadFixed64(ptr, dst); - dst += 8; + while (upb_Message_Next(msg, m, NULL /*ext_pool*/, &f, &val, &iter)) { + const upb_MessageDef* subm = upb_FieldDef_MessageSubDef(f); + if (!subm) continue; + if (upb_FieldDef_IsMap(f)) { + const upb_FieldDef* val_f = upb_MessageDef_FindFieldByNumber(subm, 2); + const upb_MessageDef* val_m = upb_FieldDef_MessageSubDef(val_f); + upb_Map* map = (upb_Map*)val.map_val; + size_t iter = kUpb_Map_Begin; + + if (!val_m) continue; + + upb_MessageValue map_key, map_val; + while (upb_Map_Next(map, &map_key, &map_val, &iter)) { + if (!_upb_Message_DiscardUnknown((upb_Message*)map_val.msg_val, val_m, + depth)) { + ret = false; + } + } + } else if (upb_FieldDef_IsRepeated(f)) { + const upb_Array* arr = val.array_val; + size_t i, n = upb_Array_Size(arr); + for (i = 0; i < n; i++) { + upb_MessageValue elem = upb_Array_Get(arr, i); + if (!_upb_Message_DiscardUnknown((upb_Message*)elem.msg_val, subm, + depth)) { + ret = false; + } + } + } else { + if (!_upb_Message_DiscardUnknown((upb_Message*)val.msg_val, subm, + depth)) { + ret = false; } } - upb_EpsCopyInputStream_PopLimit(&d->input, ptr, delta); } - return ptr; + return ret; } -UPB_FORCEINLINE -static const char* _upb_Decoder_DecodeVarintPacked( - upb_Decoder* d, const char* ptr, upb_Array* arr, wireval* val, - const upb_MiniTableField* field, int lg2) { - int scale = 1 << lg2; - int saved_limit = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, val->size); - char* out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << lg2, void); - while (!_upb_Decoder_IsDone(d, &ptr)) { - wireval elem; - ptr = _upb_Decoder_DecodeVarint(d, ptr, &elem.uint64_val); - _upb_Decoder_Munge(field->UPB_PRIVATE(descriptortype), &elem); - if (_upb_Decoder_Reserve(d, arr, 1)) { - out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << lg2, void); - } - arr->size++; - memcpy(out, &elem, scale); - out += scale; - } - upb_EpsCopyInputStream_PopLimit(&d->input, ptr, saved_limit); - return ptr; +bool upb_Message_DiscardUnknown(upb_Message* msg, const upb_MessageDef* m, + int maxdepth) { + return _upb_Message_DiscardUnknown(msg, m, maxdepth); } -UPB_NOINLINE -static const char* _upb_Decoder_DecodeEnumPacked( - upb_Decoder* d, const char* ptr, upb_Message* msg, upb_Array* arr, - const upb_MiniTableSub* subs, const upb_MiniTableField* field, - wireval* val) { - const upb_MiniTableEnum* e = subs[field->UPB_PRIVATE(submsg_index)].subenum; - int saved_limit = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, val->size); - char* out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size * 4, void); - while (!_upb_Decoder_IsDone(d, &ptr)) { - wireval elem; - ptr = _upb_Decoder_DecodeVarint(d, ptr, &elem.uint64_val); - _upb_Decoder_MungeInt32(&elem); - if (!_upb_Decoder_CheckEnum(d, ptr, msg, e, field, &elem)) { - continue; - } - if (_upb_Decoder_Reserve(d, arr, 1)) { - out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size * 4, void); - } - arr->size++; - memcpy(out, &elem, 4); - out += 4; - } - upb_EpsCopyInputStream_PopLimit(&d->input, ptr, saved_limit); - return ptr; -} -upb_Array* _upb_Decoder_CreateArray(upb_Decoder* d, - const upb_MiniTableField* field) { - /* Maps descriptor type -> elem_size_lg2. */ - static const uint8_t kElemSizeLg2[] = { - [0] = -1, // invalid descriptor type - [kUpb_FieldType_Double] = 3, - [kUpb_FieldType_Float] = 2, - [kUpb_FieldType_Int64] = 3, - [kUpb_FieldType_UInt64] = 3, - [kUpb_FieldType_Int32] = 2, - [kUpb_FieldType_Fixed64] = 3, - [kUpb_FieldType_Fixed32] = 2, - [kUpb_FieldType_Bool] = 0, - [kUpb_FieldType_String] = UPB_SIZE(3, 4), - [kUpb_FieldType_Group] = UPB_SIZE(2, 3), - [kUpb_FieldType_Message] = UPB_SIZE(2, 3), - [kUpb_FieldType_Bytes] = UPB_SIZE(3, 4), - [kUpb_FieldType_UInt32] = 2, - [kUpb_FieldType_Enum] = 2, - [kUpb_FieldType_SFixed32] = 2, - [kUpb_FieldType_SFixed64] = 3, - [kUpb_FieldType_SInt32] = 2, - [kUpb_FieldType_SInt64] = 3, - }; - size_t lg2 = kElemSizeLg2[field->UPB_PRIVATE(descriptortype)]; - upb_Array* ret = _upb_Array_New(&d->arena, 4, lg2); - if (!ret) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - return ret; -} +// Must be last. -static const char* _upb_Decoder_DecodeToArray(upb_Decoder* d, const char* ptr, - upb_Message* msg, - const upb_MiniTableSub* subs, - const upb_MiniTableField* field, - wireval* val, int op) { - upb_Array** arrp = UPB_PTR_AT(msg, field->offset, void); - upb_Array* arr = *arrp; - void* mem; +struct upb_MessageDef { + const UPB_DESC(MessageOptions) * opts; + const upb_MiniTable* layout; + const upb_FileDef* file; + const upb_MessageDef* containing_type; + const char* full_name; - if (arr) { - _upb_Decoder_Reserve(d, arr, 1); + // Tables for looking up fields by number and name. + upb_inttable itof; + upb_strtable ntof; + + /* All nested defs. + * MEM: We could save some space here by putting nested defs in a contiguous + * region and calculating counts from offsets or vice-versa. */ + const upb_FieldDef* fields; + const upb_OneofDef* oneofs; + const upb_ExtensionRange* ext_ranges; + const upb_StringView* res_names; + const upb_MessageDef* nested_msgs; + const upb_MessageReservedRange* res_ranges; + const upb_EnumDef* nested_enums; + const upb_FieldDef* nested_exts; + + // TODO(salo): These counters don't need anywhere near 32 bits. + int field_count; + int real_oneof_count; + int oneof_count; + int ext_range_count; + int res_range_count; + int res_name_count; + int nested_msg_count; + int nested_enum_count; + int nested_ext_count; + bool in_message_set; + bool is_sorted; + upb_WellKnown well_known_type; +#if UINTPTR_MAX == 0xffffffff + uint32_t padding; // Increase size to a multiple of 8. +#endif +}; + +static void assign_msg_wellknowntype(upb_MessageDef* m) { + const char* name = m->full_name; + if (name == NULL) { + m->well_known_type = kUpb_WellKnown_Unspecified; + return; + } + if (!strcmp(name, "google.protobuf.Any")) { + m->well_known_type = kUpb_WellKnown_Any; + } else if (!strcmp(name, "google.protobuf.FieldMask")) { + m->well_known_type = kUpb_WellKnown_FieldMask; + } else if (!strcmp(name, "google.protobuf.Duration")) { + m->well_known_type = kUpb_WellKnown_Duration; + } else if (!strcmp(name, "google.protobuf.Timestamp")) { + m->well_known_type = kUpb_WellKnown_Timestamp; + } else if (!strcmp(name, "google.protobuf.DoubleValue")) { + m->well_known_type = kUpb_WellKnown_DoubleValue; + } else if (!strcmp(name, "google.protobuf.FloatValue")) { + m->well_known_type = kUpb_WellKnown_FloatValue; + } else if (!strcmp(name, "google.protobuf.Int64Value")) { + m->well_known_type = kUpb_WellKnown_Int64Value; + } else if (!strcmp(name, "google.protobuf.UInt64Value")) { + m->well_known_type = kUpb_WellKnown_UInt64Value; + } else if (!strcmp(name, "google.protobuf.Int32Value")) { + m->well_known_type = kUpb_WellKnown_Int32Value; + } else if (!strcmp(name, "google.protobuf.UInt32Value")) { + m->well_known_type = kUpb_WellKnown_UInt32Value; + } else if (!strcmp(name, "google.protobuf.BoolValue")) { + m->well_known_type = kUpb_WellKnown_BoolValue; + } else if (!strcmp(name, "google.protobuf.StringValue")) { + m->well_known_type = kUpb_WellKnown_StringValue; + } else if (!strcmp(name, "google.protobuf.BytesValue")) { + m->well_known_type = kUpb_WellKnown_BytesValue; + } else if (!strcmp(name, "google.protobuf.Value")) { + m->well_known_type = kUpb_WellKnown_Value; + } else if (!strcmp(name, "google.protobuf.ListValue")) { + m->well_known_type = kUpb_WellKnown_ListValue; + } else if (!strcmp(name, "google.protobuf.Struct")) { + m->well_known_type = kUpb_WellKnown_Struct; } else { - arr = _upb_Decoder_CreateArray(d, field); - *arrp = arr; + m->well_known_type = kUpb_WellKnown_Unspecified; } +} - switch (op) { - case kUpb_DecodeOp_Scalar1Byte: - case kUpb_DecodeOp_Scalar4Byte: - case kUpb_DecodeOp_Scalar8Byte: - /* Append scalar value. */ - mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << op, void); - arr->size++; - memcpy(mem, val, 1 << op); - return ptr; - case kUpb_DecodeOp_String: - _upb_Decoder_VerifyUtf8(d, ptr, val->size); - /* Fallthrough. */ - case kUpb_DecodeOp_Bytes: { - /* Append bytes. */ - upb_StringView* str = (upb_StringView*)_upb_array_ptr(arr) + arr->size; - arr->size++; - return _upb_Decoder_ReadString(d, ptr, val->size, str); - } - case kUpb_DecodeOp_SubMessage: { - /* Append submessage / group. */ - upb_TaggedMessagePtr* target = UPB_PTR_AT( - _upb_array_ptr(arr), arr->size * sizeof(void*), upb_TaggedMessagePtr); - upb_Message* submsg = _upb_Decoder_NewSubMessage(d, subs, field, target); - arr->size++; - if (UPB_UNLIKELY(field->UPB_PRIVATE(descriptortype) == - kUpb_FieldType_Group)) { - return _upb_Decoder_DecodeKnownGroup(d, ptr, submsg, subs, field); - } else { - return _upb_Decoder_DecodeSubMessage(d, ptr, submsg, subs, field, - val->size); - } +upb_MessageDef* _upb_MessageDef_At(const upb_MessageDef* m, int i) { + return (upb_MessageDef*)&m[i]; +} + +bool _upb_MessageDef_IsValidExtensionNumber(const upb_MessageDef* m, int n) { + for (int i = 0; i < m->ext_range_count; i++) { + const upb_ExtensionRange* r = upb_MessageDef_ExtensionRange(m, i); + if (upb_ExtensionRange_Start(r) <= n && n < upb_ExtensionRange_End(r)) { + return true; } - case OP_FIXPCK_LG2(2): - case OP_FIXPCK_LG2(3): - return _upb_Decoder_DecodeFixedPacked(d, ptr, arr, val, field, - op - OP_FIXPCK_LG2(0)); - case OP_VARPCK_LG2(0): - case OP_VARPCK_LG2(2): - case OP_VARPCK_LG2(3): - return _upb_Decoder_DecodeVarintPacked(d, ptr, arr, val, field, - op - OP_VARPCK_LG2(0)); - case kUpb_DecodeOp_Enum: - return _upb_Decoder_DecodeEnumArray(d, ptr, msg, arr, subs, field, val); - case kUpb_DecodeOp_PackedEnum: - return _upb_Decoder_DecodeEnumPacked(d, ptr, msg, arr, subs, field, val); - default: - UPB_UNREACHABLE(); } + return false; } -upb_Map* _upb_Decoder_CreateMap(upb_Decoder* d, const upb_MiniTable* entry) { - /* Maps descriptor type -> upb map size. */ - static const uint8_t kSizeInMap[] = { - [0] = -1, // invalid descriptor type */ - [kUpb_FieldType_Double] = 8, - [kUpb_FieldType_Float] = 4, - [kUpb_FieldType_Int64] = 8, - [kUpb_FieldType_UInt64] = 8, - [kUpb_FieldType_Int32] = 4, - [kUpb_FieldType_Fixed64] = 8, - [kUpb_FieldType_Fixed32] = 4, - [kUpb_FieldType_Bool] = 1, - [kUpb_FieldType_String] = UPB_MAPTYPE_STRING, - [kUpb_FieldType_Group] = sizeof(void*), - [kUpb_FieldType_Message] = sizeof(void*), - [kUpb_FieldType_Bytes] = UPB_MAPTYPE_STRING, - [kUpb_FieldType_UInt32] = 4, - [kUpb_FieldType_Enum] = 4, - [kUpb_FieldType_SFixed32] = 4, - [kUpb_FieldType_SFixed64] = 8, - [kUpb_FieldType_SInt32] = 4, - [kUpb_FieldType_SInt64] = 8, - }; +const UPB_DESC(MessageOptions) * + upb_MessageDef_Options(const upb_MessageDef* m) { + return m->opts; +} - const upb_MiniTableField* key_field = &entry->fields[0]; - const upb_MiniTableField* val_field = &entry->fields[1]; - char key_size = kSizeInMap[key_field->UPB_PRIVATE(descriptortype)]; - char val_size = kSizeInMap[val_field->UPB_PRIVATE(descriptortype)]; - UPB_ASSERT(key_field->offset == offsetof(upb_MapEntryData, k)); - UPB_ASSERT(val_field->offset == offsetof(upb_MapEntryData, v)); - upb_Map* ret = _upb_Map_New(&d->arena, key_size, val_size); - if (!ret) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - return ret; +bool upb_MessageDef_HasOptions(const upb_MessageDef* m) { + return m->opts != (void*)kUpbDefOptDefault; } -static const char* _upb_Decoder_DecodeToMap(upb_Decoder* d, const char* ptr, - upb_Message* msg, - const upb_MiniTableSub* subs, - const upb_MiniTableField* field, - wireval* val) { - upb_Map** map_p = UPB_PTR_AT(msg, field->offset, upb_Map*); - upb_Map* map = *map_p; - upb_MapEntry ent; - UPB_ASSERT(upb_MiniTableField_Type(field) == kUpb_FieldType_Message); - const upb_MiniTable* entry = subs[field->UPB_PRIVATE(submsg_index)].submsg; +const char* upb_MessageDef_FullName(const upb_MessageDef* m) { + return m->full_name; +} - UPB_ASSERT(entry); - UPB_ASSERT(entry->field_count == 2); - UPB_ASSERT(!upb_IsRepeatedOrMap(&entry->fields[0])); - UPB_ASSERT(!upb_IsRepeatedOrMap(&entry->fields[1])); +const upb_FileDef* upb_MessageDef_File(const upb_MessageDef* m) { + return m->file; +} - if (!map) { - map = _upb_Decoder_CreateMap(d, entry); - *map_p = map; - } +const upb_MessageDef* upb_MessageDef_ContainingType(const upb_MessageDef* m) { + return m->containing_type; +} - // Parse map entry. - memset(&ent, 0, sizeof(ent)); +const char* upb_MessageDef_Name(const upb_MessageDef* m) { + return _upb_DefBuilder_FullToShort(m->full_name); +} - if (entry->fields[1].UPB_PRIVATE(descriptortype) == kUpb_FieldType_Message || - entry->fields[1].UPB_PRIVATE(descriptortype) == kUpb_FieldType_Group) { - // Create proactively to handle the case where it doesn't appear. - upb_TaggedMessagePtr msg; - _upb_Decoder_NewSubMessage(d, entry->subs, &entry->fields[1], &msg); - ent.data.v.val = upb_value_uintptr(msg); - } +upb_Syntax upb_MessageDef_Syntax(const upb_MessageDef* m) { + return upb_FileDef_Syntax(m->file); +} - ptr = - _upb_Decoder_DecodeSubMessage(d, ptr, &ent.data, subs, field, val->size); - // check if ent had any unknown fields - size_t size; - upb_Message_GetUnknown(&ent.data, &size); - if (size != 0) { - char* buf; - size_t size; - uint32_t tag = ((uint32_t)field->number << 3) | kUpb_WireType_Delimited; - upb_EncodeStatus status = - upb_Encode(&ent.data, entry, 0, &d->arena, &buf, &size); - if (status != kUpb_EncodeStatus_Ok) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - } - _upb_Decoder_AddUnknownVarints(d, msg, tag, size); - if (!_upb_Message_AddUnknown(msg, buf, size, &d->arena)) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - } - } else { - if (_upb_Map_Insert(map, &ent.data.k, map->key_size, &ent.data.v, - map->val_size, - &d->arena) == kUpb_MapInsertStatus_OutOfMemory) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - } - } - return ptr; +const upb_FieldDef* upb_MessageDef_FindFieldByNumber(const upb_MessageDef* m, + uint32_t i) { + upb_value val; + return upb_inttable_lookup(&m->itof, i, &val) ? upb_value_getconstptr(val) + : NULL; } -static const char* _upb_Decoder_DecodeToSubMessage( - upb_Decoder* d, const char* ptr, upb_Message* msg, - const upb_MiniTableSub* subs, const upb_MiniTableField* field, wireval* val, - int op) { - void* mem = UPB_PTR_AT(msg, field->offset, void); - int type = field->UPB_PRIVATE(descriptortype); +const upb_FieldDef* upb_MessageDef_FindFieldByNameWithSize( + const upb_MessageDef* m, const char* name, size_t size) { + upb_value val; - if (UPB_UNLIKELY(op == kUpb_DecodeOp_Enum) && - !_upb_Decoder_CheckEnum(d, ptr, msg, - subs[field->UPB_PRIVATE(submsg_index)].subenum, - field, val)) { - return ptr; + if (!upb_strtable_lookup2(&m->ntof, name, size, &val)) { + return NULL; } - /* Set presence if necessary. */ - if (field->presence > 0) { - _upb_sethas_field(msg, field); - } else if (field->presence < 0) { - /* Oneof case */ - uint32_t* oneof_case = _upb_oneofcase_field(msg, field); - if (op == kUpb_DecodeOp_SubMessage && *oneof_case != field->number) { - memset(mem, 0, sizeof(void*)); - } - *oneof_case = field->number; - } + return _upb_DefType_Unpack(val, UPB_DEFTYPE_FIELD); +} - /* Store into message. */ - switch (op) { - case kUpb_DecodeOp_SubMessage: { - upb_TaggedMessagePtr* submsgp = mem; - upb_Message* submsg; - if (*submsgp) { - submsg = _upb_Decoder_ReuseSubMessage(d, subs, field, submsgp); - } else { - submsg = _upb_Decoder_NewSubMessage(d, subs, field, submsgp); - } - if (UPB_UNLIKELY(type == kUpb_FieldType_Group)) { - ptr = _upb_Decoder_DecodeKnownGroup(d, ptr, submsg, subs, field); - } else { - ptr = _upb_Decoder_DecodeSubMessage(d, ptr, submsg, subs, field, - val->size); - } - break; - } - case kUpb_DecodeOp_String: - _upb_Decoder_VerifyUtf8(d, ptr, val->size); - /* Fallthrough. */ - case kUpb_DecodeOp_Bytes: - return _upb_Decoder_ReadString(d, ptr, val->size, mem); - case kUpb_DecodeOp_Scalar8Byte: - memcpy(mem, val, 8); - break; - case kUpb_DecodeOp_Enum: - case kUpb_DecodeOp_Scalar4Byte: - memcpy(mem, val, 4); - break; - case kUpb_DecodeOp_Scalar1Byte: - memcpy(mem, val, 1); - break; - default: - UPB_UNREACHABLE(); +const upb_OneofDef* upb_MessageDef_FindOneofByNameWithSize( + const upb_MessageDef* m, const char* name, size_t size) { + upb_value val; + + if (!upb_strtable_lookup2(&m->ntof, name, size, &val)) { + return NULL; } - return ptr; + return _upb_DefType_Unpack(val, UPB_DEFTYPE_ONEOF); } -UPB_NOINLINE -const char* _upb_Decoder_CheckRequired(upb_Decoder* d, const char* ptr, - const upb_Message* msg, - const upb_MiniTable* l) { - assert(l->required_count); - if (UPB_LIKELY((d->options & kUpb_DecodeOption_CheckRequired) == 0)) { - return ptr; - } - uint64_t msg_head; - memcpy(&msg_head, msg, 8); - msg_head = _upb_BigEndian_Swap64(msg_head); - if (upb_MiniTable_requiredmask(l) & ~msg_head) { - d->missing_required = true; - } - return ptr; +bool _upb_MessageDef_Insert(upb_MessageDef* m, const char* name, size_t len, + upb_value v, upb_Arena* a) { + return upb_strtable_insert(&m->ntof, name, len, v, a); } -UPB_FORCEINLINE -static bool _upb_Decoder_TryFastDispatch(upb_Decoder* d, const char** ptr, - upb_Message* msg, - const upb_MiniTable* layout) { -#if UPB_FASTTABLE - if (layout && layout->table_mask != (unsigned char)-1) { - uint16_t tag = _upb_FastDecoder_LoadTag(*ptr); - intptr_t table = decode_totable(layout); - *ptr = _upb_FastDecoder_TagDispatch(d, *ptr, msg, table, 0, tag); - return true; +bool upb_MessageDef_FindByNameWithSize(const upb_MessageDef* m, + const char* name, size_t len, + const upb_FieldDef** out_f, + const upb_OneofDef** out_o) { + upb_value val; + + if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) { + return false; } -#endif - return false; + + const upb_FieldDef* f = _upb_DefType_Unpack(val, UPB_DEFTYPE_FIELD); + const upb_OneofDef* o = _upb_DefType_Unpack(val, UPB_DEFTYPE_ONEOF); + if (out_f) *out_f = f; + if (out_o) *out_o = o; + return f || o; /* False if this was a JSON name. */ } -static const char* upb_Decoder_SkipField(upb_Decoder* d, const char* ptr, - uint32_t tag) { - int field_number = tag >> 3; - int wire_type = tag & 7; - switch (wire_type) { - case kUpb_WireType_Varint: { - uint64_t val; - return _upb_Decoder_DecodeVarint(d, ptr, &val); - } - case kUpb_WireType_64Bit: - return ptr + 8; - case kUpb_WireType_32Bit: - return ptr + 4; - case kUpb_WireType_Delimited: { - uint32_t size; - ptr = upb_Decoder_DecodeSize(d, ptr, &size); - return ptr + size; - } - case kUpb_WireType_StartGroup: - return _upb_Decoder_DecodeUnknownGroup(d, ptr, field_number); - default: - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); +const upb_FieldDef* upb_MessageDef_FindByJsonNameWithSize( + const upb_MessageDef* m, const char* name, size_t size) { + upb_value val; + const upb_FieldDef* f; + + if (!upb_strtable_lookup2(&m->ntof, name, size, &val)) { + return NULL; } + + f = _upb_DefType_Unpack(val, UPB_DEFTYPE_FIELD); + if (!f) f = _upb_DefType_Unpack(val, UPB_DEFTYPE_FIELD_JSONNAME); + + return f; } -enum { - kStartItemTag = ((kUpb_MsgSet_Item << 3) | kUpb_WireType_StartGroup), - kEndItemTag = ((kUpb_MsgSet_Item << 3) | kUpb_WireType_EndGroup), - kTypeIdTag = ((kUpb_MsgSet_TypeId << 3) | kUpb_WireType_Varint), - kMessageTag = ((kUpb_MsgSet_Message << 3) | kUpb_WireType_Delimited), -}; +int upb_MessageDef_ExtensionRangeCount(const upb_MessageDef* m) { + return m->ext_range_count; +} -static void upb_Decoder_AddKnownMessageSetItem( - upb_Decoder* d, upb_Message* msg, const upb_MiniTableExtension* item_mt, - const char* data, uint32_t size) { - upb_Message_Extension* ext = - _upb_Message_GetOrCreateExtension(msg, item_mt, &d->arena); - if (UPB_UNLIKELY(!ext)) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - } - upb_Message* submsg = _upb_Decoder_NewSubMessage( - d, &ext->ext->sub, &ext->ext->field, (upb_TaggedMessagePtr*)&ext->data); - upb_DecodeStatus status = upb_Decode(data, size, submsg, item_mt->sub.submsg, - d->extreg, d->options, &d->arena); - if (status != kUpb_DecodeStatus_Ok) _upb_Decoder_ErrorJmp(d, status); +int upb_MessageDef_ReservedRangeCount(const upb_MessageDef* m) { + return m->res_range_count; } -static void upb_Decoder_AddUnknownMessageSetItem(upb_Decoder* d, - upb_Message* msg, - uint32_t type_id, - const char* message_data, - uint32_t message_size) { - char buf[60]; - char* ptr = buf; - ptr = upb_Decoder_EncodeVarint32(kStartItemTag, ptr); - ptr = upb_Decoder_EncodeVarint32(kTypeIdTag, ptr); - ptr = upb_Decoder_EncodeVarint32(type_id, ptr); - ptr = upb_Decoder_EncodeVarint32(kMessageTag, ptr); - ptr = upb_Decoder_EncodeVarint32(message_size, ptr); - char* split = ptr; +int upb_MessageDef_ReservedNameCount(const upb_MessageDef* m) { + return m->res_name_count; +} - ptr = upb_Decoder_EncodeVarint32(kEndItemTag, ptr); - char* end = ptr; +int upb_MessageDef_FieldCount(const upb_MessageDef* m) { + return m->field_count; +} - if (!_upb_Message_AddUnknown(msg, buf, split - buf, &d->arena) || - !_upb_Message_AddUnknown(msg, message_data, message_size, &d->arena) || - !_upb_Message_AddUnknown(msg, split, end - split, &d->arena)) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - } +int upb_MessageDef_OneofCount(const upb_MessageDef* m) { + return m->oneof_count; } -static void upb_Decoder_AddMessageSetItem(upb_Decoder* d, upb_Message* msg, - const upb_MiniTable* t, - uint32_t type_id, const char* data, - uint32_t size) { - const upb_MiniTableExtension* item_mt = - upb_ExtensionRegistry_Lookup(d->extreg, t, type_id); - if (item_mt) { - upb_Decoder_AddKnownMessageSetItem(d, msg, item_mt, data, size); - } else { - upb_Decoder_AddUnknownMessageSetItem(d, msg, type_id, data, size); - } +int upb_MessageDef_RealOneofCount(const upb_MessageDef* m) { + return m->real_oneof_count; } -static const char* upb_Decoder_DecodeMessageSetItem( - upb_Decoder* d, const char* ptr, upb_Message* msg, - const upb_MiniTable* layout) { - uint32_t type_id = 0; - upb_StringView preserved = {NULL, 0}; - typedef enum { - kUpb_HaveId = 1 << 0, - kUpb_HavePayload = 1 << 1, - } StateMask; - StateMask state_mask = 0; - while (!_upb_Decoder_IsDone(d, &ptr)) { - uint32_t tag; - ptr = _upb_Decoder_DecodeTag(d, ptr, &tag); - switch (tag) { - case kEndItemTag: - return ptr; - case kTypeIdTag: { - uint64_t tmp; - ptr = _upb_Decoder_DecodeVarint(d, ptr, &tmp); - if (state_mask & kUpb_HaveId) break; // Ignore dup. - state_mask |= kUpb_HaveId; - type_id = tmp; - if (state_mask & kUpb_HavePayload) { - upb_Decoder_AddMessageSetItem(d, msg, layout, type_id, preserved.data, - preserved.size); - } - break; - } - case kMessageTag: { - uint32_t size; - ptr = upb_Decoder_DecodeSize(d, ptr, &size); - const char* data = ptr; - ptr += size; - if (state_mask & kUpb_HavePayload) break; // Ignore dup. - state_mask |= kUpb_HavePayload; - if (state_mask & kUpb_HaveId) { - upb_Decoder_AddMessageSetItem(d, msg, layout, type_id, data, size); - } else { - // Out of order, we must preserve the payload. - preserved.data = data; - preserved.size = size; - } - break; - } - default: - // We do not preserve unexpected fields inside a message set item. - ptr = upb_Decoder_SkipField(d, ptr, tag); - break; - } - } - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); +int upb_MessageDef_NestedMessageCount(const upb_MessageDef* m) { + return m->nested_msg_count; } -static const upb_MiniTableField* _upb_Decoder_FindField(upb_Decoder* d, - const upb_MiniTable* t, - uint32_t field_number, - int* last_field_index) { - static upb_MiniTableField none = { - 0, 0, 0, 0, kUpb_FakeFieldType_FieldNotFound, 0}; - if (t == NULL) return &none; +int upb_MessageDef_NestedEnumCount(const upb_MessageDef* m) { + return m->nested_enum_count; +} - size_t idx = ((size_t)field_number) - 1; // 0 wraps to SIZE_MAX - if (idx < t->dense_below) { - /* Fastest case: index into dense fields. */ - goto found; - } +int upb_MessageDef_NestedExtensionCount(const upb_MessageDef* m) { + return m->nested_ext_count; +} - if (t->dense_below < t->field_count) { - /* Linear search non-dense fields. Resume scanning from last_field_index - * since fields are usually in order. */ - size_t last = *last_field_index; - for (idx = last; idx < t->field_count; idx++) { - if (t->fields[idx].number == field_number) { - goto found; - } - } +const upb_MiniTable* upb_MessageDef_MiniTable(const upb_MessageDef* m) { + return m->layout; +} - for (idx = t->dense_below; idx < last; idx++) { - if (t->fields[idx].number == field_number) { - goto found; - } - } - } +const upb_ExtensionRange* upb_MessageDef_ExtensionRange(const upb_MessageDef* m, + int i) { + UPB_ASSERT(0 <= i && i < m->ext_range_count); + return _upb_ExtensionRange_At(m->ext_ranges, i); +} - if (d->extreg) { - switch (t->ext) { - case kUpb_ExtMode_Extendable: { - const upb_MiniTableExtension* ext = - upb_ExtensionRegistry_Lookup(d->extreg, t, field_number); - if (ext) return &ext->field; - break; - } - case kUpb_ExtMode_IsMessageSet: - if (field_number == kUpb_MsgSet_Item) { - static upb_MiniTableField item = { - 0, 0, 0, 0, kUpb_FakeFieldType_MessageSetItem, 0}; - return &item; - } - break; - } - } +const upb_MessageReservedRange* upb_MessageDef_ReservedRange( + const upb_MessageDef* m, int i) { + UPB_ASSERT(0 <= i && i < m->res_range_count); + return _upb_MessageReservedRange_At(m->res_ranges, i); +} - return &none; /* Unknown field. */ +upb_StringView upb_MessageDef_ReservedName(const upb_MessageDef* m, int i) { + UPB_ASSERT(0 <= i && i < m->res_name_count); + return m->res_names[i]; +} -found: - UPB_ASSERT(t->fields[idx].number == field_number); - *last_field_index = idx; - return &t->fields[idx]; +const upb_FieldDef* upb_MessageDef_Field(const upb_MessageDef* m, int i) { + UPB_ASSERT(0 <= i && i < m->field_count); + return _upb_FieldDef_At(m->fields, i); } -int _upb_Decoder_GetVarintOp(const upb_MiniTableField* field) { - static const int8_t kVarintOps[] = { - [kUpb_FakeFieldType_FieldNotFound] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Double] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Float] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Int64] = kUpb_DecodeOp_Scalar8Byte, - [kUpb_FieldType_UInt64] = kUpb_DecodeOp_Scalar8Byte, - [kUpb_FieldType_Int32] = kUpb_DecodeOp_Scalar4Byte, - [kUpb_FieldType_Fixed64] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Fixed32] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Bool] = kUpb_DecodeOp_Scalar1Byte, - [kUpb_FieldType_String] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Group] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Message] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Bytes] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_UInt32] = kUpb_DecodeOp_Scalar4Byte, - [kUpb_FieldType_Enum] = kUpb_DecodeOp_Enum, - [kUpb_FieldType_SFixed32] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_SFixed64] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_SInt32] = kUpb_DecodeOp_Scalar4Byte, - [kUpb_FieldType_SInt64] = kUpb_DecodeOp_Scalar8Byte, - [kUpb_FakeFieldType_MessageSetItem] = kUpb_DecodeOp_UnknownField, - }; +const upb_OneofDef* upb_MessageDef_Oneof(const upb_MessageDef* m, int i) { + UPB_ASSERT(0 <= i && i < m->oneof_count); + return _upb_OneofDef_At(m->oneofs, i); +} - return kVarintOps[field->UPB_PRIVATE(descriptortype)]; +const upb_MessageDef* upb_MessageDef_NestedMessage(const upb_MessageDef* m, + int i) { + UPB_ASSERT(0 <= i && i < m->nested_msg_count); + return &m->nested_msgs[i]; } -UPB_FORCEINLINE -static void _upb_Decoder_CheckUnlinked(upb_Decoder* d, const upb_MiniTable* mt, - const upb_MiniTableField* field, - int* op) { - // If sub-message is not linked, treat as unknown. - if (field->mode & kUpb_LabelFlags_IsExtension) return; - const upb_MiniTableSub* sub = &mt->subs[field->UPB_PRIVATE(submsg_index)]; - if ((d->options & kUpb_DecodeOption_ExperimentalAllowUnlinked) || - sub->submsg != &_kUpb_MiniTable_Empty) { - return; - } -#ifndef NDEBUG - const upb_MiniTableField* oneof = upb_MiniTable_GetOneof(mt, field); - if (oneof) { - // All other members of the oneof must be message fields that are also - // unlinked. - do { - assert(upb_MiniTableField_CType(oneof) == kUpb_CType_Message); - const upb_MiniTableSub* oneof_sub = - &mt->subs[oneof->UPB_PRIVATE(submsg_index)]; - assert(!oneof_sub); - } while (upb_MiniTable_NextOneofField(mt, &oneof)); - } -#endif // NDEBUG - *op = kUpb_DecodeOp_UnknownField; +const upb_EnumDef* upb_MessageDef_NestedEnum(const upb_MessageDef* m, int i) { + UPB_ASSERT(0 <= i && i < m->nested_enum_count); + return _upb_EnumDef_At(m->nested_enums, i); } -int _upb_Decoder_GetDelimitedOp(upb_Decoder* d, const upb_MiniTable* mt, - const upb_MiniTableField* field) { - enum { kRepeatedBase = 19 }; +const upb_FieldDef* upb_MessageDef_NestedExtension(const upb_MessageDef* m, + int i) { + UPB_ASSERT(0 <= i && i < m->nested_ext_count); + return _upb_FieldDef_At(m->nested_exts, i); +} - static const int8_t kDelimitedOps[] = { - /* For non-repeated field type. */ - [kUpb_FakeFieldType_FieldNotFound] = - kUpb_DecodeOp_UnknownField, // Field not found. - [kUpb_FieldType_Double] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Float] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Int64] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_UInt64] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Int32] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Fixed64] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Fixed32] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Bool] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_String] = kUpb_DecodeOp_String, - [kUpb_FieldType_Group] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Message] = kUpb_DecodeOp_SubMessage, - [kUpb_FieldType_Bytes] = kUpb_DecodeOp_Bytes, - [kUpb_FieldType_UInt32] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_Enum] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_SFixed32] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_SFixed64] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_SInt32] = kUpb_DecodeOp_UnknownField, - [kUpb_FieldType_SInt64] = kUpb_DecodeOp_UnknownField, - [kUpb_FakeFieldType_MessageSetItem] = kUpb_DecodeOp_UnknownField, - // For repeated field type. */ - [kRepeatedBase + kUpb_FieldType_Double] = OP_FIXPCK_LG2(3), - [kRepeatedBase + kUpb_FieldType_Float] = OP_FIXPCK_LG2(2), - [kRepeatedBase + kUpb_FieldType_Int64] = OP_VARPCK_LG2(3), - [kRepeatedBase + kUpb_FieldType_UInt64] = OP_VARPCK_LG2(3), - [kRepeatedBase + kUpb_FieldType_Int32] = OP_VARPCK_LG2(2), - [kRepeatedBase + kUpb_FieldType_Fixed64] = OP_FIXPCK_LG2(3), - [kRepeatedBase + kUpb_FieldType_Fixed32] = OP_FIXPCK_LG2(2), - [kRepeatedBase + kUpb_FieldType_Bool] = OP_VARPCK_LG2(0), - [kRepeatedBase + kUpb_FieldType_String] = kUpb_DecodeOp_String, - [kRepeatedBase + kUpb_FieldType_Group] = kUpb_DecodeOp_SubMessage, - [kRepeatedBase + kUpb_FieldType_Message] = kUpb_DecodeOp_SubMessage, - [kRepeatedBase + kUpb_FieldType_Bytes] = kUpb_DecodeOp_Bytes, - [kRepeatedBase + kUpb_FieldType_UInt32] = OP_VARPCK_LG2(2), - [kRepeatedBase + kUpb_FieldType_Enum] = kUpb_DecodeOp_PackedEnum, - [kRepeatedBase + kUpb_FieldType_SFixed32] = OP_FIXPCK_LG2(2), - [kRepeatedBase + kUpb_FieldType_SFixed64] = OP_FIXPCK_LG2(3), - [kRepeatedBase + kUpb_FieldType_SInt32] = OP_VARPCK_LG2(2), - [kRepeatedBase + kUpb_FieldType_SInt64] = OP_VARPCK_LG2(3), - // Omitting kUpb_FakeFieldType_MessageSetItem, because we never emit a - // repeated msgset type - }; +upb_WellKnown upb_MessageDef_WellKnownType(const upb_MessageDef* m) { + return m->well_known_type; +} - int ndx = field->UPB_PRIVATE(descriptortype); - if (upb_FieldMode_Get(field) == kUpb_FieldMode_Array) ndx += kRepeatedBase; - int op = kDelimitedOps[ndx]; +bool _upb_MessageDef_InMessageSet(const upb_MessageDef* m) { + return m->in_message_set; +} - if (op == kUpb_DecodeOp_SubMessage) { - _upb_Decoder_CheckUnlinked(d, mt, field, &op); - } +const upb_FieldDef* upb_MessageDef_FindFieldByName(const upb_MessageDef* m, + const char* name) { + return upb_MessageDef_FindFieldByNameWithSize(m, name, strlen(name)); +} - return op; +const upb_OneofDef* upb_MessageDef_FindOneofByName(const upb_MessageDef* m, + const char* name) { + return upb_MessageDef_FindOneofByNameWithSize(m, name, strlen(name)); } -UPB_FORCEINLINE -static const char* _upb_Decoder_DecodeWireValue(upb_Decoder* d, const char* ptr, - const upb_MiniTable* mt, - const upb_MiniTableField* field, - int wire_type, wireval* val, - int* op) { - static const unsigned kFixed32OkMask = (1 << kUpb_FieldType_Float) | - (1 << kUpb_FieldType_Fixed32) | - (1 << kUpb_FieldType_SFixed32); +bool upb_MessageDef_IsMapEntry(const upb_MessageDef* m) { + return UPB_DESC(MessageOptions_map_entry)(m->opts); +} - static const unsigned kFixed64OkMask = (1 << kUpb_FieldType_Double) | - (1 << kUpb_FieldType_Fixed64) | - (1 << kUpb_FieldType_SFixed64); +bool upb_MessageDef_IsMessageSet(const upb_MessageDef* m) { + return UPB_DESC(MessageOptions_message_set_wire_format)(m->opts); +} - switch (wire_type) { - case kUpb_WireType_Varint: - ptr = _upb_Decoder_DecodeVarint(d, ptr, &val->uint64_val); - *op = _upb_Decoder_GetVarintOp(field); - _upb_Decoder_Munge(field->UPB_PRIVATE(descriptortype), val); - return ptr; - case kUpb_WireType_32Bit: - *op = kUpb_DecodeOp_Scalar4Byte; - if (((1 << field->UPB_PRIVATE(descriptortype)) & kFixed32OkMask) == 0) { - *op = kUpb_DecodeOp_UnknownField; - } - return upb_WireReader_ReadFixed32(ptr, &val->uint32_val); - case kUpb_WireType_64Bit: - *op = kUpb_DecodeOp_Scalar8Byte; - if (((1 << field->UPB_PRIVATE(descriptortype)) & kFixed64OkMask) == 0) { - *op = kUpb_DecodeOp_UnknownField; - } - return upb_WireReader_ReadFixed64(ptr, &val->uint64_val); - case kUpb_WireType_Delimited: - ptr = upb_Decoder_DecodeSize(d, ptr, &val->size); - *op = _upb_Decoder_GetDelimitedOp(d, mt, field); - return ptr; - case kUpb_WireType_StartGroup: - val->uint32_val = field->number; - if (field->UPB_PRIVATE(descriptortype) == kUpb_FieldType_Group) { - *op = kUpb_DecodeOp_SubMessage; - _upb_Decoder_CheckUnlinked(d, mt, field, op); - } else if (field->UPB_PRIVATE(descriptortype) == - kUpb_FakeFieldType_MessageSetItem) { - *op = kUpb_DecodeOp_MessageSetItem; - } else { - *op = kUpb_DecodeOp_UnknownField; - } - return ptr; - default: - break; - } - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); +static upb_MiniTable* _upb_MessageDef_MakeMiniTable(upb_DefBuilder* ctx, + const upb_MessageDef* m) { + upb_StringView desc; + // Note: this will assign layout_index for fields, so upb_FieldDef_MiniTable() + // is safe to call only after this call. + bool ok = upb_MessageDef_MiniDescriptorEncode(m, ctx->tmp_arena, &desc); + if (!ok) _upb_DefBuilder_OomErr(ctx); + + void** scratch_data = _upb_DefPool_ScratchData(ctx->symtab); + size_t* scratch_size = _upb_DefPool_ScratchSize(ctx->symtab); + upb_MiniTable* ret = upb_MiniTable_BuildWithBuf( + desc.data, desc.size, ctx->platform, ctx->arena, scratch_data, + scratch_size, ctx->status); + if (!ret) _upb_DefBuilder_FailJmp(ctx); + + return ret; } -UPB_FORCEINLINE -static const char* _upb_Decoder_DecodeKnownField( - upb_Decoder* d, const char* ptr, upb_Message* msg, - const upb_MiniTable* layout, const upb_MiniTableField* field, int op, - wireval* val) { - const upb_MiniTableSub* subs = layout->subs; - uint8_t mode = field->mode; +void _upb_MessageDef_Resolve(upb_DefBuilder* ctx, upb_MessageDef* m) { + for (int i = 0; i < m->field_count; i++) { + upb_FieldDef* f = (upb_FieldDef*)upb_MessageDef_Field(m, i); + _upb_FieldDef_Resolve(ctx, m->full_name, f); + } - if (UPB_UNLIKELY(mode & kUpb_LabelFlags_IsExtension)) { - const upb_MiniTableExtension* ext_layout = - (const upb_MiniTableExtension*)field; - upb_Message_Extension* ext = - _upb_Message_GetOrCreateExtension(msg, ext_layout, &d->arena); - if (UPB_UNLIKELY(!ext)) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + m->in_message_set = false; + for (int i = 0; i < upb_MessageDef_NestedExtensionCount(m); i++) { + upb_FieldDef* ext = (upb_FieldDef*)upb_MessageDef_NestedExtension(m, i); + _upb_FieldDef_Resolve(ctx, m->full_name, ext); + if (upb_FieldDef_Type(ext) == kUpb_FieldType_Message && + upb_FieldDef_Label(ext) == kUpb_Label_Optional && + upb_FieldDef_MessageSubDef(ext) == m && + UPB_DESC(MessageOptions_message_set_wire_format)( + upb_MessageDef_Options(upb_FieldDef_ContainingType(ext)))) { + m->in_message_set = true; } - d->unknown_msg = msg; - msg = &ext->data; - subs = &ext->ext->sub; } - switch (mode & kUpb_FieldMode_Mask) { - case kUpb_FieldMode_Array: - return _upb_Decoder_DecodeToArray(d, ptr, msg, subs, field, val, op); - case kUpb_FieldMode_Map: - return _upb_Decoder_DecodeToMap(d, ptr, msg, subs, field, val); - case kUpb_FieldMode_Scalar: - return _upb_Decoder_DecodeToSubMessage(d, ptr, msg, subs, field, val, op); - default: - UPB_UNREACHABLE(); + for (int i = 0; i < upb_MessageDef_NestedMessageCount(m); i++) { + upb_MessageDef* n = (upb_MessageDef*)upb_MessageDef_NestedMessage(m, i); + _upb_MessageDef_Resolve(ctx, n); } } -static const char* _upb_Decoder_ReverseSkipVarint(const char* ptr, - uint32_t val) { - uint32_t seen = 0; - do { - ptr--; - seen <<= 7; - seen |= *ptr & 0x7f; - } while (seen != val); - return ptr; -} - -static const char* _upb_Decoder_DecodeUnknownField(upb_Decoder* d, - const char* ptr, - upb_Message* msg, - int field_number, - int wire_type, wireval val) { - if (field_number == 0) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); +void _upb_MessageDef_InsertField(upb_DefBuilder* ctx, upb_MessageDef* m, + const upb_FieldDef* f) { + const int32_t field_number = upb_FieldDef_Number(f); - // Since unknown fields are the uncommon case, we do a little extra work here - // to walk backwards through the buffer to find the field start. This frees - // up a register in the fast paths (when the field is known), which leads to - // significant speedups in benchmarks. - const char* start = ptr; + if (field_number <= 0 || field_number > kUpb_MaxFieldNumber) { + _upb_DefBuilder_Errf(ctx, "invalid field number (%u)", field_number); + } - if (wire_type == kUpb_WireType_Delimited) ptr += val.size; - if (msg) { - switch (wire_type) { - case kUpb_WireType_Varint: - case kUpb_WireType_Delimited: - start--; - while (start[-1] & 0x80) start--; - break; - case kUpb_WireType_32Bit: - start -= 4; - break; - case kUpb_WireType_64Bit: - start -= 8; - break; - default: - break; - } + const char* json_name = upb_FieldDef_JsonName(f); + const char* shortname = upb_FieldDef_Name(f); + const size_t shortnamelen = strlen(shortname); - assert(start == d->debug_valstart); - uint32_t tag = ((uint32_t)field_number << 3) | wire_type; - start = _upb_Decoder_ReverseSkipVarint(start, tag); - assert(start == d->debug_tagstart); + upb_value v = upb_value_constptr(f); - if (wire_type == kUpb_WireType_StartGroup) { - d->unknown = start; - d->unknown_msg = msg; - ptr = _upb_Decoder_DecodeUnknownGroup(d, ptr, field_number); - start = d->unknown; - d->unknown = NULL; - } - if (!_upb_Message_AddUnknown(msg, start, ptr - start, &d->arena)) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - } - } else if (wire_type == kUpb_WireType_StartGroup) { - ptr = _upb_Decoder_DecodeUnknownGroup(d, ptr, field_number); + upb_value existing_v; + if (upb_strtable_lookup(&m->ntof, shortname, &existing_v)) { + _upb_DefBuilder_Errf(ctx, "duplicate field name (%s)", shortname); } - return ptr; -} -UPB_NOINLINE -static const char* _upb_Decoder_DecodeMessage(upb_Decoder* d, const char* ptr, - upb_Message* msg, - const upb_MiniTable* layout) { - int last_field_index = 0; + const upb_value field_v = _upb_DefType_Pack(f, UPB_DEFTYPE_FIELD); + bool ok = + _upb_MessageDef_Insert(m, shortname, shortnamelen, field_v, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); -#if UPB_FASTTABLE - // The first time we want to skip fast dispatch, because we may have just been - // invoked by the fast parser to handle a case that it bailed on. - if (!_upb_Decoder_IsDone(d, &ptr)) goto nofast; -#endif + if (strcmp(shortname, json_name) != 0) { + if (upb_strtable_lookup(&m->ntof, json_name, &v)) { + _upb_DefBuilder_Errf(ctx, "duplicate json_name (%s)", json_name); + } - while (!_upb_Decoder_IsDone(d, &ptr)) { - uint32_t tag; - const upb_MiniTableField* field; - int field_number; - int wire_type; - wireval val; - int op; + const size_t json_size = strlen(json_name); + const upb_value json_v = _upb_DefType_Pack(f, UPB_DEFTYPE_FIELD_JSONNAME); + ok = _upb_MessageDef_Insert(m, json_name, json_size, json_v, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); + } - if (_upb_Decoder_TryFastDispatch(d, &ptr, msg, layout)) break; + if (upb_inttable_lookup(&m->itof, field_number, NULL)) { + _upb_DefBuilder_Errf(ctx, "duplicate field number (%u)", field_number); + } -#if UPB_FASTTABLE - nofast: -#endif + ok = upb_inttable_insert(&m->itof, field_number, v, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); +} -#ifndef NDEBUG - d->debug_tagstart = ptr; -#endif +void _upb_MessageDef_CreateMiniTable(upb_DefBuilder* ctx, upb_MessageDef* m) { + if (ctx->layout == NULL) { + m->layout = _upb_MessageDef_MakeMiniTable(ctx, m); + } else { + UPB_ASSERT(ctx->msg_count < ctx->layout->msg_count); + m->layout = ctx->layout->msgs[ctx->msg_count++]; + UPB_ASSERT(m->field_count == m->layout->field_count); - UPB_ASSERT(ptr < d->input.limit_ptr); - ptr = _upb_Decoder_DecodeTag(d, ptr, &tag); - field_number = tag >> 3; - wire_type = tag & 7; + // We don't need the result of this call, but it will assign layout_index + // for all the fields in O(n lg n) time. + _upb_FieldDefs_Sorted(m->fields, m->field_count, ctx->tmp_arena); + } -#ifndef NDEBUG - d->debug_valstart = ptr; -#endif + for (int i = 0; i < m->nested_msg_count; i++) { + upb_MessageDef* nested = + (upb_MessageDef*)upb_MessageDef_NestedMessage(m, i); + _upb_MessageDef_CreateMiniTable(ctx, nested); + } +} - if (wire_type == kUpb_WireType_EndGroup) { - d->end_group = field_number; - return ptr; - } +void _upb_MessageDef_LinkMiniTable(upb_DefBuilder* ctx, + const upb_MessageDef* m) { + for (int i = 0; i < upb_MessageDef_NestedExtensionCount(m); i++) { + const upb_FieldDef* ext = upb_MessageDef_NestedExtension(m, i); + _upb_FieldDef_BuildMiniTableExtension(ctx, ext); + } - field = _upb_Decoder_FindField(d, layout, field_number, &last_field_index); - ptr = _upb_Decoder_DecodeWireValue(d, ptr, layout, field, wire_type, &val, - &op); + for (int i = 0; i < m->nested_msg_count; i++) { + _upb_MessageDef_LinkMiniTable(ctx, upb_MessageDef_NestedMessage(m, i)); + } - if (op >= 0) { - ptr = _upb_Decoder_DecodeKnownField(d, ptr, msg, layout, field, op, &val); - } else { - switch (op) { - case kUpb_DecodeOp_UnknownField: - ptr = _upb_Decoder_DecodeUnknownField(d, ptr, msg, field_number, - wire_type, val); - break; - case kUpb_DecodeOp_MessageSetItem: - ptr = upb_Decoder_DecodeMessageSetItem(d, ptr, msg, layout); - break; + if (ctx->layout) return; + + for (int i = 0; i < m->field_count; i++) { + const upb_FieldDef* f = upb_MessageDef_Field(m, i); + const upb_MessageDef* sub_m = upb_FieldDef_MessageSubDef(f); + const upb_EnumDef* sub_e = upb_FieldDef_EnumSubDef(f); + const int layout_index = _upb_FieldDef_LayoutIndex(f); + upb_MiniTable* mt = (upb_MiniTable*)upb_MessageDef_MiniTable(m); + + UPB_ASSERT(layout_index < m->field_count); + upb_MiniTableField* mt_f = + (upb_MiniTableField*)&m->layout->fields[layout_index]; + if (sub_m) { + if (!mt->subs) { + _upb_DefBuilder_Errf(ctx, "unexpected submsg for (%s)", m->full_name); + } + UPB_ASSERT(mt_f); + UPB_ASSERT(sub_m->layout); + if (UPB_UNLIKELY(!upb_MiniTable_SetSubMessage(mt, mt_f, sub_m->layout))) { + _upb_DefBuilder_Errf(ctx, "invalid submsg for (%s)", m->full_name); + } + } else if (_upb_FieldDef_IsClosedEnum(f)) { + const upb_MiniTableEnum* mt_e = _upb_EnumDef_MiniTable(sub_e); + if (UPB_UNLIKELY(!upb_MiniTable_SetSubEnum(mt, mt_f, mt_e))) { + _upb_DefBuilder_Errf(ctx, "invalid subenum for (%s)", m->full_name); } } } - return UPB_UNLIKELY(layout && layout->required_count) - ? _upb_Decoder_CheckRequired(d, ptr, msg, layout) - : ptr; -} - -const char* _upb_FastDecoder_DecodeGeneric(struct upb_Decoder* d, - const char* ptr, upb_Message* msg, - intptr_t table, uint64_t hasbits, - uint64_t data) { - (void)data; - *(uint32_t*)msg |= hasbits; - return _upb_Decoder_DecodeMessage(d, ptr, msg, decode_totablep(table)); +#ifndef NDEBUG + for (int i = 0; i < m->field_count; i++) { + const upb_FieldDef* f = upb_MessageDef_Field(m, i); + const int layout_index = _upb_FieldDef_LayoutIndex(f); + UPB_ASSERT(layout_index < m->layout->field_count); + const upb_MiniTableField* mt_f = &m->layout->fields[layout_index]; + UPB_ASSERT(upb_FieldDef_Type(f) == upb_MiniTableField_Type(mt_f)); + UPB_ASSERT(upb_FieldDef_CType(f) == upb_MiniTableField_CType(mt_f)); + UPB_ASSERT(upb_FieldDef_HasPresence(f) == + upb_MiniTableField_HasPresence(mt_f)); + } +#endif } -static upb_DecodeStatus _upb_Decoder_DecodeTop(struct upb_Decoder* d, - const char* buf, void* msg, - const upb_MiniTable* l) { - if (!_upb_Decoder_TryFastDispatch(d, &buf, msg, l)) { - _upb_Decoder_DecodeMessage(d, buf, msg, l); +static uint64_t _upb_MessageDef_Modifiers(const upb_MessageDef* m) { + uint64_t out = 0; + if (upb_FileDef_Syntax(m->file) == kUpb_Syntax_Proto3) { + out |= kUpb_MessageModifier_ValidateUtf8; + out |= kUpb_MessageModifier_DefaultIsPacked; } - if (d->end_group != DECODE_NOGROUP) return kUpb_DecodeStatus_Malformed; - if (d->missing_required) return kUpb_DecodeStatus_MissingRequired; - return kUpb_DecodeStatus_Ok; + if (m->ext_range_count) { + out |= kUpb_MessageModifier_IsExtendable; + } + return out; } -UPB_NOINLINE -const char* _upb_Decoder_IsDoneFallback(upb_EpsCopyInputStream* e, - const char* ptr, int overrun) { - return _upb_EpsCopyInputStream_IsDoneFallbackInline( - e, ptr, overrun, _upb_Decoder_BufferFlipCallback); -} +static bool _upb_MessageDef_EncodeMap(upb_DescState* s, const upb_MessageDef* m, + upb_Arena* a) { + if (m->field_count != 2) return false; -static upb_DecodeStatus upb_Decoder_Decode(upb_Decoder* const decoder, - const char* const buf, - void* const msg, - const upb_MiniTable* const l, - upb_Arena* const arena) { - if (UPB_SETJMP(decoder->err) == 0) { - decoder->status = _upb_Decoder_DecodeTop(decoder, buf, msg, l); - } else { - UPB_ASSERT(decoder->status != kUpb_DecodeStatus_Ok); - } + const upb_FieldDef* key_field = upb_MessageDef_Field(m, 0); + const upb_FieldDef* val_field = upb_MessageDef_Field(m, 1); + if (key_field == NULL || val_field == NULL) return false; - _upb_MemBlock* blocks = - upb_Atomic_Load(&decoder->arena.blocks, memory_order_relaxed); - arena->head = decoder->arena.head; - upb_Atomic_Store(&arena->blocks, blocks, memory_order_relaxed); - return decoder->status; + UPB_ASSERT(_upb_FieldDef_LayoutIndex(key_field) == 0); + UPB_ASSERT(_upb_FieldDef_LayoutIndex(val_field) == 1); + + s->ptr = upb_MtDataEncoder_EncodeMap( + &s->e, s->ptr, upb_FieldDef_Type(key_field), upb_FieldDef_Type(val_field), + _upb_FieldDef_Modifiers(key_field), _upb_FieldDef_Modifiers(val_field)); + return true; } -upb_DecodeStatus upb_Decode(const char* buf, size_t size, void* msg, - const upb_MiniTable* l, - const upb_ExtensionRegistry* extreg, int options, - upb_Arena* arena) { - upb_Decoder decoder; - unsigned depth = (unsigned)options >> 16; - - upb_EpsCopyInputStream_Init(&decoder.input, &buf, size, - options & kUpb_DecodeOption_AliasString); +static bool _upb_MessageDef_EncodeMessage(upb_DescState* s, + const upb_MessageDef* m, + upb_Arena* a) { + const upb_FieldDef** sorted = NULL; + if (!m->is_sorted) { + sorted = _upb_FieldDefs_Sorted(m->fields, m->field_count, a); + if (!sorted) return false; + } - decoder.extreg = extreg; - decoder.unknown = NULL; - decoder.depth = depth ? depth : kUpb_WireFormat_DefaultDepthLimit; - decoder.end_group = DECODE_NOGROUP; - decoder.options = (uint16_t)options; - decoder.missing_required = false; - decoder.status = kUpb_DecodeStatus_Ok; + s->ptr = upb_MtDataEncoder_StartMessage(&s->e, s->ptr, + _upb_MessageDef_Modifiers(m)); - // Violating the encapsulation of the arena for performance reasons. - // This is a temporary arena that we swap into and swap out of when we are - // done. The temporary arena only needs to be able to handle allocation, - // not fuse or free, so it does not need many of the members to be initialized - // (particularly parent_or_count). - _upb_MemBlock* blocks = upb_Atomic_Load(&arena->blocks, memory_order_relaxed); - decoder.arena.head = arena->head; - decoder.arena.block_alloc = arena->block_alloc; - upb_Atomic_Init(&decoder.arena.blocks, blocks); + for (int i = 0; i < m->field_count; i++) { + const upb_FieldDef* f = sorted ? sorted[i] : upb_MessageDef_Field(m, i); + const upb_FieldType type = upb_FieldDef_Type(f); + const int number = upb_FieldDef_Number(f); + const uint64_t modifiers = _upb_FieldDef_Modifiers(f); - return upb_Decoder_Decode(&decoder, buf, msg, l, arena); -} + if (!_upb_DescState_Grow(s, a)) return false; + s->ptr = upb_MtDataEncoder_PutField(&s->e, s->ptr, type, number, modifiers); + } -#undef OP_FIXPCK_LG2 -#undef OP_VARPCK_LG2 + for (int i = 0; i < m->real_oneof_count; i++) { + if (!_upb_DescState_Grow(s, a)) return false; + s->ptr = upb_MtDataEncoder_StartOneof(&s->e, s->ptr); -// Fast decoder: ~3x the speed of decode.c, but requires x86-64/ARM64. -// Also the table size grows by 2x. -// -// Could potentially be ported to other 64-bit archs that pass at least six -// arguments in registers and have 8 unused high bits in pointers. -// -// The overall design is to create specialized functions for every possible -// field type (eg. oneof boolean field with a 1 byte tag) and then dispatch -// to the specialized function as quickly as possible. + const upb_OneofDef* o = upb_MessageDef_Oneof(m, i); + const int field_count = upb_OneofDef_FieldCount(o); + for (int j = 0; j < field_count; j++) { + const int number = upb_FieldDef_Number(upb_OneofDef_Field(o, j)); + if (!_upb_DescState_Grow(s, a)) return false; + s->ptr = upb_MtDataEncoder_PutOneofField(&s->e, s->ptr, number); + } + } + return true; +} -// Must be last. +static bool _upb_MessageDef_EncodeMessageSet(upb_DescState* s, + const upb_MessageDef* m, + upb_Arena* a) { + s->ptr = upb_MtDataEncoder_EncodeMessageSet(&s->e, s->ptr); -#if UPB_FASTTABLE + return true; +} -// The standard set of arguments passed to each parsing function. -// Thanks to x86-64 calling conventions, these will stay in registers. -#define UPB_PARSE_PARAMS \ - upb_Decoder *d, const char *ptr, upb_Message *msg, intptr_t table, \ - uint64_t hasbits, uint64_t data +bool upb_MessageDef_MiniDescriptorEncode(const upb_MessageDef* m, upb_Arena* a, + upb_StringView* out) { + upb_DescState s; + _upb_DescState_Init(&s); -#define UPB_PARSE_ARGS d, ptr, msg, table, hasbits, data + if (!_upb_DescState_Grow(&s, a)) return false; -#define RETURN_GENERIC(m) \ - /* Uncomment either of these for debugging purposes. */ \ - /* fprintf(stderr, m); */ \ - /*__builtin_trap(); */ \ - return _upb_FastDecoder_DecodeGeneric(d, ptr, msg, table, hasbits, 0); + if (upb_MessageDef_IsMapEntry(m)) { + if (!_upb_MessageDef_EncodeMap(&s, m, a)) return false; + } else if (UPB_DESC(MessageOptions_message_set_wire_format)(m->opts)) { + if (!_upb_MessageDef_EncodeMessageSet(&s, m, a)) return false; + } else { + if (!_upb_MessageDef_EncodeMessage(&s, m, a)) return false; + } -typedef enum { - CARD_s = 0, /* Singular (optional, non-repeated) */ - CARD_o = 1, /* Oneof */ - CARD_r = 2, /* Repeated */ - CARD_p = 3 /* Packed Repeated */ -} upb_card; + if (!_upb_DescState_Grow(&s, a)) return false; + *s.ptr = '\0'; -UPB_NOINLINE -static const char* fastdecode_isdonefallback(UPB_PARSE_PARAMS) { - int overrun = data; - ptr = _upb_EpsCopyInputStream_IsDoneFallbackInline( - &d->input, ptr, overrun, _upb_Decoder_BufferFlipCallback); - data = _upb_FastDecoder_LoadTag(ptr); - UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); + out->data = s.buf; + out->size = s.ptr - s.buf; + return true; } -UPB_FORCEINLINE -static const char* fastdecode_dispatch(UPB_PARSE_PARAMS) { - int overrun; - switch (upb_EpsCopyInputStream_IsDoneStatus(&d->input, ptr, &overrun)) { - case kUpb_IsDoneStatus_Done: - *(uint32_t*)msg |= hasbits; // Sync hasbits. - const upb_MiniTable* l = decode_totablep(table); - return UPB_UNLIKELY(l->required_count) - ? _upb_Decoder_CheckRequired(d, ptr, msg, l) - : ptr; - case kUpb_IsDoneStatus_NotDone: - break; - case kUpb_IsDoneStatus_NeedFallback: - data = overrun; - UPB_MUSTTAIL return fastdecode_isdonefallback(UPB_PARSE_ARGS); +static upb_StringView* _upb_ReservedNames_New(upb_DefBuilder* ctx, int n, + const upb_StringView* protos) { + upb_StringView* sv = _upb_DefBuilder_Alloc(ctx, sizeof(upb_StringView) * n); + for (int i = 0; i < n; i++) { + sv[i].data = + upb_strdup2(protos[i].data, protos[i].size, _upb_DefBuilder_Arena(ctx)); + sv[i].size = protos[i].size; } - - // Read two bytes of tag data (for a one-byte tag, the high byte is junk). - data = _upb_FastDecoder_LoadTag(ptr); - UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); + return sv; } -UPB_FORCEINLINE -static bool fastdecode_checktag(uint16_t data, int tagbytes) { - if (tagbytes == 1) { - return (data & 0xff) == 0; - } else { - return data == 0; - } -} +static void create_msgdef(upb_DefBuilder* ctx, const char* prefix, + const UPB_DESC(DescriptorProto) * msg_proto, + const upb_MessageDef* containing_type, + upb_MessageDef* m) { + const UPB_DESC(OneofDescriptorProto)* const* oneofs; + const UPB_DESC(FieldDescriptorProto)* const* fields; + const UPB_DESC(DescriptorProto_ExtensionRange)* const* ext_ranges; + const UPB_DESC(DescriptorProto_ReservedRange)* const* res_ranges; + const upb_StringView* res_names; + size_t n_oneof, n_field, n_enum, n_ext, n_msg; + size_t n_ext_range, n_res_range, n_res_name; + upb_StringView name; -UPB_FORCEINLINE -static const char* fastdecode_longsize(const char* ptr, int* size) { - int i; - UPB_ASSERT(*size & 0x80); - *size &= 0xff; - for (i = 0; i < 3; i++) { - ptr++; - size_t byte = (uint8_t)ptr[-1]; - *size += (byte - 1) << (7 + 7 * i); - if (UPB_LIKELY((byte & 0x80) == 0)) return ptr; - } - ptr++; - size_t byte = (uint8_t)ptr[-1]; - // len is limited by 2gb not 4gb, hence 8 and not 16 as normally expected - // for a 32 bit varint. - if (UPB_UNLIKELY(byte >= 8)) return NULL; - *size += (byte - 1) << 28; - return ptr; -} + // Must happen before _upb_DefBuilder_Add() + m->file = _upb_DefBuilder_File(ctx); -UPB_FORCEINLINE -static const char* fastdecode_delimited( - upb_Decoder* d, const char* ptr, - upb_EpsCopyInputStream_ParseDelimitedFunc* func, void* ctx) { - ptr++; + m->containing_type = containing_type; + m->is_sorted = true; - // Sign-extend so varint greater than one byte becomes negative, causing - // fast delimited parse to fail. - int len = (int8_t)ptr[-1]; + name = UPB_DESC(DescriptorProto_name)(msg_proto); - if (!upb_EpsCopyInputStream_TryParseDelimitedFast(&d->input, &ptr, len, func, - ctx)) { - // Slow case: Sub-message is >=128 bytes and/or exceeds the current buffer. - // If it exceeds the buffer limit, limit/limit_ptr will change during - // sub-message parsing, so we need to preserve delta, not limit. - if (UPB_UNLIKELY(len & 0x80)) { - // Size varint >1 byte (length >= 128). - ptr = fastdecode_longsize(ptr, &len); - if (!ptr) { - // Corrupt wire format: size exceeded INT_MAX. - return NULL; - } - } - if (!upb_EpsCopyInputStream_CheckSize(&d->input, ptr, len)) { - // Corrupt wire format: invalid limit. - return NULL; - } - int delta = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, len); - ptr = func(&d->input, ptr, ctx); - upb_EpsCopyInputStream_PopLimit(&d->input, ptr, delta); - } - return ptr; -} + m->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); + _upb_DefBuilder_Add(ctx, m->full_name, _upb_DefType_Pack(m, UPB_DEFTYPE_MSG)); -/* singular, oneof, repeated field handling ***********************************/ + oneofs = UPB_DESC(DescriptorProto_oneof_decl)(msg_proto, &n_oneof); + fields = UPB_DESC(DescriptorProto_field)(msg_proto, &n_field); + ext_ranges = + UPB_DESC(DescriptorProto_extension_range)(msg_proto, &n_ext_range); + res_ranges = + UPB_DESC(DescriptorProto_reserved_range)(msg_proto, &n_res_range); + res_names = UPB_DESC(DescriptorProto_reserved_name)(msg_proto, &n_res_name); -typedef struct { - upb_Array* arr; - void* end; -} fastdecode_arr; + bool ok = upb_inttable_init(&m->itof, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); -typedef enum { - FD_NEXT_ATLIMIT, - FD_NEXT_SAMEFIELD, - FD_NEXT_OTHERFIELD -} fastdecode_next; + ok = upb_strtable_init(&m->ntof, n_oneof + n_field, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); -typedef struct { - void* dst; - fastdecode_next next; - uint32_t tag; -} fastdecode_nextret; + UPB_DEF_SET_OPTIONS(m->opts, DescriptorProto, MessageOptions, msg_proto); -UPB_FORCEINLINE -static void* fastdecode_resizearr(upb_Decoder* d, void* dst, - fastdecode_arr* farr, int valbytes) { - if (UPB_UNLIKELY(dst == farr->end)) { - size_t old_size = farr->arr->capacity; - size_t old_bytes = old_size * valbytes; - size_t new_size = old_size * 2; - size_t new_bytes = new_size * valbytes; - char* old_ptr = _upb_array_ptr(farr->arr); - char* new_ptr = upb_Arena_Realloc(&d->arena, old_ptr, old_bytes, new_bytes); - uint8_t elem_size_lg2 = __builtin_ctz(valbytes); - farr->arr->capacity = new_size; - farr->arr->data = _upb_array_tagptr(new_ptr, elem_size_lg2); - dst = (void*)(new_ptr + (old_size * valbytes)); - farr->end = (void*)(new_ptr + (new_size * valbytes)); + m->oneof_count = n_oneof; + m->oneofs = _upb_OneofDefs_New(ctx, n_oneof, oneofs, m); + + m->field_count = n_field; + m->fields = + _upb_FieldDefs_New(ctx, n_field, fields, m->full_name, m, &m->is_sorted); + + // Message Sets may not contain fields. + if (UPB_UNLIKELY(UPB_DESC(MessageOptions_message_set_wire_format)(m->opts))) { + if (UPB_UNLIKELY(n_field > 0)) { + _upb_DefBuilder_Errf(ctx, "invalid message set (%s)", m->full_name); + } } - return dst; + + m->ext_range_count = n_ext_range; + m->ext_ranges = _upb_ExtensionRanges_New(ctx, n_ext_range, ext_ranges, m); + + m->res_range_count = n_res_range; + m->res_ranges = + _upb_MessageReservedRanges_New(ctx, n_res_range, res_ranges, m); + + m->res_name_count = n_res_name; + m->res_names = _upb_ReservedNames_New(ctx, n_res_name, res_names); + + const size_t synthetic_count = _upb_OneofDefs_Finalize(ctx, m); + m->real_oneof_count = m->oneof_count - synthetic_count; + + assign_msg_wellknowntype(m); + upb_inttable_compact(&m->itof, ctx->arena); + + const UPB_DESC(EnumDescriptorProto)* const* enums = + UPB_DESC(DescriptorProto_enum_type)(msg_proto, &n_enum); + m->nested_enum_count = n_enum; + m->nested_enums = _upb_EnumDefs_New(ctx, n_enum, enums, m); + + const UPB_DESC(FieldDescriptorProto)* const* exts = + UPB_DESC(DescriptorProto_extension)(msg_proto, &n_ext); + m->nested_ext_count = n_ext; + m->nested_exts = _upb_Extensions_New(ctx, n_ext, exts, m->full_name, m); + + const UPB_DESC(DescriptorProto)* const* msgs = + UPB_DESC(DescriptorProto_nested_type)(msg_proto, &n_msg); + m->nested_msg_count = n_msg; + m->nested_msgs = _upb_MessageDefs_New(ctx, n_msg, msgs, m); } -UPB_FORCEINLINE -static bool fastdecode_tagmatch(uint32_t tag, uint64_t data, int tagbytes) { - if (tagbytes == 1) { - return (uint8_t)tag == (uint8_t)data; - } else { - return (uint16_t)tag == (uint16_t)data; +// Allocate and initialize an array of |n| message defs. +upb_MessageDef* _upb_MessageDefs_New( + upb_DefBuilder* ctx, int n, const UPB_DESC(DescriptorProto) * const* protos, + const upb_MessageDef* containing_type) { + _upb_DefType_CheckPadding(sizeof(upb_MessageDef)); + + const char* name = containing_type ? containing_type->full_name + : _upb_FileDef_RawPackage(ctx->file); + + upb_MessageDef* m = _upb_DefBuilder_Alloc(ctx, sizeof(upb_MessageDef) * n); + for (int i = 0; i < n; i++) { + create_msgdef(ctx, name, protos[i], containing_type, &m[i]); } + return m; } -UPB_FORCEINLINE -static void fastdecode_commitarr(void* dst, fastdecode_arr* farr, - int valbytes) { - farr->arr->size = - (size_t)((char*)dst - (char*)_upb_array_ptr(farr->arr)) / valbytes; -} -UPB_FORCEINLINE -static fastdecode_nextret fastdecode_nextrepeated(upb_Decoder* d, void* dst, - const char** ptr, - fastdecode_arr* farr, - uint64_t data, int tagbytes, - int valbytes) { - fastdecode_nextret ret; - dst = (char*)dst + valbytes; +// Must be last. - if (UPB_LIKELY(!_upb_Decoder_IsDone(d, ptr))) { - ret.tag = _upb_FastDecoder_LoadTag(*ptr); - if (fastdecode_tagmatch(ret.tag, data, tagbytes)) { - ret.next = FD_NEXT_SAMEFIELD; - } else { - fastdecode_commitarr(dst, farr, valbytes); - ret.next = FD_NEXT_OTHERFIELD; - } - } else { - fastdecode_commitarr(dst, farr, valbytes); - ret.next = FD_NEXT_ATLIMIT; - } +struct upb_MessageReservedRange { + int32_t start; + int32_t end; +}; - ret.dst = dst; - return ret; +upb_MessageReservedRange* _upb_MessageReservedRange_At( + const upb_MessageReservedRange* r, int i) { + return (upb_MessageReservedRange*)&r[i]; } -UPB_FORCEINLINE -static void* fastdecode_fieldmem(upb_Message* msg, uint64_t data) { - size_t ofs = data >> 48; - return (char*)msg + ofs; +int32_t upb_MessageReservedRange_Start(const upb_MessageReservedRange* r) { + return r->start; +} +int32_t upb_MessageReservedRange_End(const upb_MessageReservedRange* r) { + return r->end; } -UPB_FORCEINLINE -static void* fastdecode_getfield(upb_Decoder* d, const char* ptr, - upb_Message* msg, uint64_t* data, - uint64_t* hasbits, fastdecode_arr* farr, - int valbytes, upb_card card) { - switch (card) { - case CARD_s: { - uint8_t hasbit_index = *data >> 24; - // Set hasbit and return pointer to scalar field. - *hasbits |= 1ull << hasbit_index; - return fastdecode_fieldmem(msg, *data); - } - case CARD_o: { - uint16_t case_ofs = *data >> 32; - uint32_t* oneof_case = UPB_PTR_AT(msg, case_ofs, uint32_t); - uint8_t field_number = *data >> 24; - *oneof_case = field_number; - return fastdecode_fieldmem(msg, *data); - } - case CARD_r: { - // Get pointer to upb_Array and allocate/expand if necessary. - uint8_t elem_size_lg2 = __builtin_ctz(valbytes); - upb_Array** arr_p = fastdecode_fieldmem(msg, *data); - char* begin; - *(uint32_t*)msg |= *hasbits; - *hasbits = 0; - if (UPB_LIKELY(!*arr_p)) { - farr->arr = _upb_Array_New(&d->arena, 8, elem_size_lg2); - *arr_p = farr->arr; - } else { - farr->arr = *arr_p; - } - begin = _upb_array_ptr(farr->arr); - farr->end = begin + (farr->arr->capacity * valbytes); - *data = _upb_FastDecoder_LoadTag(ptr); - return begin + (farr->arr->size * valbytes); +upb_MessageReservedRange* _upb_MessageReservedRanges_New( + upb_DefBuilder* ctx, int n, + const UPB_DESC(DescriptorProto_ReservedRange) * const* protos, + const upb_MessageDef* m) { + upb_MessageReservedRange* r = + _upb_DefBuilder_Alloc(ctx, sizeof(upb_MessageReservedRange) * n); + + for (int i = 0; i < n; i++) { + const int32_t start = + UPB_DESC(DescriptorProto_ReservedRange_start)(protos[i]); + const int32_t end = UPB_DESC(DescriptorProto_ReservedRange_end)(protos[i]); + const int32_t max = kUpb_MaxFieldNumber + 1; + + // A full validation would also check that each range is disjoint, and that + // none of the fields overlap with the extension ranges, but we are just + // sanity checking here. + if (start < 1 || end <= start || end > max) { + _upb_DefBuilder_Errf(ctx, + "Reserved range (%d, %d) is invalid, message=%s\n", + (int)start, (int)end, upb_MessageDef_FullName(m)); } - default: - UPB_UNREACHABLE(); + + r[i].start = start; + r[i].end = end; } + + return r; } -UPB_FORCEINLINE -static bool fastdecode_flippacked(uint64_t* data, int tagbytes) { - *data ^= (0x2 ^ 0x0); // Patch data to match packed wiretype. - return fastdecode_checktag(*data, tagbytes); + + +// Must be last. + +struct upb_MethodDef { + const UPB_DESC(MethodOptions) * opts; + upb_ServiceDef* service; + const char* full_name; + const upb_MessageDef* input_type; + const upb_MessageDef* output_type; + int index; + bool client_streaming; + bool server_streaming; +}; + +upb_MethodDef* _upb_MethodDef_At(const upb_MethodDef* m, int i) { + return (upb_MethodDef*)&m[i]; } -#define FASTDECODE_CHECKPACKED(tagbytes, card, func) \ - if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ - if (card == CARD_r && fastdecode_flippacked(&data, tagbytes)) { \ - UPB_MUSTTAIL return func(UPB_PARSE_ARGS); \ - } \ - RETURN_GENERIC("packed check tag mismatch\n"); \ - } +const upb_ServiceDef* upb_MethodDef_Service(const upb_MethodDef* m) { + return m->service; +} -/* varint fields **************************************************************/ +const UPB_DESC(MethodOptions) * upb_MethodDef_Options(const upb_MethodDef* m) { + return m->opts; +} -UPB_FORCEINLINE -static uint64_t fastdecode_munge(uint64_t val, int valbytes, bool zigzag) { - if (valbytes == 1) { - return val != 0; - } else if (zigzag) { - if (valbytes == 4) { - uint32_t n = val; - return (n >> 1) ^ -(int32_t)(n & 1); - } else if (valbytes == 8) { - return (val >> 1) ^ -(int64_t)(val & 1); - } - UPB_UNREACHABLE(); - } - return val; +bool upb_MethodDef_HasOptions(const upb_MethodDef* m) { + return m->opts != (void*)kUpbDefOptDefault; } -UPB_FORCEINLINE -static const char* fastdecode_varint64(const char* ptr, uint64_t* val) { - ptr++; - *val = (uint8_t)ptr[-1]; - if (UPB_UNLIKELY(*val & 0x80)) { - int i; - for (i = 0; i < 8; i++) { - ptr++; - uint64_t byte = (uint8_t)ptr[-1]; - *val += (byte - 1) << (7 + 7 * i); - if (UPB_LIKELY((byte & 0x80) == 0)) goto done; - } - ptr++; - uint64_t byte = (uint8_t)ptr[-1]; - if (byte > 1) { - return NULL; - } - *val += (byte - 1) << 63; - } -done: - UPB_ASSUME(ptr != NULL); - return ptr; +const char* upb_MethodDef_FullName(const upb_MethodDef* m) { + return m->full_name; } -#define FASTDECODE_UNPACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, card, zigzag, packed) \ - uint64_t val; \ - void* dst; \ - fastdecode_arr farr; \ - \ - FASTDECODE_CHECKPACKED(tagbytes, card, packed); \ - \ - dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, \ - card); \ - if (card == CARD_r) { \ - if (UPB_UNLIKELY(!dst)) { \ - RETURN_GENERIC("need array resize\n"); \ - } \ - } \ - \ - again: \ - if (card == CARD_r) { \ - dst = fastdecode_resizearr(d, dst, &farr, valbytes); \ - } \ - \ - ptr += tagbytes; \ - ptr = fastdecode_varint64(ptr, &val); \ - if (ptr == NULL) _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ - val = fastdecode_munge(val, valbytes, zigzag); \ - memcpy(dst, &val, valbytes); \ - \ - if (card == CARD_r) { \ - fastdecode_nextret ret = fastdecode_nextrepeated( \ - d, dst, &ptr, &farr, data, tagbytes, valbytes); \ - switch (ret.next) { \ - case FD_NEXT_SAMEFIELD: \ - dst = ret.dst; \ - goto again; \ - case FD_NEXT_OTHERFIELD: \ - data = ret.tag; \ - UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ - case FD_NEXT_ATLIMIT: \ - return ptr; \ - } \ - } \ - \ - UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); +const char* upb_MethodDef_Name(const upb_MethodDef* m) { + return _upb_DefBuilder_FullToShort(m->full_name); +} -typedef struct { - uint8_t valbytes; - bool zigzag; - void* dst; - fastdecode_arr farr; -} fastdecode_varintdata; +int upb_MethodDef_Index(const upb_MethodDef* m) { return m->index; } -UPB_FORCEINLINE -static const char* fastdecode_topackedvarint(upb_EpsCopyInputStream* e, - const char* ptr, void* ctx) { - upb_Decoder* d = (upb_Decoder*)e; - fastdecode_varintdata* data = ctx; - void* dst = data->dst; - uint64_t val; +const upb_MessageDef* upb_MethodDef_InputType(const upb_MethodDef* m) { + return m->input_type; +} - while (!_upb_Decoder_IsDone(d, &ptr)) { - dst = fastdecode_resizearr(d, dst, &data->farr, data->valbytes); - ptr = fastdecode_varint64(ptr, &val); - if (ptr == NULL) return NULL; - val = fastdecode_munge(val, data->valbytes, data->zigzag); - memcpy(dst, &val, data->valbytes); - dst = (char*)dst + data->valbytes; - } +const upb_MessageDef* upb_MethodDef_OutputType(const upb_MethodDef* m) { + return m->output_type; +} - fastdecode_commitarr(dst, &data->farr, data->valbytes); - return ptr; +bool upb_MethodDef_ClientStreaming(const upb_MethodDef* m) { + return m->client_streaming; } -#define FASTDECODE_PACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, zigzag, unpacked) \ - fastdecode_varintdata ctx = {valbytes, zigzag}; \ - \ - FASTDECODE_CHECKPACKED(tagbytes, CARD_r, unpacked); \ - \ - ctx.dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &ctx.farr, \ - valbytes, CARD_r); \ - if (UPB_UNLIKELY(!ctx.dst)) { \ - RETURN_GENERIC("need array resize\n"); \ - } \ - \ - ptr += tagbytes; \ - ptr = fastdecode_delimited(d, ptr, &fastdecode_topackedvarint, &ctx); \ - \ - if (UPB_UNLIKELY(ptr == NULL)) { \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ - } \ - \ - UPB_MUSTTAIL return fastdecode_dispatch(d, ptr, msg, table, hasbits, 0); +bool upb_MethodDef_ServerStreaming(const upb_MethodDef* m) { + return m->server_streaming; +} -#define FASTDECODE_VARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, card, zigzag, unpacked, packed) \ - if (card == CARD_p) { \ - FASTDECODE_PACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, zigzag, unpacked); \ - } else { \ - FASTDECODE_UNPACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, card, zigzag, packed); \ - } +static void create_method(upb_DefBuilder* ctx, + const UPB_DESC(MethodDescriptorProto) * method_proto, + upb_ServiceDef* s, upb_MethodDef* m) { + upb_StringView name = UPB_DESC(MethodDescriptorProto_name)(method_proto); -#define z_ZZ true -#define b_ZZ false -#define v_ZZ false + m->service = s; + m->full_name = + _upb_DefBuilder_MakeFullName(ctx, upb_ServiceDef_FullName(s), name); + m->client_streaming = + UPB_DESC(MethodDescriptorProto_client_streaming)(method_proto); + m->server_streaming = + UPB_DESC(MethodDescriptorProto_server_streaming)(method_proto); + m->input_type = _upb_DefBuilder_Resolve( + ctx, m->full_name, m->full_name, + UPB_DESC(MethodDescriptorProto_input_type)(method_proto), + UPB_DEFTYPE_MSG); + m->output_type = _upb_DefBuilder_Resolve( + ctx, m->full_name, m->full_name, + UPB_DESC(MethodDescriptorProto_output_type)(method_proto), + UPB_DEFTYPE_MSG); -/* Generate all combinations: - * {s,o,r,p} x {b1,v4,z4,v8,z8} x {1bt,2bt} */ + UPB_DEF_SET_OPTIONS(m->opts, MethodDescriptorProto, MethodOptions, + method_proto); +} -#define F(card, type, valbytes, tagbytes) \ - UPB_NOINLINE \ - const char* upb_p##card##type##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ - FASTDECODE_VARINT(d, ptr, msg, table, hasbits, data, tagbytes, valbytes, \ - CARD_##card, type##_ZZ, \ - upb_pr##type##valbytes##_##tagbytes##bt, \ - upb_pp##type##valbytes##_##tagbytes##bt); \ +// Allocate and initialize an array of |n| method defs belonging to |s|. +upb_MethodDef* _upb_MethodDefs_New( + upb_DefBuilder* ctx, int n, + const UPB_DESC(MethodDescriptorProto) * const* protos, upb_ServiceDef* s) { + upb_MethodDef* m = _upb_DefBuilder_Alloc(ctx, sizeof(upb_MethodDef) * n); + for (int i = 0; i < n; i++) { + create_method(ctx, protos[i], s, &m[i]); + m[i].index = i; } + return m; +} -#define TYPES(card, tagbytes) \ - F(card, b, 1, tagbytes) \ - F(card, v, 4, tagbytes) \ - F(card, v, 8, tagbytes) \ - F(card, z, 4, tagbytes) \ - F(card, z, 8, tagbytes) -#define TAGBYTES(card) \ - TYPES(card, 1) \ - TYPES(card, 2) +#include +#include +#include -TAGBYTES(s) -TAGBYTES(o) -TAGBYTES(r) -TAGBYTES(p) -#undef z_ZZ -#undef b_ZZ -#undef v_ZZ -#undef o_ONEOF -#undef s_ONEOF -#undef r_ONEOF -#undef F -#undef TYPES -#undef TAGBYTES -#undef FASTDECODE_UNPACKEDVARINT -#undef FASTDECODE_PACKEDVARINT -#undef FASTDECODE_VARINT +// Must be last. -/* fixed fields ***************************************************************/ +struct upb_OneofDef { + const UPB_DESC(OneofOptions) * opts; + const upb_MessageDef* parent; + const char* full_name; + int field_count; + bool synthetic; + const upb_FieldDef** fields; + upb_strtable ntof; // lookup a field by name + upb_inttable itof; // lookup a field by number (index) +#if UINTPTR_MAX == 0xffffffff + uint32_t padding; // Increase size to a multiple of 8. +#endif +}; -#define FASTDECODE_UNPACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, card, packed) \ - void* dst; \ - fastdecode_arr farr; \ - \ - FASTDECODE_CHECKPACKED(tagbytes, card, packed) \ - \ - dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, \ - card); \ - if (card == CARD_r) { \ - if (UPB_UNLIKELY(!dst)) { \ - RETURN_GENERIC("couldn't allocate array in arena\n"); \ - } \ - } \ - \ - again: \ - if (card == CARD_r) { \ - dst = fastdecode_resizearr(d, dst, &farr, valbytes); \ - } \ - \ - ptr += tagbytes; \ - memcpy(dst, ptr, valbytes); \ - ptr += valbytes; \ - \ - if (card == CARD_r) { \ - fastdecode_nextret ret = fastdecode_nextrepeated( \ - d, dst, &ptr, &farr, data, tagbytes, valbytes); \ - switch (ret.next) { \ - case FD_NEXT_SAMEFIELD: \ - dst = ret.dst; \ - goto again; \ - case FD_NEXT_OTHERFIELD: \ - data = ret.tag; \ - UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ - case FD_NEXT_ATLIMIT: \ - return ptr; \ - } \ - } \ - \ - UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); +upb_OneofDef* _upb_OneofDef_At(const upb_OneofDef* o, int i) { + return (upb_OneofDef*)&o[i]; +} -#define FASTDECODE_PACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, unpacked) \ - FASTDECODE_CHECKPACKED(tagbytes, CARD_r, unpacked) \ - \ - ptr += tagbytes; \ - int size = (uint8_t)ptr[0]; \ - ptr++; \ - if (size & 0x80) { \ - ptr = fastdecode_longsize(ptr, &size); \ - } \ - \ - if (UPB_UNLIKELY(!upb_EpsCopyInputStream_CheckDataSizeAvailable( \ - &d->input, ptr, size) || \ - (size % valbytes) != 0)) { \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ - } \ - \ - upb_Array** arr_p = fastdecode_fieldmem(msg, data); \ - upb_Array* arr = *arr_p; \ - uint8_t elem_size_lg2 = __builtin_ctz(valbytes); \ - int elems = size / valbytes; \ - \ - if (UPB_LIKELY(!arr)) { \ - *arr_p = arr = _upb_Array_New(&d->arena, elems, elem_size_lg2); \ - if (!arr) { \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ - } \ - } else { \ - _upb_Array_ResizeUninitialized(arr, elems, &d->arena); \ - } \ - \ - char* dst = _upb_array_ptr(arr); \ - memcpy(dst, ptr, size); \ - arr->size = elems; \ - \ - ptr += size; \ - UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); +const UPB_DESC(OneofOptions) * upb_OneofDef_Options(const upb_OneofDef* o) { + return o->opts; +} -#define FASTDECODE_FIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, card, unpacked, packed) \ - if (card == CARD_p) { \ - FASTDECODE_PACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, unpacked); \ - } else { \ - FASTDECODE_UNPACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, card, packed); \ - } +bool upb_OneofDef_HasOptions(const upb_OneofDef* o) { + return o->opts != (void*)kUpbDefOptDefault; +} -/* Generate all combinations: - * {s,o,r,p} x {f4,f8} x {1bt,2bt} */ +const char* upb_OneofDef_FullName(const upb_OneofDef* o) { + return o->full_name; +} -#define F(card, valbytes, tagbytes) \ - UPB_NOINLINE \ - const char* upb_p##card##f##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ - FASTDECODE_FIXED(d, ptr, msg, table, hasbits, data, tagbytes, valbytes, \ - CARD_##card, upb_ppf##valbytes##_##tagbytes##bt, \ - upb_prf##valbytes##_##tagbytes##bt); \ - } +const char* upb_OneofDef_Name(const upb_OneofDef* o) { + return _upb_DefBuilder_FullToShort(o->full_name); +} -#define TYPES(card, tagbytes) \ - F(card, 4, tagbytes) \ - F(card, 8, tagbytes) +const upb_MessageDef* upb_OneofDef_ContainingType(const upb_OneofDef* o) { + return o->parent; +} -#define TAGBYTES(card) \ - TYPES(card, 1) \ - TYPES(card, 2) +int upb_OneofDef_FieldCount(const upb_OneofDef* o) { return o->field_count; } -TAGBYTES(s) -TAGBYTES(o) -TAGBYTES(r) -TAGBYTES(p) +const upb_FieldDef* upb_OneofDef_Field(const upb_OneofDef* o, int i) { + UPB_ASSERT(i < o->field_count); + return o->fields[i]; +} -#undef F -#undef TYPES -#undef TAGBYTES -#undef FASTDECODE_UNPACKEDFIXED -#undef FASTDECODE_PACKEDFIXED +int upb_OneofDef_numfields(const upb_OneofDef* o) { return o->field_count; } -/* string fields **************************************************************/ +uint32_t upb_OneofDef_Index(const upb_OneofDef* o) { + // Compute index in our parent's array. + return o - upb_MessageDef_Oneof(o->parent, 0); +} -typedef const char* fastdecode_copystr_func(struct upb_Decoder* d, - const char* ptr, upb_Message* msg, - const upb_MiniTable* table, - uint64_t hasbits, - upb_StringView* dst); +bool upb_OneofDef_IsSynthetic(const upb_OneofDef* o) { return o->synthetic; } -UPB_NOINLINE -static const char* fastdecode_verifyutf8(upb_Decoder* d, const char* ptr, - upb_Message* msg, intptr_t table, - uint64_t hasbits, uint64_t data) { - upb_StringView* dst = (upb_StringView*)data; - if (!_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) { - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); +const upb_FieldDef* upb_OneofDef_LookupNameWithSize(const upb_OneofDef* o, + const char* name, + size_t size) { + upb_value val; + return upb_strtable_lookup2(&o->ntof, name, size, &val) + ? upb_value_getptr(val) + : NULL; +} + +const upb_FieldDef* upb_OneofDef_LookupName(const upb_OneofDef* o, + const char* name) { + return upb_OneofDef_LookupNameWithSize(o, name, strlen(name)); +} + +const upb_FieldDef* upb_OneofDef_LookupNumber(const upb_OneofDef* o, + uint32_t num) { + upb_value val; + return upb_inttable_lookup(&o->itof, num, &val) ? upb_value_getptr(val) + : NULL; +} + +void _upb_OneofDef_Insert(upb_DefBuilder* ctx, upb_OneofDef* o, + const upb_FieldDef* f, const char* name, + size_t size) { + o->field_count++; + if (_upb_FieldDef_IsProto3Optional(f)) o->synthetic = true; + + const int number = upb_FieldDef_Number(f); + const upb_value v = upb_value_constptr(f); + + // TODO(salo): This lookup is unfortunate because we also perform it when + // inserting into the message's table. Unfortunately that step occurs after + // this one and moving things around could be tricky so let's leave it for + // a future refactoring. + const bool number_exists = upb_inttable_lookup(&o->itof, number, NULL); + if (UPB_UNLIKELY(number_exists)) { + _upb_DefBuilder_Errf(ctx, "oneof fields have the same number (%d)", number); + } + + // TODO(salo): More redundant work happening here. + const bool name_exists = upb_strtable_lookup2(&o->ntof, name, size, NULL); + if (UPB_UNLIKELY(name_exists)) { + _upb_DefBuilder_Errf(ctx, "oneof fields have the same name (%.*s)", + (int)size, name); + } + + const bool ok = upb_inttable_insert(&o->itof, number, v, ctx->arena) && + upb_strtable_insert(&o->ntof, name, size, v, ctx->arena); + if (UPB_UNLIKELY(!ok)) { + _upb_DefBuilder_OomErr(ctx); } - UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); } -#define FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, validate_utf8) \ - int size = (uint8_t)ptr[0]; /* Could plumb through hasbits. */ \ - ptr++; \ - if (size & 0x80) { \ - ptr = fastdecode_longsize(ptr, &size); \ - } \ - \ - if (UPB_UNLIKELY(!upb_EpsCopyInputStream_CheckSize(&d->input, ptr, size))) { \ - dst->size = 0; \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ - } \ - \ - const char* s_ptr = ptr; \ - ptr = upb_EpsCopyInputStream_ReadString(&d->input, &s_ptr, size, &d->arena); \ - if (!ptr) _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); \ - dst->data = s_ptr; \ - dst->size = size; \ - \ - if (validate_utf8) { \ - data = (uint64_t)dst; \ - UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ - } else { \ - UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \ +// Returns the synthetic count. +size_t _upb_OneofDefs_Finalize(upb_DefBuilder* ctx, upb_MessageDef* m) { + int synthetic_count = 0; + + for (int i = 0; i < upb_MessageDef_OneofCount(m); i++) { + upb_OneofDef* o = (upb_OneofDef*)upb_MessageDef_Oneof(m, i); + + if (o->synthetic && o->field_count != 1) { + _upb_DefBuilder_Errf(ctx, + "Synthetic oneofs must have one field, not %d: %s", + o->field_count, upb_OneofDef_Name(o)); + } + + if (o->synthetic) { + synthetic_count++; + } else if (synthetic_count != 0) { + _upb_DefBuilder_Errf( + ctx, "Synthetic oneofs must be after all other oneofs: %s", + upb_OneofDef_Name(o)); + } + + o->fields = + _upb_DefBuilder_Alloc(ctx, sizeof(upb_FieldDef*) * o->field_count); + o->field_count = 0; } -UPB_NOINLINE -static const char* fastdecode_longstring_utf8(struct upb_Decoder* d, - const char* ptr, upb_Message* msg, - intptr_t table, uint64_t hasbits, - uint64_t data) { - upb_StringView* dst = (upb_StringView*)data; - FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, true); + for (int i = 0; i < upb_MessageDef_FieldCount(m); i++) { + const upb_FieldDef* f = upb_MessageDef_Field(m, i); + upb_OneofDef* o = (upb_OneofDef*)upb_FieldDef_ContainingOneof(f); + if (o) { + o->fields[o->field_count++] = f; + } + } + + return synthetic_count; } -UPB_NOINLINE -static const char* fastdecode_longstring_noutf8( - struct upb_Decoder* d, const char* ptr, upb_Message* msg, intptr_t table, - uint64_t hasbits, uint64_t data) { - upb_StringView* dst = (upb_StringView*)data; - FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, false); +static void create_oneofdef(upb_DefBuilder* ctx, upb_MessageDef* m, + const UPB_DESC(OneofDescriptorProto) * oneof_proto, + const upb_OneofDef* _o) { + upb_OneofDef* o = (upb_OneofDef*)_o; + upb_StringView name = UPB_DESC(OneofDescriptorProto_name)(oneof_proto); + + o->parent = m; + o->full_name = + _upb_DefBuilder_MakeFullName(ctx, upb_MessageDef_FullName(m), name); + o->field_count = 0; + o->synthetic = false; + + UPB_DEF_SET_OPTIONS(o->opts, OneofDescriptorProto, OneofOptions, oneof_proto); + + if (upb_MessageDef_FindByNameWithSize(m, name.data, name.size, NULL, NULL)) { + _upb_DefBuilder_Errf(ctx, "duplicate oneof name (%s)", o->full_name); + } + + upb_value v = _upb_DefType_Pack(o, UPB_DEFTYPE_ONEOF); + bool ok = _upb_MessageDef_Insert(m, name.data, name.size, v, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); + + ok = upb_inttable_init(&o->itof, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); + + ok = upb_strtable_init(&o->ntof, 4, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); } -UPB_FORCEINLINE -static void fastdecode_docopy(upb_Decoder* d, const char* ptr, uint32_t size, - int copy, char* data, upb_StringView* dst) { - d->arena.head.ptr += copy; - dst->data = data; - UPB_UNPOISON_MEMORY_REGION(data, copy); - memcpy(data, ptr, copy); - UPB_POISON_MEMORY_REGION(data + size, copy - size); +// Allocate and initialize an array of |n| oneof defs. +upb_OneofDef* _upb_OneofDefs_New( + upb_DefBuilder* ctx, int n, + const UPB_DESC(OneofDescriptorProto) * const* protos, upb_MessageDef* m) { + _upb_DefType_CheckPadding(sizeof(upb_OneofDef)); + + upb_OneofDef* o = _upb_DefBuilder_Alloc(ctx, sizeof(upb_OneofDef) * n); + for (int i = 0; i < n; i++) { + create_oneofdef(ctx, m, protos[i], &o[i]); + } + return o; } -#define FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes, \ - card, validate_utf8) \ - upb_StringView* dst; \ - fastdecode_arr farr; \ - int64_t size; \ - size_t arena_has; \ - size_t common_has; \ - char* buf; \ - \ - UPB_ASSERT(!upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, 0)); \ - UPB_ASSERT(fastdecode_checktag(data, tagbytes)); \ - \ - dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ - sizeof(upb_StringView), card); \ - \ - again: \ - if (card == CARD_r) { \ - dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_StringView)); \ - } \ - \ - size = (uint8_t)ptr[tagbytes]; \ - ptr += tagbytes + 1; \ - dst->size = size; \ - \ - buf = d->arena.head.ptr; \ - arena_has = _upb_ArenaHas(&d->arena); \ - common_has = UPB_MIN(arena_has, \ - upb_EpsCopyInputStream_BytesAvailable(&d->input, ptr)); \ - \ - if (UPB_LIKELY(size <= 15 - tagbytes)) { \ - if (arena_has < 16) goto longstr; \ - d->arena.head.ptr += 16; \ - memcpy(buf, ptr - tagbytes - 1, 16); \ - dst->data = buf + tagbytes + 1; \ - } else if (UPB_LIKELY(size <= 32)) { \ - if (UPB_UNLIKELY(common_has < 32)) goto longstr; \ - fastdecode_docopy(d, ptr, size, 32, buf, dst); \ - } else if (UPB_LIKELY(size <= 64)) { \ - if (UPB_UNLIKELY(common_has < 64)) goto longstr; \ - fastdecode_docopy(d, ptr, size, 64, buf, dst); \ - } else if (UPB_LIKELY(size < 128)) { \ - if (UPB_UNLIKELY(common_has < 128)) goto longstr; \ - fastdecode_docopy(d, ptr, size, 128, buf, dst); \ - } else { \ - goto longstr; \ - } \ - \ - ptr += size; \ - \ - if (card == CARD_r) { \ - if (validate_utf8 && \ - !_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) { \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); \ - } \ - fastdecode_nextret ret = fastdecode_nextrepeated( \ - d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_StringView)); \ - switch (ret.next) { \ - case FD_NEXT_SAMEFIELD: \ - dst = ret.dst; \ - goto again; \ - case FD_NEXT_OTHERFIELD: \ - data = ret.tag; \ - UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ - case FD_NEXT_ATLIMIT: \ - return ptr; \ - } \ - } \ - \ - if (card != CARD_r && validate_utf8) { \ - data = (uint64_t)dst; \ - UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ - } \ - \ - UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \ - \ - longstr: \ - if (card == CARD_r) { \ - fastdecode_commitarr(dst + 1, &farr, sizeof(upb_StringView)); \ - } \ - ptr--; \ - if (validate_utf8) { \ - UPB_MUSTTAIL return fastdecode_longstring_utf8(d, ptr, msg, table, \ - hasbits, (uint64_t)dst); \ - } else { \ - UPB_MUSTTAIL return fastdecode_longstring_noutf8(d, ptr, msg, table, \ - hasbits, (uint64_t)dst); \ - } -#define FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, card, \ - copyfunc, validate_utf8) \ - upb_StringView* dst; \ - fastdecode_arr farr; \ - int64_t size; \ - \ - if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ - RETURN_GENERIC("string field tag mismatch\n"); \ - } \ - \ - if (UPB_UNLIKELY( \ - !upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, 0))) { \ - UPB_MUSTTAIL return copyfunc(UPB_PARSE_ARGS); \ - } \ - \ - dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ - sizeof(upb_StringView), card); \ - \ - again: \ - if (card == CARD_r) { \ - dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_StringView)); \ - } \ - \ - size = (int8_t)ptr[tagbytes]; \ - ptr += tagbytes + 1; \ - \ - if (UPB_UNLIKELY( \ - !upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, size))) { \ - ptr--; \ - if (validate_utf8) { \ - return fastdecode_longstring_utf8(d, ptr, msg, table, hasbits, \ - (uint64_t)dst); \ - } else { \ - return fastdecode_longstring_noutf8(d, ptr, msg, table, hasbits, \ - (uint64_t)dst); \ - } \ - } \ - \ - dst->data = ptr; \ - dst->size = size; \ - ptr = upb_EpsCopyInputStream_ReadStringAliased(&d->input, &dst->data, \ - dst->size); \ - \ - if (card == CARD_r) { \ - if (validate_utf8 && \ - !_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) { \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); \ - } \ - fastdecode_nextret ret = fastdecode_nextrepeated( \ - d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_StringView)); \ - switch (ret.next) { \ - case FD_NEXT_SAMEFIELD: \ - dst = ret.dst; \ - goto again; \ - case FD_NEXT_OTHERFIELD: \ - data = ret.tag; \ - UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ - case FD_NEXT_ATLIMIT: \ - return ptr; \ - } \ - } \ - \ - if (card != CARD_r && validate_utf8) { \ - data = (uint64_t)dst; \ - UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ - } \ - \ - UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); -/* Generate all combinations: - * {p,c} x {s,o,r} x {s, b} x {1bt,2bt} */ +// Must be last. -#define s_VALIDATE true -#define b_VALIDATE false +struct upb_ServiceDef { + const UPB_DESC(ServiceOptions) * opts; + const upb_FileDef* file; + const char* full_name; + upb_MethodDef* methods; + int method_count; + int index; +}; -#define F(card, tagbytes, type) \ - UPB_NOINLINE \ - const char* upb_c##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ - FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes, \ - CARD_##card, type##_VALIDATE); \ - } \ - const char* upb_p##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ - FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, \ - CARD_##card, upb_c##card##type##_##tagbytes##bt, \ - type##_VALIDATE); \ - } +upb_ServiceDef* _upb_ServiceDef_At(const upb_ServiceDef* s, int index) { + return (upb_ServiceDef*)&s[index]; +} -#define UTF8(card, tagbytes) \ - F(card, tagbytes, s) \ - F(card, tagbytes, b) +const UPB_DESC(ServiceOptions) * + upb_ServiceDef_Options(const upb_ServiceDef* s) { + return s->opts; +} -#define TAGBYTES(card) \ - UTF8(card, 1) \ - UTF8(card, 2) +bool upb_ServiceDef_HasOptions(const upb_ServiceDef* s) { + return s->opts != (void*)kUpbDefOptDefault; +} -TAGBYTES(s) -TAGBYTES(o) -TAGBYTES(r) +const char* upb_ServiceDef_FullName(const upb_ServiceDef* s) { + return s->full_name; +} -#undef s_VALIDATE -#undef b_VALIDATE -#undef F -#undef TAGBYTES -#undef FASTDECODE_LONGSTRING -#undef FASTDECODE_COPYSTRING -#undef FASTDECODE_STRING +const char* upb_ServiceDef_Name(const upb_ServiceDef* s) { + return _upb_DefBuilder_FullToShort(s->full_name); +} -/* message fields *************************************************************/ +int upb_ServiceDef_Index(const upb_ServiceDef* s) { return s->index; } -UPB_INLINE -upb_Message* decode_newmsg_ceil(upb_Decoder* d, const upb_MiniTable* l, - int msg_ceil_bytes) { - size_t size = l->size + sizeof(upb_Message_Internal); - char* msg_data; - if (UPB_LIKELY(msg_ceil_bytes > 0 && - _upb_ArenaHas(&d->arena) >= msg_ceil_bytes)) { - UPB_ASSERT(size <= (size_t)msg_ceil_bytes); - msg_data = d->arena.head.ptr; - d->arena.head.ptr += size; - UPB_UNPOISON_MEMORY_REGION(msg_data, msg_ceil_bytes); - memset(msg_data, 0, msg_ceil_bytes); - UPB_POISON_MEMORY_REGION(msg_data + size, msg_ceil_bytes - size); - } else { - msg_data = (char*)upb_Arena_Malloc(&d->arena, size); - memset(msg_data, 0, size); - } - return msg_data + sizeof(upb_Message_Internal); +const upb_FileDef* upb_ServiceDef_File(const upb_ServiceDef* s) { + return s->file; } -typedef struct { - intptr_t table; - upb_Message* msg; -} fastdecode_submsgdata; +int upb_ServiceDef_MethodCount(const upb_ServiceDef* s) { + return s->method_count; +} -UPB_FORCEINLINE -static const char* fastdecode_tosubmsg(upb_EpsCopyInputStream* e, - const char* ptr, void* ctx) { - upb_Decoder* d = (upb_Decoder*)e; - fastdecode_submsgdata* submsg = ctx; - ptr = fastdecode_dispatch(d, ptr, submsg->msg, submsg->table, 0, 0); - UPB_ASSUME(ptr != NULL); - return ptr; +const upb_MethodDef* upb_ServiceDef_Method(const upb_ServiceDef* s, int i) { + return (i < 0 || i >= s->method_count) ? NULL + : _upb_MethodDef_At(s->methods, i); } -#define FASTDECODE_SUBMSG(d, ptr, msg, table, hasbits, data, tagbytes, \ - msg_ceil_bytes, card) \ - \ - if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ - RETURN_GENERIC("submessage field tag mismatch\n"); \ - } \ - \ - if (--d->depth == 0) { \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_MaxDepthExceeded); \ - } \ - \ - upb_Message** dst; \ - uint32_t submsg_idx = (data >> 16) & 0xff; \ - const upb_MiniTable* tablep = decode_totablep(table); \ - const upb_MiniTable* subtablep = tablep->subs[submsg_idx].submsg; \ - fastdecode_submsgdata submsg = {decode_totable(subtablep)}; \ - fastdecode_arr farr; \ - \ - if (subtablep->table_mask == (uint8_t)-1) { \ - RETURN_GENERIC("submessage doesn't have fast tables."); \ - } \ - \ - dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ - sizeof(upb_Message*), card); \ - \ - if (card == CARD_s) { \ - *(uint32_t*)msg |= hasbits; \ - hasbits = 0; \ - } \ - \ - again: \ - if (card == CARD_r) { \ - dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_Message*)); \ - } \ - \ - submsg.msg = *dst; \ - \ - if (card == CARD_r || UPB_LIKELY(!submsg.msg)) { \ - *dst = submsg.msg = decode_newmsg_ceil(d, subtablep, msg_ceil_bytes); \ - } \ - \ - ptr += tagbytes; \ - ptr = fastdecode_delimited(d, ptr, fastdecode_tosubmsg, &submsg); \ - \ - if (UPB_UNLIKELY(ptr == NULL || d->end_group != DECODE_NOGROUP)) { \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ - } \ - \ - if (card == CARD_r) { \ - fastdecode_nextret ret = fastdecode_nextrepeated( \ - d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_Message*)); \ - switch (ret.next) { \ - case FD_NEXT_SAMEFIELD: \ - dst = ret.dst; \ - goto again; \ - case FD_NEXT_OTHERFIELD: \ - d->depth++; \ - data = ret.tag; \ - UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ - case FD_NEXT_ATLIMIT: \ - d->depth++; \ - return ptr; \ - } \ - } \ - \ - d->depth++; \ - UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); - -#define F(card, tagbytes, size_ceil, ceil_arg) \ - const char* upb_p##card##m_##tagbytes##bt_max##size_ceil##b( \ - UPB_PARSE_PARAMS) { \ - FASTDECODE_SUBMSG(d, ptr, msg, table, hasbits, data, tagbytes, ceil_arg, \ - CARD_##card); \ +const upb_MethodDef* upb_ServiceDef_FindMethodByName(const upb_ServiceDef* s, + const char* name) { + for (int i = 0; i < s->method_count; i++) { + const upb_MethodDef* m = _upb_MethodDef_At(s->methods, i); + if (strcmp(name, upb_MethodDef_Name(m)) == 0) { + return m; + } } + return NULL; +} -#define SIZES(card, tagbytes) \ - F(card, tagbytes, 64, 64) \ - F(card, tagbytes, 128, 128) \ - F(card, tagbytes, 192, 192) \ - F(card, tagbytes, 256, 256) \ - F(card, tagbytes, max, -1) +static void create_service(upb_DefBuilder* ctx, + const UPB_DESC(ServiceDescriptorProto) * svc_proto, + upb_ServiceDef* s) { + upb_StringView name; + size_t n; -#define TAGBYTES(card) \ - SIZES(card, 1) \ - SIZES(card, 2) + // Must happen before _upb_DefBuilder_Add() + s->file = _upb_DefBuilder_File(ctx); -TAGBYTES(s) -TAGBYTES(o) -TAGBYTES(r) + name = UPB_DESC(ServiceDescriptorProto_name)(svc_proto); + const char* package = _upb_FileDef_RawPackage(s->file); + s->full_name = _upb_DefBuilder_MakeFullName(ctx, package, name); + _upb_DefBuilder_Add(ctx, s->full_name, + _upb_DefType_Pack(s, UPB_DEFTYPE_SERVICE)); -#undef TAGBYTES -#undef SIZES -#undef F -#undef FASTDECODE_SUBMSG + const UPB_DESC(MethodDescriptorProto)* const* methods = + UPB_DESC(ServiceDescriptorProto_method)(svc_proto, &n); + s->method_count = n; + s->methods = _upb_MethodDefs_New(ctx, n, methods, s); -#endif /* UPB_FASTTABLE */ + UPB_DEF_SET_OPTIONS(s->opts, ServiceDescriptorProto, ServiceOptions, + svc_proto); +} -// We encode backwards, to avoid pre-computing lengths (one-pass encode). +upb_ServiceDef* _upb_ServiceDefs_New( + upb_DefBuilder* ctx, int n, + const UPB_DESC(ServiceDescriptorProto) * const* protos) { + _upb_DefType_CheckPadding(sizeof(upb_ServiceDef)); + + upb_ServiceDef* s = _upb_DefBuilder_Alloc(ctx, sizeof(upb_ServiceDef) * n); + for (int i = 0; i < n; i++) { + create_service(ctx, protos[i], &s[i]); + s[i].index = i; + } + return s; +} #include @@ -12036,2170 +11254,2962 @@ TAGBYTES(r) // Must be last. -#define UPB_PB_VARINT_MAX_LEN 10 - -UPB_NOINLINE -static size_t encode_varint64(uint64_t val, char* buf) { - size_t i = 0; - do { - uint8_t byte = val & 0x7fU; - val >>= 7; - if (val) byte |= 0x80U; - buf[i++] = byte; - } while (val); - return i; -} +// A few fake field types for our tables. +enum { + kUpb_FakeFieldType_FieldNotFound = 0, + kUpb_FakeFieldType_MessageSetItem = 19, +}; -static uint32_t encode_zz32(int32_t n) { - return ((uint32_t)n << 1) ^ (n >> 31); -} -static uint64_t encode_zz64(int64_t n) { - return ((uint64_t)n << 1) ^ (n >> 63); -} +// DecodeOp: an action to be performed for a wire-type/field-type combination. +enum { + // Special ops: we don't write data to regular fields for these. + kUpb_DecodeOp_UnknownField = -1, + kUpb_DecodeOp_MessageSetItem = -2, -typedef struct { - upb_EncodeStatus status; - jmp_buf err; - upb_Arena* arena; - char *buf, *ptr, *limit; - int options; - int depth; - _upb_mapsorter sorter; -} upb_encstate; + // Scalar-only ops. + kUpb_DecodeOp_Scalar1Byte = 0, + kUpb_DecodeOp_Scalar4Byte = 2, + kUpb_DecodeOp_Scalar8Byte = 3, + kUpb_DecodeOp_Enum = 1, -static size_t upb_roundup_pow2(size_t bytes) { - size_t ret = 128; - while (ret < bytes) { - ret *= 2; - } - return ret; -} + // Scalar/repeated ops. + kUpb_DecodeOp_String = 4, + kUpb_DecodeOp_Bytes = 5, + kUpb_DecodeOp_SubMessage = 6, -UPB_NORETURN static void encode_err(upb_encstate* e, upb_EncodeStatus s) { - UPB_ASSERT(s != kUpb_EncodeStatus_Ok); - e->status = s; - UPB_LONGJMP(e->err, 1); -} + // Repeated-only ops (also see macros below). + kUpb_DecodeOp_PackedEnum = 13, +}; -UPB_NOINLINE -static void encode_growbuffer(upb_encstate* e, size_t bytes) { - size_t old_size = e->limit - e->buf; - size_t new_size = upb_roundup_pow2(bytes + (e->limit - e->ptr)); - char* new_buf = upb_Arena_Realloc(e->arena, e->buf, old_size, new_size); +// For packed fields it is helpful to be able to recover the lg2 of the data +// size from the op. +#define OP_FIXPCK_LG2(n) (n + 5) /* n in [2, 3] => op in [7, 8] */ +#define OP_VARPCK_LG2(n) (n + 9) /* n in [0, 2, 3] => op in [9, 11, 12] */ - if (!new_buf) encode_err(e, kUpb_EncodeStatus_OutOfMemory); +typedef union { + bool bool_val; + uint32_t uint32_val; + uint64_t uint64_val; + uint32_t size; +} wireval; - // We want previous data at the end, realloc() put it at the beginning. - // TODO(salo): This is somewhat inefficient since we are copying twice. - // Maybe create a realloc() that copies to the end of the new buffer? - if (old_size > 0) { - memmove(new_buf + new_size - old_size, e->buf, old_size); - } +static const char* _upb_Decoder_DecodeMessage(upb_Decoder* d, const char* ptr, + upb_Message* msg, + const upb_MiniTable* layout); - e->ptr = new_buf + new_size - (e->limit - e->ptr); - e->limit = new_buf + new_size; - e->buf = new_buf; +UPB_NORETURN static void* _upb_Decoder_ErrorJmp(upb_Decoder* d, + upb_DecodeStatus status) { + assert(status != kUpb_DecodeStatus_Ok); + d->status = status; + UPB_LONGJMP(d->err, 1); +} - e->ptr -= bytes; +const char* _upb_FastDecoder_ErrorJmp(upb_Decoder* d, int status) { + assert(status != kUpb_DecodeStatus_Ok); + d->status = status; + UPB_LONGJMP(d->err, 1); + return NULL; } -/* Call to ensure that at least "bytes" bytes are available for writing at - * e->ptr. Returns false if the bytes could not be allocated. */ -UPB_FORCEINLINE -static void encode_reserve(upb_encstate* e, size_t bytes) { - if ((size_t)(e->ptr - e->buf) < bytes) { - encode_growbuffer(e, bytes); - return; +static void _upb_Decoder_VerifyUtf8(upb_Decoder* d, const char* buf, int len) { + if (!_upb_Decoder_VerifyUtf8Inline(buf, len)) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); } - - e->ptr -= bytes; } -/* Writes the given bytes to the buffer, handling reserve/advance. */ -static void encode_bytes(upb_encstate* e, const void* data, size_t len) { - if (len == 0) return; /* memcpy() with zero size is UB */ - encode_reserve(e, len); - memcpy(e->ptr, data, len); +static bool _upb_Decoder_Reserve(upb_Decoder* d, upb_Array* arr, size_t elem) { + bool need_realloc = arr->capacity - arr->size < elem; + if (need_realloc && !_upb_array_realloc(arr, arr->size + elem, &d->arena)) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + } + return need_realloc; } -static void encode_fixed64(upb_encstate* e, uint64_t val) { - val = _upb_BigEndian_Swap64(val); - encode_bytes(e, &val, sizeof(uint64_t)); -} - -static void encode_fixed32(upb_encstate* e, uint32_t val) { - val = _upb_BigEndian_Swap32(val); - encode_bytes(e, &val, sizeof(uint32_t)); -} +typedef struct { + const char* ptr; + uint64_t val; +} _upb_DecodeLongVarintReturn; UPB_NOINLINE -static void encode_longvarint(upb_encstate* e, uint64_t val) { - size_t len; - char* start; - - encode_reserve(e, UPB_PB_VARINT_MAX_LEN); - len = encode_varint64(val, e->ptr); - start = e->ptr + UPB_PB_VARINT_MAX_LEN - len; - memmove(start, e->ptr, len); - e->ptr = start; +static _upb_DecodeLongVarintReturn _upb_Decoder_DecodeLongVarint( + const char* ptr, uint64_t val) { + _upb_DecodeLongVarintReturn ret = {NULL, 0}; + uint64_t byte; + int i; + for (i = 1; i < 10; i++) { + byte = (uint8_t)ptr[i]; + val += (byte - 1) << (i * 7); + if (!(byte & 0x80)) { + ret.ptr = ptr + i + 1; + ret.val = val; + return ret; + } + } + return ret; } UPB_FORCEINLINE -static void encode_varint(upb_encstate* e, uint64_t val) { - if (val < 128 && e->ptr != e->buf) { - --e->ptr; - *e->ptr = val; +static const char* _upb_Decoder_DecodeVarint(upb_Decoder* d, const char* ptr, + uint64_t* val) { + uint64_t byte = (uint8_t)*ptr; + if (UPB_LIKELY((byte & 0x80) == 0)) { + *val = byte; + return ptr + 1; } else { - encode_longvarint(e, val); + _upb_DecodeLongVarintReturn res = _upb_Decoder_DecodeLongVarint(ptr, byte); + if (!res.ptr) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); + *val = res.val; + return res.ptr; } } -static void encode_double(upb_encstate* e, double d) { - uint64_t u64; - UPB_ASSERT(sizeof(double) == sizeof(uint64_t)); - memcpy(&u64, &d, sizeof(uint64_t)); - encode_fixed64(e, u64); -} - -static void encode_float(upb_encstate* e, float d) { - uint32_t u32; - UPB_ASSERT(sizeof(float) == sizeof(uint32_t)); - memcpy(&u32, &d, sizeof(uint32_t)); - encode_fixed32(e, u32); -} - -static void encode_tag(upb_encstate* e, uint32_t field_number, - uint8_t wire_type) { - encode_varint(e, (field_number << 3) | wire_type); -} - -static void encode_fixedarray(upb_encstate* e, const upb_Array* arr, - size_t elem_size, uint32_t tag) { - size_t bytes = arr->size * elem_size; - const char* data = _upb_array_constptr(arr); - const char* ptr = data + bytes - elem_size; - - if (tag || !_upb_IsLittleEndian()) { - while (true) { - if (elem_size == 4) { - uint32_t val; - memcpy(&val, ptr, sizeof(val)); - val = _upb_BigEndian_Swap32(val); - encode_bytes(e, &val, elem_size); - } else { - UPB_ASSERT(elem_size == 8); - uint64_t val; - memcpy(&val, ptr, sizeof(val)); - val = _upb_BigEndian_Swap64(val); - encode_bytes(e, &val, elem_size); - } - - if (tag) encode_varint(e, tag); - if (ptr == data) break; - ptr -= elem_size; - } +UPB_FORCEINLINE +static const char* _upb_Decoder_DecodeTag(upb_Decoder* d, const char* ptr, + uint32_t* val) { + uint64_t byte = (uint8_t)*ptr; + if (UPB_LIKELY((byte & 0x80) == 0)) { + *val = byte; + return ptr + 1; } else { - encode_bytes(e, data, bytes); + const char* start = ptr; + _upb_DecodeLongVarintReturn res = _upb_Decoder_DecodeLongVarint(ptr, byte); + if (!res.ptr || res.ptr - start > 5 || res.val > UINT32_MAX) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); + } + *val = res.val; + return res.ptr; } } -static void encode_message(upb_encstate* e, const upb_Message* msg, - const upb_MiniTable* m, size_t* size); - -static void encode_TaggedMessagePtr(upb_encstate* e, - upb_TaggedMessagePtr tagged, - const upb_MiniTable* m, size_t* size) { - if (upb_TaggedMessagePtr_IsEmpty(tagged)) { - m = &_kUpb_MiniTable_Empty; +UPB_FORCEINLINE +static const char* upb_Decoder_DecodeSize(upb_Decoder* d, const char* ptr, + uint32_t* size) { + uint64_t size64; + ptr = _upb_Decoder_DecodeVarint(d, ptr, &size64); + if (size64 >= INT32_MAX || + !upb_EpsCopyInputStream_CheckSize(&d->input, ptr, (int)size64)) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); } - encode_message(e, _upb_TaggedMessagePtr_GetMessage(tagged), m, size); + *size = size64; + return ptr; } -static void encode_scalar(upb_encstate* e, const void* _field_mem, - const upb_MiniTableSub* subs, - const upb_MiniTableField* f) { - const char* field_mem = _field_mem; - int wire_type; - -#define CASE(ctype, type, wtype, encodeval) \ - { \ - ctype val = *(ctype*)field_mem; \ - encode_##type(e, encodeval); \ - wire_type = wtype; \ - break; \ +static void _upb_Decoder_MungeInt32(wireval* val) { + if (!_upb_IsLittleEndian()) { + /* The next stage will memcpy(dst, &val, 4) */ + val->uint32_val = val->uint64_val; } +} - switch (f->UPB_PRIVATE(descriptortype)) { - case kUpb_FieldType_Double: - CASE(double, double, kUpb_WireType_64Bit, val); - case kUpb_FieldType_Float: - CASE(float, float, kUpb_WireType_32Bit, val); - case kUpb_FieldType_Int64: - case kUpb_FieldType_UInt64: - CASE(uint64_t, varint, kUpb_WireType_Varint, val); - case kUpb_FieldType_UInt32: - CASE(uint32_t, varint, kUpb_WireType_Varint, val); - case kUpb_FieldType_Int32: - case kUpb_FieldType_Enum: - CASE(int32_t, varint, kUpb_WireType_Varint, (int64_t)val); - case kUpb_FieldType_SFixed64: - case kUpb_FieldType_Fixed64: - CASE(uint64_t, fixed64, kUpb_WireType_64Bit, val); - case kUpb_FieldType_Fixed32: - case kUpb_FieldType_SFixed32: - CASE(uint32_t, fixed32, kUpb_WireType_32Bit, val); +static void _upb_Decoder_Munge(int type, wireval* val) { + switch (type) { case kUpb_FieldType_Bool: - CASE(bool, varint, kUpb_WireType_Varint, val); - case kUpb_FieldType_SInt32: - CASE(int32_t, varint, kUpb_WireType_Varint, encode_zz32(val)); - case kUpb_FieldType_SInt64: - CASE(int64_t, varint, kUpb_WireType_Varint, encode_zz64(val)); - case kUpb_FieldType_String: - case kUpb_FieldType_Bytes: { - upb_StringView view = *(upb_StringView*)field_mem; - encode_bytes(e, view.data, view.size); - encode_varint(e, view.size); - wire_type = kUpb_WireType_Delimited; + val->bool_val = val->uint64_val != 0; break; - } - case kUpb_FieldType_Group: { - size_t size; - upb_TaggedMessagePtr submsg = *(upb_TaggedMessagePtr*)field_mem; - const upb_MiniTable* subm = subs[f->UPB_PRIVATE(submsg_index)].submsg; - if (submsg == 0) { - return; - } - if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded); - encode_tag(e, f->number, kUpb_WireType_EndGroup); - encode_TaggedMessagePtr(e, submsg, subm, &size); - wire_type = kUpb_WireType_StartGroup; - e->depth++; + case kUpb_FieldType_SInt32: { + uint32_t n = val->uint64_val; + val->uint32_val = (n >> 1) ^ -(int32_t)(n & 1); break; } - case kUpb_FieldType_Message: { - size_t size; - upb_TaggedMessagePtr submsg = *(upb_TaggedMessagePtr*)field_mem; - const upb_MiniTable* subm = subs[f->UPB_PRIVATE(submsg_index)].submsg; - if (submsg == 0) { - return; - } - if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded); - encode_TaggedMessagePtr(e, submsg, subm, &size); - encode_varint(e, size); - wire_type = kUpb_WireType_Delimited; - e->depth++; + case kUpb_FieldType_SInt64: { + uint64_t n = val->uint64_val; + val->uint64_val = (n >> 1) ^ -(int64_t)(n & 1); break; } - default: - UPB_UNREACHABLE(); + case kUpb_FieldType_Int32: + case kUpb_FieldType_UInt32: + case kUpb_FieldType_Enum: + _upb_Decoder_MungeInt32(val); + break; } -#undef CASE - - encode_tag(e, f->number, wire_type); } -static void encode_array(upb_encstate* e, const upb_Message* msg, - const upb_MiniTableSub* subs, - const upb_MiniTableField* f) { - const upb_Array* arr = *UPB_PTR_AT(msg, f->offset, upb_Array*); - bool packed = f->mode & kUpb_LabelFlags_IsPacked; - size_t pre_len = e->limit - e->ptr; +static upb_Message* _upb_Decoder_NewSubMessage(upb_Decoder* d, + const upb_MiniTableSub* subs, + const upb_MiniTableField* field, + upb_TaggedMessagePtr* target) { + const upb_MiniTable* subl = subs[field->UPB_PRIVATE(submsg_index)].submsg; + UPB_ASSERT(subl); + upb_Message* msg = _upb_Message_New(subl, &d->arena); + if (!msg) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - if (arr == NULL || arr->size == 0) { - return; - } + // Extensions should not be unlinked. A message extension should not be + // registered until its sub-message type is available to be linked. + bool is_empty = subl == &_kUpb_MiniTable_Empty; + bool is_extension = field->mode & kUpb_LabelFlags_IsExtension; + UPB_ASSERT(!(is_empty && is_extension)); -#define VARINT_CASE(ctype, encode) \ - { \ - const ctype* start = _upb_array_constptr(arr); \ - const ctype* ptr = start + arr->size; \ - uint32_t tag = packed ? 0 : (f->number << 3) | kUpb_WireType_Varint; \ - do { \ - ptr--; \ - encode_varint(e, encode); \ - if (tag) encode_varint(e, tag); \ - } while (ptr != start); \ - } \ - break; + if (is_empty && !(d->options & kUpb_DecodeOption_ExperimentalAllowUnlinked)) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_UnlinkedSubMessage); + } -#define TAG(wire_type) (packed ? 0 : (f->number << 3 | wire_type)) + upb_TaggedMessagePtr tagged = _upb_TaggedMessagePtr_Pack(msg, is_empty); + memcpy(target, &tagged, sizeof(tagged)); + return msg; +} - switch (f->UPB_PRIVATE(descriptortype)) { - case kUpb_FieldType_Double: - encode_fixedarray(e, arr, sizeof(double), TAG(kUpb_WireType_64Bit)); - break; - case kUpb_FieldType_Float: - encode_fixedarray(e, arr, sizeof(float), TAG(kUpb_WireType_32Bit)); - break; - case kUpb_FieldType_SFixed64: - case kUpb_FieldType_Fixed64: - encode_fixedarray(e, arr, sizeof(uint64_t), TAG(kUpb_WireType_64Bit)); - break; - case kUpb_FieldType_Fixed32: - case kUpb_FieldType_SFixed32: - encode_fixedarray(e, arr, sizeof(uint32_t), TAG(kUpb_WireType_32Bit)); - break; - case kUpb_FieldType_Int64: - case kUpb_FieldType_UInt64: - VARINT_CASE(uint64_t, *ptr); - case kUpb_FieldType_UInt32: - VARINT_CASE(uint32_t, *ptr); - case kUpb_FieldType_Int32: - case kUpb_FieldType_Enum: - VARINT_CASE(int32_t, (int64_t)*ptr); - case kUpb_FieldType_Bool: - VARINT_CASE(bool, *ptr); - case kUpb_FieldType_SInt32: - VARINT_CASE(int32_t, encode_zz32(*ptr)); - case kUpb_FieldType_SInt64: - VARINT_CASE(int64_t, encode_zz64(*ptr)); - case kUpb_FieldType_String: - case kUpb_FieldType_Bytes: { - const upb_StringView* start = _upb_array_constptr(arr); - const upb_StringView* ptr = start + arr->size; - do { - ptr--; - encode_bytes(e, ptr->data, ptr->size); - encode_varint(e, ptr->size); - encode_tag(e, f->number, kUpb_WireType_Delimited); - } while (ptr != start); - return; - } - case kUpb_FieldType_Group: { - const upb_TaggedMessagePtr* start = _upb_array_constptr(arr); - const upb_TaggedMessagePtr* ptr = start + arr->size; - const upb_MiniTable* subm = subs[f->UPB_PRIVATE(submsg_index)].submsg; - if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded); - do { - size_t size; - ptr--; - encode_tag(e, f->number, kUpb_WireType_EndGroup); - encode_TaggedMessagePtr(e, *ptr, subm, &size); - encode_tag(e, f->number, kUpb_WireType_StartGroup); - } while (ptr != start); - e->depth++; - return; - } - case kUpb_FieldType_Message: { - const upb_TaggedMessagePtr* start = _upb_array_constptr(arr); - const upb_TaggedMessagePtr* ptr = start + arr->size; - const upb_MiniTable* subm = subs[f->UPB_PRIVATE(submsg_index)].submsg; - if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded); - do { - size_t size; - ptr--; - encode_TaggedMessagePtr(e, *ptr, subm, &size); - encode_varint(e, size); - encode_tag(e, f->number, kUpb_WireType_Delimited); - } while (ptr != start); - e->depth++; - return; - } - } -#undef VARINT_CASE - - if (packed) { - encode_varint(e, e->limit - e->ptr - pre_len); - encode_tag(e, f->number, kUpb_WireType_Delimited); +static upb_Message* _upb_Decoder_ReuseSubMessage( + upb_Decoder* d, const upb_MiniTableSub* subs, + const upb_MiniTableField* field, upb_TaggedMessagePtr* target) { + upb_TaggedMessagePtr tagged = *target; + const upb_MiniTable* subl = subs[field->UPB_PRIVATE(submsg_index)].submsg; + UPB_ASSERT(subl); + if (!upb_TaggedMessagePtr_IsEmpty(tagged) || subl == &_kUpb_MiniTable_Empty) { + return _upb_TaggedMessagePtr_GetMessage(tagged); } -} -static void encode_mapentry(upb_encstate* e, uint32_t number, - const upb_MiniTable* layout, - const upb_MapEntry* ent) { - const upb_MiniTableField* key_field = &layout->fields[0]; - const upb_MiniTableField* val_field = &layout->fields[1]; - size_t pre_len = e->limit - e->ptr; + // We found an empty message from a previous parse that was performed before + // this field was linked. But it is linked now, so we want to allocate a new + // message of the correct type and promote data into it before continuing. + upb_Message* existing = _upb_TaggedMessagePtr_GetEmptyMessage(tagged); + upb_Message* promoted = _upb_Decoder_NewSubMessage(d, subs, field, target); size_t size; - encode_scalar(e, &ent->data.v, layout->subs, val_field); - encode_scalar(e, &ent->data.k, layout->subs, key_field); - size = (e->limit - e->ptr) - pre_len; - encode_varint(e, size); - encode_tag(e, number, kUpb_WireType_Delimited); + const char* unknown = upb_Message_GetUnknown(existing, &size); + upb_DecodeStatus status = upb_Decode(unknown, size, promoted, subl, d->extreg, + d->options, &d->arena); + if (status != kUpb_DecodeStatus_Ok) _upb_Decoder_ErrorJmp(d, status); + return promoted; } -static void encode_map(upb_encstate* e, const upb_Message* msg, - const upb_MiniTableSub* subs, - const upb_MiniTableField* f) { - const upb_Map* map = *UPB_PTR_AT(msg, f->offset, const upb_Map*); - const upb_MiniTable* layout = subs[f->UPB_PRIVATE(submsg_index)].submsg; - UPB_ASSERT(layout->field_count == 2); - - if (map == NULL) return; - - if (e->options & kUpb_EncodeOption_Deterministic) { - _upb_sortedmap sorted; - _upb_mapsorter_pushmap(&e->sorter, - layout->fields[0].UPB_PRIVATE(descriptortype), map, - &sorted); - upb_MapEntry ent; - while (_upb_sortedmap_next(&e->sorter, map, &sorted, &ent)) { - encode_mapentry(e, f->number, layout, &ent); - } - _upb_mapsorter_popmap(&e->sorter, &sorted); - } else { - intptr_t iter = UPB_STRTABLE_BEGIN; - upb_StringView key; - upb_value val; - while (upb_strtable_next2(&map->table, &key, &val, &iter)) { - upb_MapEntry ent; - _upb_map_fromkey(key, &ent.data.k, map->key_size); - _upb_map_fromvalue(val, &ent.data.v, map->val_size); - encode_mapentry(e, f->number, layout, &ent); - } - } +static const char* _upb_Decoder_ReadString(upb_Decoder* d, const char* ptr, + int size, upb_StringView* str) { + const char* str_ptr = ptr; + ptr = upb_EpsCopyInputStream_ReadString(&d->input, &str_ptr, size, &d->arena); + if (!ptr) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + str->data = str_ptr; + str->size = size; + return ptr; } -static bool encode_shouldencode(upb_encstate* e, const upb_Message* msg, - const upb_MiniTableSub* subs, - const upb_MiniTableField* f) { - if (f->presence == 0) { - /* Proto3 presence or map/array. */ - const void* mem = UPB_PTR_AT(msg, f->offset, void); - switch (_upb_MiniTableField_GetRep(f)) { - case kUpb_FieldRep_1Byte: { - char ch; - memcpy(&ch, mem, 1); - return ch != 0; - } - case kUpb_FieldRep_4Byte: { - uint32_t u32; - memcpy(&u32, mem, 4); - return u32 != 0; - } - case kUpb_FieldRep_8Byte: { - uint64_t u64; - memcpy(&u64, mem, 8); - return u64 != 0; - } - case kUpb_FieldRep_StringView: { - const upb_StringView* str = (const upb_StringView*)mem; - return str->size != 0; - } - default: - UPB_UNREACHABLE(); - } - } else if (f->presence > 0) { - /* Proto2 presence: hasbit. */ - return _upb_hasbit_field(msg, f); - } else { - /* Field is in a oneof. */ - return _upb_getoneofcase_field(msg, f) == f->number; +UPB_FORCEINLINE +static const char* _upb_Decoder_RecurseSubMessage(upb_Decoder* d, + const char* ptr, + upb_Message* submsg, + const upb_MiniTable* subl, + uint32_t expected_end_group) { + if (--d->depth < 0) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_MaxDepthExceeded); } -} - -static void encode_field(upb_encstate* e, const upb_Message* msg, - const upb_MiniTableSub* subs, - const upb_MiniTableField* field) { - switch (upb_FieldMode_Get(field)) { - case kUpb_FieldMode_Array: - encode_array(e, msg, subs, field); - break; - case kUpb_FieldMode_Map: - encode_map(e, msg, subs, field); - break; - case kUpb_FieldMode_Scalar: - encode_scalar(e, UPB_PTR_AT(msg, field->offset, void), subs, field); - break; - default: - UPB_UNREACHABLE(); + ptr = _upb_Decoder_DecodeMessage(d, ptr, submsg, subl); + d->depth++; + if (d->end_group != expected_end_group) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); } + return ptr; } -static void encode_msgset_item(upb_encstate* e, - const upb_Message_Extension* ext) { - size_t size; - encode_tag(e, kUpb_MsgSet_Item, kUpb_WireType_EndGroup); - encode_message(e, ext->data.ptr, ext->ext->sub.submsg, &size); - encode_varint(e, size); - encode_tag(e, kUpb_MsgSet_Message, kUpb_WireType_Delimited); - encode_varint(e, ext->ext->field.number); - encode_tag(e, kUpb_MsgSet_TypeId, kUpb_WireType_Varint); - encode_tag(e, kUpb_MsgSet_Item, kUpb_WireType_StartGroup); +UPB_FORCEINLINE +static const char* _upb_Decoder_DecodeSubMessage( + upb_Decoder* d, const char* ptr, upb_Message* submsg, + const upb_MiniTableSub* subs, const upb_MiniTableField* field, int size) { + int saved_delta = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, size); + const upb_MiniTable* subl = subs[field->UPB_PRIVATE(submsg_index)].submsg; + UPB_ASSERT(subl); + ptr = _upb_Decoder_RecurseSubMessage(d, ptr, submsg, subl, DECODE_NOGROUP); + upb_EpsCopyInputStream_PopLimit(&d->input, ptr, saved_delta); + return ptr; } -static void encode_ext(upb_encstate* e, const upb_Message_Extension* ext, - bool is_message_set) { - if (UPB_UNLIKELY(is_message_set)) { - encode_msgset_item(e, ext); - } else { - encode_field(e, &ext->data, &ext->ext->sub, &ext->ext->field); +UPB_FORCEINLINE +static const char* _upb_Decoder_DecodeGroup(upb_Decoder* d, const char* ptr, + upb_Message* submsg, + const upb_MiniTable* subl, + uint32_t number) { + if (_upb_Decoder_IsDone(d, &ptr)) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); } + ptr = _upb_Decoder_RecurseSubMessage(d, ptr, submsg, subl, number); + d->end_group = DECODE_NOGROUP; + return ptr; } -static void encode_message(upb_encstate* e, const upb_Message* msg, - const upb_MiniTable* m, size_t* size) { - size_t pre_len = e->limit - e->ptr; +UPB_FORCEINLINE +static const char* _upb_Decoder_DecodeUnknownGroup(upb_Decoder* d, + const char* ptr, + uint32_t number) { + return _upb_Decoder_DecodeGroup(d, ptr, NULL, NULL, number); +} - if ((e->options & kUpb_EncodeOption_CheckRequired) && m->required_count) { - uint64_t msg_head; - memcpy(&msg_head, msg, 8); - msg_head = _upb_BigEndian_Swap64(msg_head); - if (upb_MiniTable_requiredmask(m) & ~msg_head) { - encode_err(e, kUpb_EncodeStatus_MissingRequired); - } - } - - if ((e->options & kUpb_EncodeOption_SkipUnknown) == 0) { - size_t unknown_size; - const char* unknown = upb_Message_GetUnknown(msg, &unknown_size); +UPB_FORCEINLINE +static const char* _upb_Decoder_DecodeKnownGroup( + upb_Decoder* d, const char* ptr, upb_Message* submsg, + const upb_MiniTableSub* subs, const upb_MiniTableField* field) { + const upb_MiniTable* subl = subs[field->UPB_PRIVATE(submsg_index)].submsg; + UPB_ASSERT(subl); + return _upb_Decoder_DecodeGroup(d, ptr, submsg, subl, field->number); +} - if (unknown) { - encode_bytes(e, unknown, unknown_size); - } - } +static char* upb_Decoder_EncodeVarint32(uint32_t val, char* ptr) { + do { + uint8_t byte = val & 0x7fU; + val >>= 7; + if (val) byte |= 0x80U; + *(ptr++) = byte; + } while (val); + return ptr; +} - if (m->ext != kUpb_ExtMode_NonExtendable) { - /* Encode all extensions together. Unlike C++, we do not attempt to keep - * these in field number order relative to normal fields or even to each - * other. */ - size_t ext_count; - const upb_Message_Extension* ext = _upb_Message_Getexts(msg, &ext_count); - if (ext_count) { - if (e->options & kUpb_EncodeOption_Deterministic) { - _upb_sortedmap sorted; - _upb_mapsorter_pushexts(&e->sorter, ext, ext_count, &sorted); - while (_upb_sortedmap_nextext(&e->sorter, &sorted, &ext)) { - encode_ext(e, ext, m->ext == kUpb_ExtMode_IsMessageSet); - } - _upb_mapsorter_popmap(&e->sorter, &sorted); - } else { - const upb_Message_Extension* end = ext + ext_count; - for (; ext != end; ext++) { - encode_ext(e, ext, m->ext == kUpb_ExtMode_IsMessageSet); - } - } - } - } +static void _upb_Decoder_AddUnknownVarints(upb_Decoder* d, upb_Message* msg, + uint32_t val1, uint32_t val2) { + char buf[20]; + char* end = buf; + end = upb_Decoder_EncodeVarint32(val1, end); + end = upb_Decoder_EncodeVarint32(val2, end); - if (m->field_count) { - const upb_MiniTableField* f = &m->fields[m->field_count]; - const upb_MiniTableField* first = &m->fields[0]; - while (f != first) { - f--; - if (encode_shouldencode(e, msg, m->subs, f)) { - encode_field(e, msg, m->subs, f); - } - } + if (!_upb_Message_AddUnknown(msg, buf, end - buf, &d->arena)) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); } - - *size = (e->limit - e->ptr) - pre_len; } -static upb_EncodeStatus upb_Encoder_Encode(upb_encstate* const encoder, - const void* const msg, - const upb_MiniTable* const l, - char** const buf, - size_t* const size) { - // Unfortunately we must continue to perform hackery here because there are - // code paths which blindly copy the returned pointer without bothering to - // check for errors until much later (b/235839510). So we still set *buf to - // NULL on error and we still set it to non-NULL on a successful empty result. - if (UPB_SETJMP(encoder->err) == 0) { - encode_message(encoder, msg, l, size); - *size = encoder->limit - encoder->ptr; - if (*size == 0) { - static char ch; - *buf = &ch; - } else { - UPB_ASSERT(encoder->ptr); - *buf = encoder->ptr; - } - } else { - UPB_ASSERT(encoder->status != kUpb_EncodeStatus_Ok); - *buf = NULL; - *size = 0; - } +UPB_NOINLINE +static bool _upb_Decoder_CheckEnumSlow(upb_Decoder* d, const char* ptr, + upb_Message* msg, + const upb_MiniTableEnum* e, + const upb_MiniTableField* field, + uint32_t v) { + if (_upb_MiniTable_CheckEnumValueSlow(e, v)) return true; - _upb_mapsorter_destroy(&encoder->sorter); - return encoder->status; + // Unrecognized enum goes into unknown fields. + // For packed fields the tag could be arbitrarily far in the past, so we + // just re-encode the tag and value here. + uint32_t tag = ((uint32_t)field->number << 3) | kUpb_WireType_Varint; + upb_Message* unknown_msg = + field->mode & kUpb_LabelFlags_IsExtension ? d->unknown_msg : msg; + _upb_Decoder_AddUnknownVarints(d, unknown_msg, tag, v); + return false; } -upb_EncodeStatus upb_Encode(const void* msg, const upb_MiniTable* l, - int options, upb_Arena* arena, char** buf, - size_t* size) { - upb_encstate e; - unsigned depth = (unsigned)options >> 16; - - e.status = kUpb_EncodeStatus_Ok; - e.arena = arena; - e.buf = NULL; - e.limit = NULL; - e.ptr = NULL; - e.depth = depth ? depth : kUpb_WireFormat_DefaultDepthLimit; - e.options = options; - _upb_mapsorter_init(&e.sorter); +UPB_FORCEINLINE +static bool _upb_Decoder_CheckEnum(upb_Decoder* d, const char* ptr, + upb_Message* msg, const upb_MiniTableEnum* e, + const upb_MiniTableField* field, + wireval* val) { + uint32_t v = val->uint32_val; - return upb_Encoder_Encode(&e, msg, l, buf, size); + _kUpb_FastEnumCheck_Status status = _upb_MiniTable_CheckEnumValueFast(e, v); + if (UPB_LIKELY(status == _kUpb_FastEnumCheck_ValueIsInEnum)) return true; + return _upb_Decoder_CheckEnumSlow(d, ptr, msg, e, field, v); } +UPB_NOINLINE +static const char* _upb_Decoder_DecodeEnumArray(upb_Decoder* d, const char* ptr, + upb_Message* msg, + upb_Array* arr, + const upb_MiniTableSub* subs, + const upb_MiniTableField* field, + wireval* val) { + const upb_MiniTableEnum* e = subs[field->UPB_PRIVATE(submsg_index)].subenum; + if (!_upb_Decoder_CheckEnum(d, ptr, msg, e, field, val)) return ptr; + void* mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->size * 4, void); + arr->size++; + memcpy(mem, val, 4); + return ptr; +} +UPB_FORCEINLINE +static const char* _upb_Decoder_DecodeFixedPacked( + upb_Decoder* d, const char* ptr, upb_Array* arr, wireval* val, + const upb_MiniTableField* field, int lg2) { + int mask = (1 << lg2) - 1; + size_t count = val->size >> lg2; + if ((val->size & mask) != 0) { + // Length isn't a round multiple of elem size. + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); + } + _upb_Decoder_Reserve(d, arr, count); + void* mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << lg2, void); + arr->size += count; + // Note: if/when the decoder supports multi-buffer input, we will need to + // handle buffer seams here. + if (_upb_IsLittleEndian()) { + ptr = upb_EpsCopyInputStream_Copy(&d->input, ptr, mem, val->size); + } else { + int delta = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, val->size); + char* dst = mem; + while (!_upb_Decoder_IsDone(d, &ptr)) { + if (lg2 == 2) { + ptr = upb_WireReader_ReadFixed32(ptr, dst); + dst += 4; + } else { + UPB_ASSERT(lg2 == 3); + ptr = upb_WireReader_ReadFixed64(ptr, dst); + dst += 8; + } + } + upb_EpsCopyInputStream_PopLimit(&d->input, ptr, delta); + } -// Must be last. + return ptr; +} -UPB_NOINLINE _upb_WireReader_ReadLongVarintRet -_upb_WireReader_ReadLongVarint(const char* ptr, uint64_t val) { - _upb_WireReader_ReadLongVarintRet ret = {NULL, 0}; - uint64_t byte; - int i; - for (i = 1; i < 10; i++) { - byte = (uint8_t)ptr[i]; - val += (byte - 1) << (i * 7); - if (!(byte & 0x80)) { - ret.ptr = ptr + i + 1; - ret.val = val; - return ret; +UPB_FORCEINLINE +static const char* _upb_Decoder_DecodeVarintPacked( + upb_Decoder* d, const char* ptr, upb_Array* arr, wireval* val, + const upb_MiniTableField* field, int lg2) { + int scale = 1 << lg2; + int saved_limit = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, val->size); + char* out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << lg2, void); + while (!_upb_Decoder_IsDone(d, &ptr)) { + wireval elem; + ptr = _upb_Decoder_DecodeVarint(d, ptr, &elem.uint64_val); + _upb_Decoder_Munge(field->UPB_PRIVATE(descriptortype), &elem); + if (_upb_Decoder_Reserve(d, arr, 1)) { + out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << lg2, void); } + arr->size++; + memcpy(out, &elem, scale); + out += scale; } - return ret; + upb_EpsCopyInputStream_PopLimit(&d->input, ptr, saved_limit); + return ptr; } -const char* _upb_WireReader_SkipGroup(const char* ptr, uint32_t tag, - int depth_limit, - upb_EpsCopyInputStream* stream) { - if (--depth_limit == 0) return NULL; - uint32_t end_group_tag = (tag & ~7ULL) | kUpb_WireType_EndGroup; - while (!upb_EpsCopyInputStream_IsDone(stream, &ptr)) { - uint32_t tag; - ptr = upb_WireReader_ReadTag(ptr, &tag); - if (!ptr) return NULL; - if (tag == end_group_tag) return ptr; - ptr = _upb_WireReader_SkipValue(ptr, tag, depth_limit, stream); - if (!ptr) return NULL; +UPB_NOINLINE +static const char* _upb_Decoder_DecodeEnumPacked( + upb_Decoder* d, const char* ptr, upb_Message* msg, upb_Array* arr, + const upb_MiniTableSub* subs, const upb_MiniTableField* field, + wireval* val) { + const upb_MiniTableEnum* e = subs[field->UPB_PRIVATE(submsg_index)].subenum; + int saved_limit = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, val->size); + char* out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size * 4, void); + while (!_upb_Decoder_IsDone(d, &ptr)) { + wireval elem; + ptr = _upb_Decoder_DecodeVarint(d, ptr, &elem.uint64_val); + _upb_Decoder_MungeInt32(&elem); + if (!_upb_Decoder_CheckEnum(d, ptr, msg, e, field, &elem)) { + continue; + } + if (_upb_Decoder_Reserve(d, arr, 1)) { + out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size * 4, void); + } + arr->size++; + memcpy(out, &elem, 4); + out += 4; } + upb_EpsCopyInputStream_PopLimit(&d->input, ptr, saved_limit); return ptr; } +upb_Array* _upb_Decoder_CreateArray(upb_Decoder* d, + const upb_MiniTableField* field) { + /* Maps descriptor type -> elem_size_lg2. */ + static const uint8_t kElemSizeLg2[] = { + [0] = -1, // invalid descriptor type + [kUpb_FieldType_Double] = 3, + [kUpb_FieldType_Float] = 2, + [kUpb_FieldType_Int64] = 3, + [kUpb_FieldType_UInt64] = 3, + [kUpb_FieldType_Int32] = 2, + [kUpb_FieldType_Fixed64] = 3, + [kUpb_FieldType_Fixed32] = 2, + [kUpb_FieldType_Bool] = 0, + [kUpb_FieldType_String] = UPB_SIZE(3, 4), + [kUpb_FieldType_Group] = UPB_SIZE(2, 3), + [kUpb_FieldType_Message] = UPB_SIZE(2, 3), + [kUpb_FieldType_Bytes] = UPB_SIZE(3, 4), + [kUpb_FieldType_UInt32] = 2, + [kUpb_FieldType_Enum] = 2, + [kUpb_FieldType_SFixed32] = 2, + [kUpb_FieldType_SFixed64] = 3, + [kUpb_FieldType_SInt32] = 2, + [kUpb_FieldType_SInt64] = 3, + }; - -// Must be last. - -typedef struct { - uint64_t present_values_mask; - uint32_t last_written_value; -} upb_MtDataEncoderInternal_EnumState; - -typedef struct { - uint64_t msg_modifiers; - uint32_t last_field_num; - enum { - kUpb_OneofState_NotStarted, - kUpb_OneofState_StartedOneof, - kUpb_OneofState_EmittedOneofField, - } oneof_state; -} upb_MtDataEncoderInternal_MsgState; - -typedef struct { - char* buf_start; // Only for checking kUpb_MtDataEncoder_MinSize. - union { - upb_MtDataEncoderInternal_EnumState enum_state; - upb_MtDataEncoderInternal_MsgState msg_state; - } state; -} upb_MtDataEncoderInternal; - -static upb_MtDataEncoderInternal* upb_MtDataEncoder_GetInternal( - upb_MtDataEncoder* e, char* buf_start) { - UPB_ASSERT(sizeof(upb_MtDataEncoderInternal) <= sizeof(e->internal)); - upb_MtDataEncoderInternal* ret = (upb_MtDataEncoderInternal*)e->internal; - ret->buf_start = buf_start; + size_t lg2 = kElemSizeLg2[field->UPB_PRIVATE(descriptortype)]; + upb_Array* ret = _upb_Array_New(&d->arena, 4, lg2); + if (!ret) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); return ret; } -static char* upb_MtDataEncoder_PutRaw(upb_MtDataEncoder* e, char* ptr, - char ch) { - upb_MtDataEncoderInternal* in = (upb_MtDataEncoderInternal*)e->internal; - UPB_ASSERT(ptr - in->buf_start < kUpb_MtDataEncoder_MinSize); - if (ptr == e->end) return NULL; - *ptr++ = ch; - return ptr; -} - -static char* upb_MtDataEncoder_Put(upb_MtDataEncoder* e, char* ptr, char ch) { - return upb_MtDataEncoder_PutRaw(e, ptr, _upb_ToBase92(ch)); -} +static const char* _upb_Decoder_DecodeToArray(upb_Decoder* d, const char* ptr, + upb_Message* msg, + const upb_MiniTableSub* subs, + const upb_MiniTableField* field, + wireval* val, int op) { + upb_Array** arrp = UPB_PTR_AT(msg, field->offset, void); + upb_Array* arr = *arrp; + void* mem; -static char* upb_MtDataEncoder_PutBase92Varint(upb_MtDataEncoder* e, char* ptr, - uint32_t val, int min, int max) { - int shift = upb_Log2Ceiling(_upb_FromBase92(max) - _upb_FromBase92(min) + 1); - UPB_ASSERT(shift <= 6); - uint32_t mask = (1 << shift) - 1; - do { - uint32_t bits = val & mask; - ptr = upb_MtDataEncoder_Put(e, ptr, bits + _upb_FromBase92(min)); - if (!ptr) return NULL; - val >>= shift; - } while (val); - return ptr; -} + if (arr) { + _upb_Decoder_Reserve(d, arr, 1); + } else { + arr = _upb_Decoder_CreateArray(d, field); + *arrp = arr; + } -char* upb_MtDataEncoder_PutModifier(upb_MtDataEncoder* e, char* ptr, - uint64_t mod) { - if (mod) { - ptr = upb_MtDataEncoder_PutBase92Varint(e, ptr, mod, - kUpb_EncodedValue_MinModifier, - kUpb_EncodedValue_MaxModifier); + switch (op) { + case kUpb_DecodeOp_Scalar1Byte: + case kUpb_DecodeOp_Scalar4Byte: + case kUpb_DecodeOp_Scalar8Byte: + /* Append scalar value. */ + mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << op, void); + arr->size++; + memcpy(mem, val, 1 << op); + return ptr; + case kUpb_DecodeOp_String: + _upb_Decoder_VerifyUtf8(d, ptr, val->size); + /* Fallthrough. */ + case kUpb_DecodeOp_Bytes: { + /* Append bytes. */ + upb_StringView* str = (upb_StringView*)_upb_array_ptr(arr) + arr->size; + arr->size++; + return _upb_Decoder_ReadString(d, ptr, val->size, str); + } + case kUpb_DecodeOp_SubMessage: { + /* Append submessage / group. */ + upb_TaggedMessagePtr* target = UPB_PTR_AT( + _upb_array_ptr(arr), arr->size * sizeof(void*), upb_TaggedMessagePtr); + upb_Message* submsg = _upb_Decoder_NewSubMessage(d, subs, field, target); + arr->size++; + if (UPB_UNLIKELY(field->UPB_PRIVATE(descriptortype) == + kUpb_FieldType_Group)) { + return _upb_Decoder_DecodeKnownGroup(d, ptr, submsg, subs, field); + } else { + return _upb_Decoder_DecodeSubMessage(d, ptr, submsg, subs, field, + val->size); + } + } + case OP_FIXPCK_LG2(2): + case OP_FIXPCK_LG2(3): + return _upb_Decoder_DecodeFixedPacked(d, ptr, arr, val, field, + op - OP_FIXPCK_LG2(0)); + case OP_VARPCK_LG2(0): + case OP_VARPCK_LG2(2): + case OP_VARPCK_LG2(3): + return _upb_Decoder_DecodeVarintPacked(d, ptr, arr, val, field, + op - OP_VARPCK_LG2(0)); + case kUpb_DecodeOp_Enum: + return _upb_Decoder_DecodeEnumArray(d, ptr, msg, arr, subs, field, val); + case kUpb_DecodeOp_PackedEnum: + return _upb_Decoder_DecodeEnumPacked(d, ptr, msg, arr, subs, field, val); + default: + UPB_UNREACHABLE(); } - return ptr; } -char* upb_MtDataEncoder_EncodeExtension(upb_MtDataEncoder* e, char* ptr, - upb_FieldType type, uint32_t field_num, - uint64_t field_mod) { - upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); - in->state.msg_state.msg_modifiers = 0; - in->state.msg_state.last_field_num = 0; - in->state.msg_state.oneof_state = kUpb_OneofState_NotStarted; - - ptr = upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_ExtensionV1); - if (!ptr) return NULL; +upb_Map* _upb_Decoder_CreateMap(upb_Decoder* d, const upb_MiniTable* entry) { + /* Maps descriptor type -> upb map size. */ + static const uint8_t kSizeInMap[] = { + [0] = -1, // invalid descriptor type */ + [kUpb_FieldType_Double] = 8, + [kUpb_FieldType_Float] = 4, + [kUpb_FieldType_Int64] = 8, + [kUpb_FieldType_UInt64] = 8, + [kUpb_FieldType_Int32] = 4, + [kUpb_FieldType_Fixed64] = 8, + [kUpb_FieldType_Fixed32] = 4, + [kUpb_FieldType_Bool] = 1, + [kUpb_FieldType_String] = UPB_MAPTYPE_STRING, + [kUpb_FieldType_Group] = sizeof(void*), + [kUpb_FieldType_Message] = sizeof(void*), + [kUpb_FieldType_Bytes] = UPB_MAPTYPE_STRING, + [kUpb_FieldType_UInt32] = 4, + [kUpb_FieldType_Enum] = 4, + [kUpb_FieldType_SFixed32] = 4, + [kUpb_FieldType_SFixed64] = 8, + [kUpb_FieldType_SInt32] = 4, + [kUpb_FieldType_SInt64] = 8, + }; - return upb_MtDataEncoder_PutField(e, ptr, type, field_num, field_mod); + const upb_MiniTableField* key_field = &entry->fields[0]; + const upb_MiniTableField* val_field = &entry->fields[1]; + char key_size = kSizeInMap[key_field->UPB_PRIVATE(descriptortype)]; + char val_size = kSizeInMap[val_field->UPB_PRIVATE(descriptortype)]; + UPB_ASSERT(key_field->offset == offsetof(upb_MapEntryData, k)); + UPB_ASSERT(val_field->offset == offsetof(upb_MapEntryData, v)); + upb_Map* ret = _upb_Map_New(&d->arena, key_size, val_size); + if (!ret) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + return ret; } -char* upb_MtDataEncoder_EncodeMap(upb_MtDataEncoder* e, char* ptr, - upb_FieldType key_type, - upb_FieldType value_type, uint64_t key_mod, - uint64_t value_mod) { - upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); - in->state.msg_state.msg_modifiers = 0; - in->state.msg_state.last_field_num = 0; - in->state.msg_state.oneof_state = kUpb_OneofState_NotStarted; - - ptr = upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_MapV1); - if (!ptr) return NULL; - - ptr = upb_MtDataEncoder_PutField(e, ptr, key_type, 1, key_mod); - if (!ptr) return NULL; - - return upb_MtDataEncoder_PutField(e, ptr, value_type, 2, value_mod); -} +static const char* _upb_Decoder_DecodeToMap(upb_Decoder* d, const char* ptr, + upb_Message* msg, + const upb_MiniTableSub* subs, + const upb_MiniTableField* field, + wireval* val) { + upb_Map** map_p = UPB_PTR_AT(msg, field->offset, upb_Map*); + upb_Map* map = *map_p; + upb_MapEntry ent; + UPB_ASSERT(upb_MiniTableField_Type(field) == kUpb_FieldType_Message); + const upb_MiniTable* entry = subs[field->UPB_PRIVATE(submsg_index)].submsg; -char* upb_MtDataEncoder_EncodeMessageSet(upb_MtDataEncoder* e, char* ptr) { - (void)upb_MtDataEncoder_GetInternal(e, ptr); - return upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_MessageSetV1); -} + UPB_ASSERT(entry); + UPB_ASSERT(entry->field_count == 2); + UPB_ASSERT(!upb_IsRepeatedOrMap(&entry->fields[0])); + UPB_ASSERT(!upb_IsRepeatedOrMap(&entry->fields[1])); -char* upb_MtDataEncoder_StartMessage(upb_MtDataEncoder* e, char* ptr, - uint64_t msg_mod) { - upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); - in->state.msg_state.msg_modifiers = msg_mod; - in->state.msg_state.last_field_num = 0; - in->state.msg_state.oneof_state = kUpb_OneofState_NotStarted; + if (!map) { + map = _upb_Decoder_CreateMap(d, entry); + *map_p = map; + } - ptr = upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_MessageV1); - if (!ptr) return NULL; + // Parse map entry. + memset(&ent, 0, sizeof(ent)); - return upb_MtDataEncoder_PutModifier(e, ptr, msg_mod); -} + if (entry->fields[1].UPB_PRIVATE(descriptortype) == kUpb_FieldType_Message || + entry->fields[1].UPB_PRIVATE(descriptortype) == kUpb_FieldType_Group) { + // Create proactively to handle the case where it doesn't appear. + upb_TaggedMessagePtr msg; + _upb_Decoder_NewSubMessage(d, entry->subs, &entry->fields[1], &msg); + ent.data.v.val = upb_value_uintptr(msg); + } -static char* _upb_MtDataEncoder_MaybePutFieldSkip(upb_MtDataEncoder* e, - char* ptr, - uint32_t field_num) { - upb_MtDataEncoderInternal* in = (upb_MtDataEncoderInternal*)e->internal; - if (field_num <= in->state.msg_state.last_field_num) return NULL; - if (in->state.msg_state.last_field_num + 1 != field_num) { - // Put skip. - UPB_ASSERT(field_num > in->state.msg_state.last_field_num); - uint32_t skip = field_num - in->state.msg_state.last_field_num; - ptr = upb_MtDataEncoder_PutBase92Varint( - e, ptr, skip, kUpb_EncodedValue_MinSkip, kUpb_EncodedValue_MaxSkip); - if (!ptr) return NULL; + ptr = + _upb_Decoder_DecodeSubMessage(d, ptr, &ent.data, subs, field, val->size); + // check if ent had any unknown fields + size_t size; + upb_Message_GetUnknown(&ent.data, &size); + if (size != 0) { + char* buf; + size_t size; + uint32_t tag = ((uint32_t)field->number << 3) | kUpb_WireType_Delimited; + upb_EncodeStatus status = + upb_Encode(&ent.data, entry, 0, &d->arena, &buf, &size); + if (status != kUpb_EncodeStatus_Ok) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + } + _upb_Decoder_AddUnknownVarints(d, msg, tag, size); + if (!_upb_Message_AddUnknown(msg, buf, size, &d->arena)) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + } + } else { + if (_upb_Map_Insert(map, &ent.data.k, map->key_size, &ent.data.v, + map->val_size, + &d->arena) == kUpb_MapInsertStatus_OutOfMemory) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + } } - in->state.msg_state.last_field_num = field_num; return ptr; } -static char* _upb_MtDataEncoder_PutFieldType(upb_MtDataEncoder* e, char* ptr, - upb_FieldType type, - uint64_t field_mod) { - static const char kUpb_TypeToEncoded[] = { - [kUpb_FieldType_Double] = kUpb_EncodedType_Double, - [kUpb_FieldType_Float] = kUpb_EncodedType_Float, - [kUpb_FieldType_Int64] = kUpb_EncodedType_Int64, - [kUpb_FieldType_UInt64] = kUpb_EncodedType_UInt64, - [kUpb_FieldType_Int32] = kUpb_EncodedType_Int32, - [kUpb_FieldType_Fixed64] = kUpb_EncodedType_Fixed64, - [kUpb_FieldType_Fixed32] = kUpb_EncodedType_Fixed32, - [kUpb_FieldType_Bool] = kUpb_EncodedType_Bool, - [kUpb_FieldType_String] = kUpb_EncodedType_String, - [kUpb_FieldType_Group] = kUpb_EncodedType_Group, - [kUpb_FieldType_Message] = kUpb_EncodedType_Message, - [kUpb_FieldType_Bytes] = kUpb_EncodedType_Bytes, - [kUpb_FieldType_UInt32] = kUpb_EncodedType_UInt32, - [kUpb_FieldType_Enum] = kUpb_EncodedType_OpenEnum, - [kUpb_FieldType_SFixed32] = kUpb_EncodedType_SFixed32, - [kUpb_FieldType_SFixed64] = kUpb_EncodedType_SFixed64, - [kUpb_FieldType_SInt32] = kUpb_EncodedType_SInt32, - [kUpb_FieldType_SInt64] = kUpb_EncodedType_SInt64, - }; +static const char* _upb_Decoder_DecodeToSubMessage( + upb_Decoder* d, const char* ptr, upb_Message* msg, + const upb_MiniTableSub* subs, const upb_MiniTableField* field, wireval* val, + int op) { + void* mem = UPB_PTR_AT(msg, field->offset, void); + int type = field->UPB_PRIVATE(descriptortype); - int encoded_type = kUpb_TypeToEncoded[type]; + if (UPB_UNLIKELY(op == kUpb_DecodeOp_Enum) && + !_upb_Decoder_CheckEnum(d, ptr, msg, + subs[field->UPB_PRIVATE(submsg_index)].subenum, + field, val)) { + return ptr; + } - if (field_mod & kUpb_FieldModifier_IsClosedEnum) { - UPB_ASSERT(type == kUpb_FieldType_Enum); - encoded_type = kUpb_EncodedType_ClosedEnum; + /* Set presence if necessary. */ + if (field->presence > 0) { + _upb_sethas_field(msg, field); + } else if (field->presence < 0) { + /* Oneof case */ + uint32_t* oneof_case = _upb_oneofcase_field(msg, field); + if (op == kUpb_DecodeOp_SubMessage && *oneof_case != field->number) { + memset(mem, 0, sizeof(void*)); + } + *oneof_case = field->number; } - if (field_mod & kUpb_FieldModifier_IsRepeated) { - // Repeated fields shift the type number up (unlike other modifiers which - // are bit flags). - encoded_type += kUpb_EncodedType_RepeatedBase; + /* Store into message. */ + switch (op) { + case kUpb_DecodeOp_SubMessage: { + upb_TaggedMessagePtr* submsgp = mem; + upb_Message* submsg; + if (*submsgp) { + submsg = _upb_Decoder_ReuseSubMessage(d, subs, field, submsgp); + } else { + submsg = _upb_Decoder_NewSubMessage(d, subs, field, submsgp); + } + if (UPB_UNLIKELY(type == kUpb_FieldType_Group)) { + ptr = _upb_Decoder_DecodeKnownGroup(d, ptr, submsg, subs, field); + } else { + ptr = _upb_Decoder_DecodeSubMessage(d, ptr, submsg, subs, field, + val->size); + } + break; + } + case kUpb_DecodeOp_String: + _upb_Decoder_VerifyUtf8(d, ptr, val->size); + /* Fallthrough. */ + case kUpb_DecodeOp_Bytes: + return _upb_Decoder_ReadString(d, ptr, val->size, mem); + case kUpb_DecodeOp_Scalar8Byte: + memcpy(mem, val, 8); + break; + case kUpb_DecodeOp_Enum: + case kUpb_DecodeOp_Scalar4Byte: + memcpy(mem, val, 4); + break; + case kUpb_DecodeOp_Scalar1Byte: + memcpy(mem, val, 1); + break; + default: + UPB_UNREACHABLE(); } - return upb_MtDataEncoder_Put(e, ptr, encoded_type); + return ptr; } -static char* _upb_MtDataEncoder_MaybePutModifiers(upb_MtDataEncoder* e, - char* ptr, upb_FieldType type, - uint64_t field_mod) { - upb_MtDataEncoderInternal* in = (upb_MtDataEncoderInternal*)e->internal; - uint32_t encoded_modifiers = 0; - if ((field_mod & kUpb_FieldModifier_IsRepeated) && - upb_FieldType_IsPackable(type)) { - bool field_is_packed = field_mod & kUpb_FieldModifier_IsPacked; - bool default_is_packed = in->state.msg_state.msg_modifiers & - kUpb_MessageModifier_DefaultIsPacked; - if (field_is_packed != default_is_packed) { - encoded_modifiers |= kUpb_EncodedFieldModifier_FlipPacked; - } +UPB_NOINLINE +const char* _upb_Decoder_CheckRequired(upb_Decoder* d, const char* ptr, + const upb_Message* msg, + const upb_MiniTable* l) { + assert(l->required_count); + if (UPB_LIKELY((d->options & kUpb_DecodeOption_CheckRequired) == 0)) { + return ptr; } - - if (field_mod & kUpb_FieldModifier_IsProto3Singular) { - encoded_modifiers |= kUpb_EncodedFieldModifier_IsProto3Singular; + uint64_t msg_head; + memcpy(&msg_head, msg, 8); + msg_head = _upb_BigEndian_Swap64(msg_head); + if (upb_MiniTable_requiredmask(l) & ~msg_head) { + d->missing_required = true; } + return ptr; +} - if (field_mod & kUpb_FieldModifier_IsRequired) { - encoded_modifiers |= kUpb_EncodedFieldModifier_IsRequired; +UPB_FORCEINLINE +static bool _upb_Decoder_TryFastDispatch(upb_Decoder* d, const char** ptr, + upb_Message* msg, + const upb_MiniTable* layout) { +#if UPB_FASTTABLE + if (layout && layout->table_mask != (unsigned char)-1) { + uint16_t tag = _upb_FastDecoder_LoadTag(*ptr); + intptr_t table = decode_totable(layout); + *ptr = _upb_FastDecoder_TagDispatch(d, *ptr, msg, table, 0, tag); + return true; } +#endif + return false; +} - return upb_MtDataEncoder_PutModifier(e, ptr, encoded_modifiers); +static const char* upb_Decoder_SkipField(upb_Decoder* d, const char* ptr, + uint32_t tag) { + int field_number = tag >> 3; + int wire_type = tag & 7; + switch (wire_type) { + case kUpb_WireType_Varint: { + uint64_t val; + return _upb_Decoder_DecodeVarint(d, ptr, &val); + } + case kUpb_WireType_64Bit: + return ptr + 8; + case kUpb_WireType_32Bit: + return ptr + 4; + case kUpb_WireType_Delimited: { + uint32_t size; + ptr = upb_Decoder_DecodeSize(d, ptr, &size); + return ptr + size; + } + case kUpb_WireType_StartGroup: + return _upb_Decoder_DecodeUnknownGroup(d, ptr, field_number); + default: + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); + } } -char* upb_MtDataEncoder_PutField(upb_MtDataEncoder* e, char* ptr, - upb_FieldType type, uint32_t field_num, - uint64_t field_mod) { - upb_MtDataEncoder_GetInternal(e, ptr); +enum { + kStartItemTag = ((kUpb_MsgSet_Item << 3) | kUpb_WireType_StartGroup), + kEndItemTag = ((kUpb_MsgSet_Item << 3) | kUpb_WireType_EndGroup), + kTypeIdTag = ((kUpb_MsgSet_TypeId << 3) | kUpb_WireType_Varint), + kMessageTag = ((kUpb_MsgSet_Message << 3) | kUpb_WireType_Delimited), +}; - ptr = _upb_MtDataEncoder_MaybePutFieldSkip(e, ptr, field_num); - if (!ptr) return NULL; +static void upb_Decoder_AddKnownMessageSetItem( + upb_Decoder* d, upb_Message* msg, const upb_MiniTableExtension* item_mt, + const char* data, uint32_t size) { + upb_Message_Extension* ext = + _upb_Message_GetOrCreateExtension(msg, item_mt, &d->arena); + if (UPB_UNLIKELY(!ext)) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + } + upb_Message* submsg = _upb_Decoder_NewSubMessage( + d, &ext->ext->sub, &ext->ext->field, (upb_TaggedMessagePtr*)&ext->data); + upb_DecodeStatus status = upb_Decode(data, size, submsg, item_mt->sub.submsg, + d->extreg, d->options, &d->arena); + if (status != kUpb_DecodeStatus_Ok) _upb_Decoder_ErrorJmp(d, status); +} - ptr = _upb_MtDataEncoder_PutFieldType(e, ptr, type, field_mod); - if (!ptr) return NULL; +static void upb_Decoder_AddUnknownMessageSetItem(upb_Decoder* d, + upb_Message* msg, + uint32_t type_id, + const char* message_data, + uint32_t message_size) { + char buf[60]; + char* ptr = buf; + ptr = upb_Decoder_EncodeVarint32(kStartItemTag, ptr); + ptr = upb_Decoder_EncodeVarint32(kTypeIdTag, ptr); + ptr = upb_Decoder_EncodeVarint32(type_id, ptr); + ptr = upb_Decoder_EncodeVarint32(kMessageTag, ptr); + ptr = upb_Decoder_EncodeVarint32(message_size, ptr); + char* split = ptr; - return _upb_MtDataEncoder_MaybePutModifiers(e, ptr, type, field_mod); + ptr = upb_Decoder_EncodeVarint32(kEndItemTag, ptr); + char* end = ptr; + + if (!_upb_Message_AddUnknown(msg, buf, split - buf, &d->arena) || + !_upb_Message_AddUnknown(msg, message_data, message_size, &d->arena) || + !_upb_Message_AddUnknown(msg, split, end - split, &d->arena)) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + } } -char* upb_MtDataEncoder_StartOneof(upb_MtDataEncoder* e, char* ptr) { - upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); - if (in->state.msg_state.oneof_state == kUpb_OneofState_NotStarted) { - ptr = upb_MtDataEncoder_Put(e, ptr, _upb_FromBase92(kUpb_EncodedValue_End)); +static void upb_Decoder_AddMessageSetItem(upb_Decoder* d, upb_Message* msg, + const upb_MiniTable* t, + uint32_t type_id, const char* data, + uint32_t size) { + const upb_MiniTableExtension* item_mt = + upb_ExtensionRegistry_Lookup(d->extreg, t, type_id); + if (item_mt) { + upb_Decoder_AddKnownMessageSetItem(d, msg, item_mt, data, size); } else { - ptr = upb_MtDataEncoder_Put( - e, ptr, _upb_FromBase92(kUpb_EncodedValue_OneofSeparator)); + upb_Decoder_AddUnknownMessageSetItem(d, msg, type_id, data, size); } - in->state.msg_state.oneof_state = kUpb_OneofState_StartedOneof; - return ptr; } -char* upb_MtDataEncoder_PutOneofField(upb_MtDataEncoder* e, char* ptr, - uint32_t field_num) { - upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); - if (in->state.msg_state.oneof_state == kUpb_OneofState_EmittedOneofField) { - ptr = upb_MtDataEncoder_Put( - e, ptr, _upb_FromBase92(kUpb_EncodedValue_FieldSeparator)); - if (!ptr) return NULL; +static const char* upb_Decoder_DecodeMessageSetItem( + upb_Decoder* d, const char* ptr, upb_Message* msg, + const upb_MiniTable* layout) { + uint32_t type_id = 0; + upb_StringView preserved = {NULL, 0}; + typedef enum { + kUpb_HaveId = 1 << 0, + kUpb_HavePayload = 1 << 1, + } StateMask; + StateMask state_mask = 0; + while (!_upb_Decoder_IsDone(d, &ptr)) { + uint32_t tag; + ptr = _upb_Decoder_DecodeTag(d, ptr, &tag); + switch (tag) { + case kEndItemTag: + return ptr; + case kTypeIdTag: { + uint64_t tmp; + ptr = _upb_Decoder_DecodeVarint(d, ptr, &tmp); + if (state_mask & kUpb_HaveId) break; // Ignore dup. + state_mask |= kUpb_HaveId; + type_id = tmp; + if (state_mask & kUpb_HavePayload) { + upb_Decoder_AddMessageSetItem(d, msg, layout, type_id, preserved.data, + preserved.size); + } + break; + } + case kMessageTag: { + uint32_t size; + ptr = upb_Decoder_DecodeSize(d, ptr, &size); + const char* data = ptr; + ptr += size; + if (state_mask & kUpb_HavePayload) break; // Ignore dup. + state_mask |= kUpb_HavePayload; + if (state_mask & kUpb_HaveId) { + upb_Decoder_AddMessageSetItem(d, msg, layout, type_id, data, size); + } else { + // Out of order, we must preserve the payload. + preserved.data = data; + preserved.size = size; + } + break; + } + default: + // We do not preserve unexpected fields inside a message set item. + ptr = upb_Decoder_SkipField(d, ptr, tag); + break; + } } - ptr = upb_MtDataEncoder_PutBase92Varint(e, ptr, field_num, _upb_ToBase92(0), - _upb_ToBase92(63)); - in->state.msg_state.oneof_state = kUpb_OneofState_EmittedOneofField; - return ptr; + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); } -char* upb_MtDataEncoder_StartEnum(upb_MtDataEncoder* e, char* ptr) { - upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); - in->state.enum_state.present_values_mask = 0; - in->state.enum_state.last_written_value = 0; +static const upb_MiniTableField* _upb_Decoder_FindField(upb_Decoder* d, + const upb_MiniTable* t, + uint32_t field_number, + int* last_field_index) { + static upb_MiniTableField none = { + 0, 0, 0, 0, kUpb_FakeFieldType_FieldNotFound, 0}; + if (t == NULL) return &none; - return upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_EnumV1); -} + size_t idx = ((size_t)field_number) - 1; // 0 wraps to SIZE_MAX + if (idx < t->dense_below) { + /* Fastest case: index into dense fields. */ + goto found; + } -static char* upb_MtDataEncoder_FlushDenseEnumMask(upb_MtDataEncoder* e, - char* ptr) { - upb_MtDataEncoderInternal* in = (upb_MtDataEncoderInternal*)e->internal; - ptr = upb_MtDataEncoder_Put(e, ptr, in->state.enum_state.present_values_mask); - in->state.enum_state.present_values_mask = 0; - in->state.enum_state.last_written_value += 5; - return ptr; -} + if (t->dense_below < t->field_count) { + /* Linear search non-dense fields. Resume scanning from last_field_index + * since fields are usually in order. */ + size_t last = *last_field_index; + for (idx = last; idx < t->field_count; idx++) { + if (t->fields[idx].number == field_number) { + goto found; + } + } -char* upb_MtDataEncoder_PutEnumValue(upb_MtDataEncoder* e, char* ptr, - uint32_t val) { - // TODO(b/229641772): optimize this encoding. - upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); - UPB_ASSERT(val >= in->state.enum_state.last_written_value); - uint32_t delta = val - in->state.enum_state.last_written_value; - if (delta >= 5 && in->state.enum_state.present_values_mask) { - ptr = upb_MtDataEncoder_FlushDenseEnumMask(e, ptr); - if (!ptr) { - return NULL; + for (idx = t->dense_below; idx < last; idx++) { + if (t->fields[idx].number == field_number) { + goto found; + } } - delta -= 5; } - if (delta >= 5) { - ptr = upb_MtDataEncoder_PutBase92Varint( - e, ptr, delta, kUpb_EncodedValue_MinSkip, kUpb_EncodedValue_MaxSkip); - in->state.enum_state.last_written_value += delta; - delta = 0; + if (d->extreg) { + switch (t->ext) { + case kUpb_ExtMode_Extendable: { + const upb_MiniTableExtension* ext = + upb_ExtensionRegistry_Lookup(d->extreg, t, field_number); + if (ext) return &ext->field; + break; + } + case kUpb_ExtMode_IsMessageSet: + if (field_number == kUpb_MsgSet_Item) { + static upb_MiniTableField item = { + 0, 0, 0, 0, kUpb_FakeFieldType_MessageSetItem, 0}; + return &item; + } + break; + } } - UPB_ASSERT((in->state.enum_state.present_values_mask >> delta) == 0); - in->state.enum_state.present_values_mask |= 1ULL << delta; - return ptr; -} + return &none; /* Unknown field. */ -char* upb_MtDataEncoder_EndEnum(upb_MtDataEncoder* e, char* ptr) { - upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); - if (!in->state.enum_state.present_values_mask) return ptr; - return upb_MtDataEncoder_FlushDenseEnumMask(e, ptr); +found: + UPB_ASSERT(t->fields[idx].number == field_number); + *last_field_index = idx; + return &t->fields[idx]; } +int _upb_Decoder_GetVarintOp(const upb_MiniTableField* field) { + static const int8_t kVarintOps[] = { + [kUpb_FakeFieldType_FieldNotFound] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Double] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Float] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Int64] = kUpb_DecodeOp_Scalar8Byte, + [kUpb_FieldType_UInt64] = kUpb_DecodeOp_Scalar8Byte, + [kUpb_FieldType_Int32] = kUpb_DecodeOp_Scalar4Byte, + [kUpb_FieldType_Fixed64] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Fixed32] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Bool] = kUpb_DecodeOp_Scalar1Byte, + [kUpb_FieldType_String] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Group] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Message] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Bytes] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_UInt32] = kUpb_DecodeOp_Scalar4Byte, + [kUpb_FieldType_Enum] = kUpb_DecodeOp_Enum, + [kUpb_FieldType_SFixed32] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_SFixed64] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_SInt32] = kUpb_DecodeOp_Scalar4Byte, + [kUpb_FieldType_SInt64] = kUpb_DecodeOp_Scalar8Byte, + [kUpb_FakeFieldType_MessageSetItem] = kUpb_DecodeOp_UnknownField, + }; -const char _kUpb_ToBase92[] = { - ' ', '!', '#', '$', '%', '&', '(', ')', '*', '+', ',', '-', '.', '/', - '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', - '>', '?', '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', - 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', - 'Z', '[', ']', '^', '_', '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', - 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', - 'w', 'x', 'y', 'z', '{', '|', '}', '~', -}; - -const int8_t _kUpb_FromBase92[] = { - 0, 1, -1, 2, 3, 4, 5, -1, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, - 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, - 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, - 55, 56, 57, -1, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, - 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, -}; - - - -// Must be last. - -typedef struct { - upb_MdDecoder base; - upb_Arena* arena; - upb_MiniTableEnum* enum_table; - uint32_t enum_value_count; - uint32_t enum_data_count; - uint32_t enum_data_capacity; -} upb_MdEnumDecoder; - -static size_t upb_MiniTableEnum_Size(size_t count) { - return sizeof(upb_MiniTableEnum) + count * sizeof(uint32_t); + return kVarintOps[field->UPB_PRIVATE(descriptortype)]; } -static upb_MiniTableEnum* _upb_MiniTable_AddEnumDataMember(upb_MdEnumDecoder* d, - uint32_t val) { - if (d->enum_data_count == d->enum_data_capacity) { - size_t old_sz = upb_MiniTableEnum_Size(d->enum_data_capacity); - d->enum_data_capacity = UPB_MAX(2, d->enum_data_capacity * 2); - size_t new_sz = upb_MiniTableEnum_Size(d->enum_data_capacity); - d->enum_table = upb_Arena_Realloc(d->arena, d->enum_table, old_sz, new_sz); - upb_MdDecoder_CheckOutOfMemory(&d->base, d->enum_table); +UPB_FORCEINLINE +static void _upb_Decoder_CheckUnlinked(upb_Decoder* d, const upb_MiniTable* mt, + const upb_MiniTableField* field, + int* op) { + // If sub-message is not linked, treat as unknown. + if (field->mode & kUpb_LabelFlags_IsExtension) return; + const upb_MiniTableSub* sub = &mt->subs[field->UPB_PRIVATE(submsg_index)]; + if ((d->options & kUpb_DecodeOption_ExperimentalAllowUnlinked) || + sub->submsg != &_kUpb_MiniTable_Empty) { + return; } - d->enum_table->data[d->enum_data_count++] = val; - return d->enum_table; -} - -static void upb_MiniTableEnum_BuildValue(upb_MdEnumDecoder* d, uint32_t val) { - upb_MiniTableEnum* table = d->enum_table; - d->enum_value_count++; - if (table->value_count || (val > 512 && d->enum_value_count < val / 32)) { - if (table->value_count == 0) { - assert(d->enum_data_count == table->mask_limit / 32); - } - table = _upb_MiniTable_AddEnumDataMember(d, val); - table->value_count++; - } else { - uint32_t new_mask_limit = ((val / 32) + 1) * 32; - while (table->mask_limit < new_mask_limit) { - table = _upb_MiniTable_AddEnumDataMember(d, 0); - table->mask_limit += 32; - } - table->data[val / 32] |= 1ULL << (val % 32); +#ifndef NDEBUG + const upb_MiniTableField* oneof = upb_MiniTable_GetOneof(mt, field); + if (oneof) { + // All other members of the oneof must be message fields that are also + // unlinked. + do { + assert(upb_MiniTableField_CType(oneof) == kUpb_CType_Message); + const upb_MiniTableSub* oneof_sub = + &mt->subs[oneof->UPB_PRIVATE(submsg_index)]; + assert(!oneof_sub); + } while (upb_MiniTable_NextOneofField(mt, &oneof)); } +#endif // NDEBUG + *op = kUpb_DecodeOp_UnknownField; } -static upb_MiniTableEnum* upb_MtDecoder_DoBuildMiniTableEnum( - upb_MdEnumDecoder* d, const char* data, size_t len) { - // If the string is non-empty then it must begin with a version tag. - if (len) { - if (*data != kUpb_EncodedVersion_EnumV1) { - upb_MdDecoder_ErrorJmp(&d->base, "Invalid enum version: %c", *data); - } - data++; - len--; - } - - upb_MdDecoder_CheckOutOfMemory(&d->base, d->enum_table); - - // Guarantee at least 64 bits of mask without checking mask size. - d->enum_table->mask_limit = 64; - d->enum_table = _upb_MiniTable_AddEnumDataMember(d, 0); - d->enum_table = _upb_MiniTable_AddEnumDataMember(d, 0); +int _upb_Decoder_GetDelimitedOp(upb_Decoder* d, const upb_MiniTable* mt, + const upb_MiniTableField* field) { + enum { kRepeatedBase = 19 }; - d->enum_table->value_count = 0; + static const int8_t kDelimitedOps[] = { + /* For non-repeated field type. */ + [kUpb_FakeFieldType_FieldNotFound] = + kUpb_DecodeOp_UnknownField, // Field not found. + [kUpb_FieldType_Double] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Float] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Int64] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_UInt64] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Int32] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Fixed64] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Fixed32] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Bool] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_String] = kUpb_DecodeOp_String, + [kUpb_FieldType_Group] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Message] = kUpb_DecodeOp_SubMessage, + [kUpb_FieldType_Bytes] = kUpb_DecodeOp_Bytes, + [kUpb_FieldType_UInt32] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Enum] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_SFixed32] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_SFixed64] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_SInt32] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_SInt64] = kUpb_DecodeOp_UnknownField, + [kUpb_FakeFieldType_MessageSetItem] = kUpb_DecodeOp_UnknownField, + // For repeated field type. */ + [kRepeatedBase + kUpb_FieldType_Double] = OP_FIXPCK_LG2(3), + [kRepeatedBase + kUpb_FieldType_Float] = OP_FIXPCK_LG2(2), + [kRepeatedBase + kUpb_FieldType_Int64] = OP_VARPCK_LG2(3), + [kRepeatedBase + kUpb_FieldType_UInt64] = OP_VARPCK_LG2(3), + [kRepeatedBase + kUpb_FieldType_Int32] = OP_VARPCK_LG2(2), + [kRepeatedBase + kUpb_FieldType_Fixed64] = OP_FIXPCK_LG2(3), + [kRepeatedBase + kUpb_FieldType_Fixed32] = OP_FIXPCK_LG2(2), + [kRepeatedBase + kUpb_FieldType_Bool] = OP_VARPCK_LG2(0), + [kRepeatedBase + kUpb_FieldType_String] = kUpb_DecodeOp_String, + [kRepeatedBase + kUpb_FieldType_Group] = kUpb_DecodeOp_SubMessage, + [kRepeatedBase + kUpb_FieldType_Message] = kUpb_DecodeOp_SubMessage, + [kRepeatedBase + kUpb_FieldType_Bytes] = kUpb_DecodeOp_Bytes, + [kRepeatedBase + kUpb_FieldType_UInt32] = OP_VARPCK_LG2(2), + [kRepeatedBase + kUpb_FieldType_Enum] = kUpb_DecodeOp_PackedEnum, + [kRepeatedBase + kUpb_FieldType_SFixed32] = OP_FIXPCK_LG2(2), + [kRepeatedBase + kUpb_FieldType_SFixed64] = OP_FIXPCK_LG2(3), + [kRepeatedBase + kUpb_FieldType_SInt32] = OP_VARPCK_LG2(2), + [kRepeatedBase + kUpb_FieldType_SInt64] = OP_VARPCK_LG2(3), + // Omitting kUpb_FakeFieldType_MessageSetItem, because we never emit a + // repeated msgset type + }; - const char* ptr = data; - uint32_t base = 0; + int ndx = field->UPB_PRIVATE(descriptortype); + if (upb_FieldMode_Get(field) == kUpb_FieldMode_Array) ndx += kRepeatedBase; + int op = kDelimitedOps[ndx]; - while (ptr < d->base.end) { - char ch = *ptr++; - if (ch <= kUpb_EncodedValue_MaxEnumMask) { - uint32_t mask = _upb_FromBase92(ch); - for (int i = 0; i < 5; i++, base++, mask >>= 1) { - if (mask & 1) upb_MiniTableEnum_BuildValue(d, base); - } - } else if (kUpb_EncodedValue_MinSkip <= ch && - ch <= kUpb_EncodedValue_MaxSkip) { - uint32_t skip; - ptr = upb_MdDecoder_DecodeBase92Varint(&d->base, ptr, ch, - kUpb_EncodedValue_MinSkip, - kUpb_EncodedValue_MaxSkip, &skip); - base += skip; - } else { - upb_MdDecoder_ErrorJmp(&d->base, "Unexpected character: %c", ch); - } + if (op == kUpb_DecodeOp_SubMessage) { + _upb_Decoder_CheckUnlinked(d, mt, field, &op); } - return d->enum_table; + return op; } -static upb_MiniTableEnum* upb_MtDecoder_BuildMiniTableEnum( - upb_MdEnumDecoder* const decoder, const char* const data, size_t const len) { - if (UPB_SETJMP(decoder->base.err) != 0) return NULL; - return upb_MtDecoder_DoBuildMiniTableEnum(decoder, data, len); -} +UPB_FORCEINLINE +static const char* _upb_Decoder_DecodeWireValue(upb_Decoder* d, const char* ptr, + const upb_MiniTable* mt, + const upb_MiniTableField* field, + int wire_type, wireval* val, + int* op) { + static const unsigned kFixed32OkMask = (1 << kUpb_FieldType_Float) | + (1 << kUpb_FieldType_Fixed32) | + (1 << kUpb_FieldType_SFixed32); -upb_MiniTableEnum* upb_MiniDescriptor_BuildEnum(const char* data, size_t len, - upb_Arena* arena, - upb_Status* status) { - upb_MdEnumDecoder decoder = { - .base = - { - .end = UPB_PTRADD(data, len), - .status = status, - }, - .arena = arena, - .enum_table = upb_Arena_Malloc(arena, upb_MiniTableEnum_Size(2)), - .enum_value_count = 0, - .enum_data_count = 0, - .enum_data_capacity = 1, - }; + static const unsigned kFixed64OkMask = (1 << kUpb_FieldType_Double) | + (1 << kUpb_FieldType_Fixed64) | + (1 << kUpb_FieldType_SFixed64); - return upb_MtDecoder_BuildMiniTableEnum(&decoder, data, len); + switch (wire_type) { + case kUpb_WireType_Varint: + ptr = _upb_Decoder_DecodeVarint(d, ptr, &val->uint64_val); + *op = _upb_Decoder_GetVarintOp(field); + _upb_Decoder_Munge(field->UPB_PRIVATE(descriptortype), val); + return ptr; + case kUpb_WireType_32Bit: + *op = kUpb_DecodeOp_Scalar4Byte; + if (((1 << field->UPB_PRIVATE(descriptortype)) & kFixed32OkMask) == 0) { + *op = kUpb_DecodeOp_UnknownField; + } + return upb_WireReader_ReadFixed32(ptr, &val->uint32_val); + case kUpb_WireType_64Bit: + *op = kUpb_DecodeOp_Scalar8Byte; + if (((1 << field->UPB_PRIVATE(descriptortype)) & kFixed64OkMask) == 0) { + *op = kUpb_DecodeOp_UnknownField; + } + return upb_WireReader_ReadFixed64(ptr, &val->uint64_val); + case kUpb_WireType_Delimited: + ptr = upb_Decoder_DecodeSize(d, ptr, &val->size); + *op = _upb_Decoder_GetDelimitedOp(d, mt, field); + return ptr; + case kUpb_WireType_StartGroup: + val->uint32_val = field->number; + if (field->UPB_PRIVATE(descriptortype) == kUpb_FieldType_Group) { + *op = kUpb_DecodeOp_SubMessage; + _upb_Decoder_CheckUnlinked(d, mt, field, op); + } else if (field->UPB_PRIVATE(descriptortype) == + kUpb_FakeFieldType_MessageSetItem) { + *op = kUpb_DecodeOp_MessageSetItem; + } else { + *op = kUpb_DecodeOp_UnknownField; + } + return ptr; + default: + break; + } + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); } +UPB_FORCEINLINE +static const char* _upb_Decoder_DecodeKnownField( + upb_Decoder* d, const char* ptr, upb_Message* msg, + const upb_MiniTable* layout, const upb_MiniTableField* field, int op, + wireval* val) { + const upb_MiniTableSub* subs = layout->subs; + uint8_t mode = field->mode; -#include -#include - + if (UPB_UNLIKELY(mode & kUpb_LabelFlags_IsExtension)) { + const upb_MiniTableExtension* ext_layout = + (const upb_MiniTableExtension*)field; + upb_Message_Extension* ext = + _upb_Message_GetOrCreateExtension(msg, ext_layout, &d->arena); + if (UPB_UNLIKELY(!ext)) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + } + d->unknown_msg = msg; + msg = &ext->data; + subs = &ext->ext->sub; + } -// Must be last. + switch (mode & kUpb_FieldMode_Mask) { + case kUpb_FieldMode_Array: + return _upb_Decoder_DecodeToArray(d, ptr, msg, subs, field, val, op); + case kUpb_FieldMode_Map: + return _upb_Decoder_DecodeToMap(d, ptr, msg, subs, field, val); + case kUpb_FieldMode_Scalar: + return _upb_Decoder_DecodeToSubMessage(d, ptr, msg, subs, field, val, op); + default: + UPB_UNREACHABLE(); + } +} -// Note: we sort by this number when calculating layout order. -typedef enum { - kUpb_LayoutItemType_OneofCase, // Oneof case. - kUpb_LayoutItemType_OneofField, // Oneof field data. - kUpb_LayoutItemType_Field, // Non-oneof field data. +static const char* _upb_Decoder_ReverseSkipVarint(const char* ptr, + uint32_t val) { + uint32_t seen = 0; + do { + ptr--; + seen <<= 7; + seen |= *ptr & 0x7f; + } while (seen != val); + return ptr; +} - kUpb_LayoutItemType_Max = kUpb_LayoutItemType_Field, -} upb_LayoutItemType; +static const char* _upb_Decoder_DecodeUnknownField(upb_Decoder* d, + const char* ptr, + upb_Message* msg, + int field_number, + int wire_type, wireval val) { + if (field_number == 0) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); -#define kUpb_LayoutItem_IndexSentinel ((uint16_t)-1) + // Since unknown fields are the uncommon case, we do a little extra work here + // to walk backwards through the buffer to find the field start. This frees + // up a register in the fast paths (when the field is known), which leads to + // significant speedups in benchmarks. + const char* start = ptr; -typedef struct { - // Index of the corresponding field. When this is a oneof field, the field's - // offset will be the index of the next field in a linked list. - uint16_t field_index; - uint16_t offset; - upb_FieldRep rep; - upb_LayoutItemType type; -} upb_LayoutItem; + if (wire_type == kUpb_WireType_Delimited) ptr += val.size; + if (msg) { + switch (wire_type) { + case kUpb_WireType_Varint: + case kUpb_WireType_Delimited: + start--; + while (start[-1] & 0x80) start--; + break; + case kUpb_WireType_32Bit: + start -= 4; + break; + case kUpb_WireType_64Bit: + start -= 8; + break; + default: + break; + } -typedef struct { - upb_LayoutItem* data; - size_t size; - size_t capacity; -} upb_LayoutItemVector; + assert(start == d->debug_valstart); + uint32_t tag = ((uint32_t)field_number << 3) | wire_type; + start = _upb_Decoder_ReverseSkipVarint(start, tag); + assert(start == d->debug_tagstart); -typedef struct { - upb_MdDecoder base; - upb_MiniTable* table; - upb_MiniTableField* fields; - upb_MiniTablePlatform platform; - upb_LayoutItemVector vec; - upb_Arena* arena; -} upb_MtDecoder; + if (wire_type == kUpb_WireType_StartGroup) { + d->unknown = start; + d->unknown_msg = msg; + ptr = _upb_Decoder_DecodeUnknownGroup(d, ptr, field_number); + start = d->unknown; + d->unknown = NULL; + } + if (!_upb_Message_AddUnknown(msg, start, ptr - start, &d->arena)) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + } + } else if (wire_type == kUpb_WireType_StartGroup) { + ptr = _upb_Decoder_DecodeUnknownGroup(d, ptr, field_number); + } + return ptr; +} -// In each field's offset, we temporarily store a presence classifier: -enum PresenceClass { - kNoPresence = 0, - kHasbitPresence = 1, - kRequiredPresence = 2, - kOneofBase = 3, - // Negative values refer to a specific oneof with that number. Positive - // values >= kOneofBase indicate that this field is in a oneof, and specify - // the next field in this oneof's linked list. -}; +UPB_NOINLINE +static const char* _upb_Decoder_DecodeMessage(upb_Decoder* d, const char* ptr, + upb_Message* msg, + const upb_MiniTable* layout) { + int last_field_index = 0; -static bool upb_MtDecoder_FieldIsPackable(upb_MiniTableField* field) { - return (field->mode & kUpb_FieldMode_Array) && - upb_FieldType_IsPackable(field->UPB_PRIVATE(descriptortype)); -} +#if UPB_FASTTABLE + // The first time we want to skip fast dispatch, because we may have just been + // invoked by the fast parser to handle a case that it bailed on. + if (!_upb_Decoder_IsDone(d, &ptr)) goto nofast; +#endif -typedef struct { - uint16_t submsg_count; - uint16_t subenum_count; -} upb_SubCounts; + while (!_upb_Decoder_IsDone(d, &ptr)) { + uint32_t tag; + const upb_MiniTableField* field; + int field_number; + int wire_type; + wireval val; + int op; -static void upb_MiniTable_SetTypeAndSub(upb_MiniTableField* field, - upb_FieldType type, - upb_SubCounts* sub_counts, - uint64_t msg_modifiers, - bool is_proto3_enum) { - if (is_proto3_enum) { - UPB_ASSERT(type == kUpb_FieldType_Enum); - type = kUpb_FieldType_Int32; - field->mode |= kUpb_LabelFlags_IsAlternate; - } else if (type == kUpb_FieldType_String && - !(msg_modifiers & kUpb_MessageModifier_ValidateUtf8)) { - type = kUpb_FieldType_Bytes; - field->mode |= kUpb_LabelFlags_IsAlternate; - } + if (_upb_Decoder_TryFastDispatch(d, &ptr, msg, layout)) break; - field->UPB_PRIVATE(descriptortype) = type; +#if UPB_FASTTABLE + nofast: +#endif - if (upb_MtDecoder_FieldIsPackable(field) && - (msg_modifiers & kUpb_MessageModifier_DefaultIsPacked)) { - field->mode |= kUpb_LabelFlags_IsPacked; - } +#ifndef NDEBUG + d->debug_tagstart = ptr; +#endif - if (type == kUpb_FieldType_Message || type == kUpb_FieldType_Group) { - field->UPB_PRIVATE(submsg_index) = sub_counts->submsg_count++; - } else if (type == kUpb_FieldType_Enum) { - // We will need to update this later once we know the total number of - // submsg fields. - field->UPB_PRIVATE(submsg_index) = sub_counts->subenum_count++; - } else { - field->UPB_PRIVATE(submsg_index) = kUpb_NoSub; - } -} + UPB_ASSERT(ptr < d->input.limit_ptr); + ptr = _upb_Decoder_DecodeTag(d, ptr, &tag); + field_number = tag >> 3; + wire_type = tag & 7; -static const char kUpb_EncodedToType[] = { - [kUpb_EncodedType_Double] = kUpb_FieldType_Double, - [kUpb_EncodedType_Float] = kUpb_FieldType_Float, - [kUpb_EncodedType_Int64] = kUpb_FieldType_Int64, - [kUpb_EncodedType_UInt64] = kUpb_FieldType_UInt64, - [kUpb_EncodedType_Int32] = kUpb_FieldType_Int32, - [kUpb_EncodedType_Fixed64] = kUpb_FieldType_Fixed64, - [kUpb_EncodedType_Fixed32] = kUpb_FieldType_Fixed32, - [kUpb_EncodedType_Bool] = kUpb_FieldType_Bool, - [kUpb_EncodedType_String] = kUpb_FieldType_String, - [kUpb_EncodedType_Group] = kUpb_FieldType_Group, - [kUpb_EncodedType_Message] = kUpb_FieldType_Message, - [kUpb_EncodedType_Bytes] = kUpb_FieldType_Bytes, - [kUpb_EncodedType_UInt32] = kUpb_FieldType_UInt32, - [kUpb_EncodedType_OpenEnum] = kUpb_FieldType_Enum, - [kUpb_EncodedType_SFixed32] = kUpb_FieldType_SFixed32, - [kUpb_EncodedType_SFixed64] = kUpb_FieldType_SFixed64, - [kUpb_EncodedType_SInt32] = kUpb_FieldType_SInt32, - [kUpb_EncodedType_SInt64] = kUpb_FieldType_SInt64, - [kUpb_EncodedType_ClosedEnum] = kUpb_FieldType_Enum, -}; +#ifndef NDEBUG + d->debug_valstart = ptr; +#endif -static void upb_MiniTable_SetField(upb_MtDecoder* d, uint8_t ch, - upb_MiniTableField* field, - uint64_t msg_modifiers, - upb_SubCounts* sub_counts) { - static const char kUpb_EncodedToFieldRep[] = { - [kUpb_EncodedType_Double] = kUpb_FieldRep_8Byte, - [kUpb_EncodedType_Float] = kUpb_FieldRep_4Byte, - [kUpb_EncodedType_Int64] = kUpb_FieldRep_8Byte, - [kUpb_EncodedType_UInt64] = kUpb_FieldRep_8Byte, - [kUpb_EncodedType_Int32] = kUpb_FieldRep_4Byte, - [kUpb_EncodedType_Fixed64] = kUpb_FieldRep_8Byte, - [kUpb_EncodedType_Fixed32] = kUpb_FieldRep_4Byte, - [kUpb_EncodedType_Bool] = kUpb_FieldRep_1Byte, - [kUpb_EncodedType_String] = kUpb_FieldRep_StringView, - [kUpb_EncodedType_Bytes] = kUpb_FieldRep_StringView, - [kUpb_EncodedType_UInt32] = kUpb_FieldRep_4Byte, - [kUpb_EncodedType_OpenEnum] = kUpb_FieldRep_4Byte, - [kUpb_EncodedType_SFixed32] = kUpb_FieldRep_4Byte, - [kUpb_EncodedType_SFixed64] = kUpb_FieldRep_8Byte, - [kUpb_EncodedType_SInt32] = kUpb_FieldRep_4Byte, - [kUpb_EncodedType_SInt64] = kUpb_FieldRep_8Byte, - [kUpb_EncodedType_ClosedEnum] = kUpb_FieldRep_4Byte, - }; + if (wire_type == kUpb_WireType_EndGroup) { + d->end_group = field_number; + return ptr; + } - char pointer_rep = d->platform == kUpb_MiniTablePlatform_32Bit - ? kUpb_FieldRep_4Byte - : kUpb_FieldRep_8Byte; + field = _upb_Decoder_FindField(d, layout, field_number, &last_field_index); + ptr = _upb_Decoder_DecodeWireValue(d, ptr, layout, field, wire_type, &val, + &op); - int8_t type = _upb_FromBase92(ch); - if (ch >= _upb_ToBase92(kUpb_EncodedType_RepeatedBase)) { - type -= kUpb_EncodedType_RepeatedBase; - field->mode = kUpb_FieldMode_Array; - field->mode |= pointer_rep << kUpb_FieldRep_Shift; - field->offset = kNoPresence; - } else { - field->mode = kUpb_FieldMode_Scalar; - field->offset = kHasbitPresence; - if (type == kUpb_EncodedType_Group || type == kUpb_EncodedType_Message) { - field->mode |= pointer_rep << kUpb_FieldRep_Shift; - } else if ((unsigned long)type >= sizeof(kUpb_EncodedToFieldRep)) { - upb_MdDecoder_ErrorJmp(&d->base, "Invalid field type: %d", (int)type); + if (op >= 0) { + ptr = _upb_Decoder_DecodeKnownField(d, ptr, msg, layout, field, op, &val); } else { - field->mode |= kUpb_EncodedToFieldRep[type] << kUpb_FieldRep_Shift; + switch (op) { + case kUpb_DecodeOp_UnknownField: + ptr = _upb_Decoder_DecodeUnknownField(d, ptr, msg, field_number, + wire_type, val); + break; + case kUpb_DecodeOp_MessageSetItem: + ptr = upb_Decoder_DecodeMessageSetItem(d, ptr, msg, layout); + break; + } } } - if ((unsigned long)type >= sizeof(kUpb_EncodedToType)) { - upb_MdDecoder_ErrorJmp(&d->base, "Invalid field type: %d", (int)type); - } - upb_MiniTable_SetTypeAndSub(field, kUpb_EncodedToType[type], sub_counts, - msg_modifiers, type == kUpb_EncodedType_OpenEnum); -} -static void upb_MtDecoder_ModifyField(upb_MtDecoder* d, - uint32_t message_modifiers, - uint32_t field_modifiers, - upb_MiniTableField* field) { - if (field_modifiers & kUpb_EncodedFieldModifier_FlipPacked) { - if (!upb_MtDecoder_FieldIsPackable(field)) { - upb_MdDecoder_ErrorJmp(&d->base, - "Cannot flip packed on unpackable field %" PRIu32, - field->number); - } - field->mode ^= kUpb_LabelFlags_IsPacked; - } + return UPB_UNLIKELY(layout && layout->required_count) + ? _upb_Decoder_CheckRequired(d, ptr, msg, layout) + : ptr; +} - bool singular = field_modifiers & kUpb_EncodedFieldModifier_IsProto3Singular; - bool required = field_modifiers & kUpb_EncodedFieldModifier_IsRequired; +const char* _upb_FastDecoder_DecodeGeneric(struct upb_Decoder* d, + const char* ptr, upb_Message* msg, + intptr_t table, uint64_t hasbits, + uint64_t data) { + (void)data; + *(uint32_t*)msg |= hasbits; + return _upb_Decoder_DecodeMessage(d, ptr, msg, decode_totablep(table)); +} - // Validate. - if ((singular || required) && field->offset != kHasbitPresence) { - upb_MdDecoder_ErrorJmp(&d->base, - "Invalid modifier(s) for repeated field %" PRIu32, - field->number); - } - if (singular && required) { - upb_MdDecoder_ErrorJmp( - &d->base, "Field %" PRIu32 " cannot be both singular and required", - field->number); +static upb_DecodeStatus _upb_Decoder_DecodeTop(struct upb_Decoder* d, + const char* buf, void* msg, + const upb_MiniTable* l) { + if (!_upb_Decoder_TryFastDispatch(d, &buf, msg, l)) { + _upb_Decoder_DecodeMessage(d, buf, msg, l); } + if (d->end_group != DECODE_NOGROUP) return kUpb_DecodeStatus_Malformed; + if (d->missing_required) return kUpb_DecodeStatus_MissingRequired; + return kUpb_DecodeStatus_Ok; +} - if (singular) field->offset = kNoPresence; - if (required) { - field->offset = kRequiredPresence; - } +UPB_NOINLINE +const char* _upb_Decoder_IsDoneFallback(upb_EpsCopyInputStream* e, + const char* ptr, int overrun) { + return _upb_EpsCopyInputStream_IsDoneFallbackInline( + e, ptr, overrun, _upb_Decoder_BufferFlipCallback); } -static void upb_MtDecoder_PushItem(upb_MtDecoder* d, upb_LayoutItem item) { - if (d->vec.size == d->vec.capacity) { - size_t new_cap = UPB_MAX(8, d->vec.size * 2); - d->vec.data = realloc(d->vec.data, new_cap * sizeof(*d->vec.data)); - upb_MdDecoder_CheckOutOfMemory(&d->base, d->vec.data); - d->vec.capacity = new_cap; +static upb_DecodeStatus upb_Decoder_Decode(upb_Decoder* const decoder, + const char* const buf, + void* const msg, + const upb_MiniTable* const l, + upb_Arena* const arena) { + if (UPB_SETJMP(decoder->err) == 0) { + decoder->status = _upb_Decoder_DecodeTop(decoder, buf, msg, l); + } else { + UPB_ASSERT(decoder->status != kUpb_DecodeStatus_Ok); } - d->vec.data[d->vec.size++] = item; -} -static void upb_MtDecoder_PushOneof(upb_MtDecoder* d, upb_LayoutItem item) { - if (item.field_index == kUpb_LayoutItem_IndexSentinel) { - upb_MdDecoder_ErrorJmp(&d->base, "Empty oneof"); - } - item.field_index -= kOneofBase; + _upb_MemBlock* blocks = + upb_Atomic_Load(&decoder->arena.blocks, memory_order_relaxed); + arena->head = decoder->arena.head; + upb_Atomic_Store(&arena->blocks, blocks, memory_order_relaxed); + return decoder->status; +} - // Push oneof data. - item.type = kUpb_LayoutItemType_OneofField; - upb_MtDecoder_PushItem(d, item); +upb_DecodeStatus upb_Decode(const char* buf, size_t size, void* msg, + const upb_MiniTable* l, + const upb_ExtensionRegistry* extreg, int options, + upb_Arena* arena) { + upb_Decoder decoder; + unsigned depth = (unsigned)options >> 16; - // Push oneof case. - item.rep = kUpb_FieldRep_4Byte; // Field Number. - item.type = kUpb_LayoutItemType_OneofCase; - upb_MtDecoder_PushItem(d, item); -} + upb_EpsCopyInputStream_Init(&decoder.input, &buf, size, + options & kUpb_DecodeOption_AliasString); -size_t upb_MtDecoder_SizeOfRep(upb_FieldRep rep, - upb_MiniTablePlatform platform) { - static const uint8_t kRepToSize32[] = { - [kUpb_FieldRep_1Byte] = 1, - [kUpb_FieldRep_4Byte] = 4, - [kUpb_FieldRep_StringView] = 8, - [kUpb_FieldRep_8Byte] = 8, - }; - static const uint8_t kRepToSize64[] = { - [kUpb_FieldRep_1Byte] = 1, - [kUpb_FieldRep_4Byte] = 4, - [kUpb_FieldRep_StringView] = 16, - [kUpb_FieldRep_8Byte] = 8, - }; - UPB_ASSERT(sizeof(upb_StringView) == - UPB_SIZE(kRepToSize32, kRepToSize64)[kUpb_FieldRep_StringView]); - return platform == kUpb_MiniTablePlatform_32Bit ? kRepToSize32[rep] - : kRepToSize64[rep]; -} + decoder.extreg = extreg; + decoder.unknown = NULL; + decoder.depth = depth ? depth : kUpb_WireFormat_DefaultDepthLimit; + decoder.end_group = DECODE_NOGROUP; + decoder.options = (uint16_t)options; + decoder.missing_required = false; + decoder.status = kUpb_DecodeStatus_Ok; -size_t upb_MtDecoder_AlignOfRep(upb_FieldRep rep, - upb_MiniTablePlatform platform) { - static const uint8_t kRepToAlign32[] = { - [kUpb_FieldRep_1Byte] = 1, - [kUpb_FieldRep_4Byte] = 4, - [kUpb_FieldRep_StringView] = 4, - [kUpb_FieldRep_8Byte] = 8, - }; - static const uint8_t kRepToAlign64[] = { - [kUpb_FieldRep_1Byte] = 1, - [kUpb_FieldRep_4Byte] = 4, - [kUpb_FieldRep_StringView] = 8, - [kUpb_FieldRep_8Byte] = 8, - }; - UPB_ASSERT(UPB_ALIGN_OF(upb_StringView) == - UPB_SIZE(kRepToAlign32, kRepToAlign64)[kUpb_FieldRep_StringView]); - return platform == kUpb_MiniTablePlatform_32Bit ? kRepToAlign32[rep] - : kRepToAlign64[rep]; + // Violating the encapsulation of the arena for performance reasons. + // This is a temporary arena that we swap into and swap out of when we are + // done. The temporary arena only needs to be able to handle allocation, + // not fuse or free, so it does not need many of the members to be initialized + // (particularly parent_or_count). + _upb_MemBlock* blocks = upb_Atomic_Load(&arena->blocks, memory_order_relaxed); + decoder.arena.head = arena->head; + decoder.arena.block_alloc = arena->block_alloc; + upb_Atomic_Init(&decoder.arena.blocks, blocks); + + return upb_Decoder_Decode(&decoder, buf, msg, l, arena); } -static const char* upb_MtDecoder_DecodeOneofField(upb_MtDecoder* d, - const char* ptr, - char first_ch, - upb_LayoutItem* item) { - uint32_t field_num; - ptr = upb_MdDecoder_DecodeBase92Varint( - &d->base, ptr, first_ch, kUpb_EncodedValue_MinOneofField, - kUpb_EncodedValue_MaxOneofField, &field_num); - upb_MiniTableField* f = - (void*)upb_MiniTable_FindFieldByNumber(d->table, field_num); +#undef OP_FIXPCK_LG2 +#undef OP_VARPCK_LG2 - if (!f) { - upb_MdDecoder_ErrorJmp(&d->base, - "Couldn't add field number %" PRIu32 - " to oneof, no such field number.", - field_num); +// Fast decoder: ~3x the speed of decode.c, but requires x86-64/ARM64. +// Also the table size grows by 2x. +// +// Could potentially be ported to other 64-bit archs that pass at least six +// arguments in registers and have 8 unused high bits in pointers. +// +// The overall design is to create specialized functions for every possible +// field type (eg. oneof boolean field with a 1 byte tag) and then dispatch +// to the specialized function as quickly as possible. + + + +// Must be last. + +#if UPB_FASTTABLE + +// The standard set of arguments passed to each parsing function. +// Thanks to x86-64 calling conventions, these will stay in registers. +#define UPB_PARSE_PARAMS \ + upb_Decoder *d, const char *ptr, upb_Message *msg, intptr_t table, \ + uint64_t hasbits, uint64_t data + +#define UPB_PARSE_ARGS d, ptr, msg, table, hasbits, data + +#define RETURN_GENERIC(m) \ + /* Uncomment either of these for debugging purposes. */ \ + /* fprintf(stderr, m); */ \ + /*__builtin_trap(); */ \ + return _upb_FastDecoder_DecodeGeneric(d, ptr, msg, table, hasbits, 0); + +typedef enum { + CARD_s = 0, /* Singular (optional, non-repeated) */ + CARD_o = 1, /* Oneof */ + CARD_r = 2, /* Repeated */ + CARD_p = 3 /* Packed Repeated */ +} upb_card; + +UPB_NOINLINE +static const char* fastdecode_isdonefallback(UPB_PARSE_PARAMS) { + int overrun = data; + ptr = _upb_EpsCopyInputStream_IsDoneFallbackInline( + &d->input, ptr, overrun, _upb_Decoder_BufferFlipCallback); + data = _upb_FastDecoder_LoadTag(ptr); + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); +} + +UPB_FORCEINLINE +static const char* fastdecode_dispatch(UPB_PARSE_PARAMS) { + int overrun; + switch (upb_EpsCopyInputStream_IsDoneStatus(&d->input, ptr, &overrun)) { + case kUpb_IsDoneStatus_Done: + *(uint32_t*)msg |= hasbits; // Sync hasbits. + const upb_MiniTable* l = decode_totablep(table); + return UPB_UNLIKELY(l->required_count) + ? _upb_Decoder_CheckRequired(d, ptr, msg, l) + : ptr; + case kUpb_IsDoneStatus_NotDone: + break; + case kUpb_IsDoneStatus_NeedFallback: + data = overrun; + UPB_MUSTTAIL return fastdecode_isdonefallback(UPB_PARSE_ARGS); } - if (f->offset != kHasbitPresence) { - upb_MdDecoder_ErrorJmp( - &d->base, - "Cannot add repeated, required, or singular field %" PRIu32 - " to oneof.", - field_num); + + // Read two bytes of tag data (for a one-byte tag, the high byte is junk). + data = _upb_FastDecoder_LoadTag(ptr); + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); +} + +UPB_FORCEINLINE +static bool fastdecode_checktag(uint16_t data, int tagbytes) { + if (tagbytes == 1) { + return (data & 0xff) == 0; + } else { + return data == 0; } +} - // Oneof storage must be large enough to accommodate the largest member. - int rep = f->mode >> kUpb_FieldRep_Shift; - if (upb_MtDecoder_SizeOfRep(rep, d->platform) > - upb_MtDecoder_SizeOfRep(item->rep, d->platform)) { - item->rep = rep; +UPB_FORCEINLINE +static const char* fastdecode_longsize(const char* ptr, int* size) { + int i; + UPB_ASSERT(*size & 0x80); + *size &= 0xff; + for (i = 0; i < 3; i++) { + ptr++; + size_t byte = (uint8_t)ptr[-1]; + *size += (byte - 1) << (7 + 7 * i); + if (UPB_LIKELY((byte & 0x80) == 0)) return ptr; } - // Prepend this field to the linked list. - f->offset = item->field_index; - item->field_index = (f - d->fields) + kOneofBase; + ptr++; + size_t byte = (uint8_t)ptr[-1]; + // len is limited by 2gb not 4gb, hence 8 and not 16 as normally expected + // for a 32 bit varint. + if (UPB_UNLIKELY(byte >= 8)) return NULL; + *size += (byte - 1) << 28; return ptr; } -static const char* upb_MtDecoder_DecodeOneofs(upb_MtDecoder* d, - const char* ptr) { - upb_LayoutItem item = {.rep = 0, - .field_index = kUpb_LayoutItem_IndexSentinel}; - while (ptr < d->base.end) { - char ch = *ptr++; - if (ch == kUpb_EncodedValue_FieldSeparator) { - // Field separator, no action needed. - } else if (ch == kUpb_EncodedValue_OneofSeparator) { - // End of oneof. - upb_MtDecoder_PushOneof(d, item); - item.field_index = kUpb_LayoutItem_IndexSentinel; // Move to next oneof. - } else { - ptr = upb_MtDecoder_DecodeOneofField(d, ptr, ch, &item); +UPB_FORCEINLINE +static const char* fastdecode_delimited( + upb_Decoder* d, const char* ptr, + upb_EpsCopyInputStream_ParseDelimitedFunc* func, void* ctx) { + ptr++; + + // Sign-extend so varint greater than one byte becomes negative, causing + // fast delimited parse to fail. + int len = (int8_t)ptr[-1]; + + if (!upb_EpsCopyInputStream_TryParseDelimitedFast(&d->input, &ptr, len, func, + ctx)) { + // Slow case: Sub-message is >=128 bytes and/or exceeds the current buffer. + // If it exceeds the buffer limit, limit/limit_ptr will change during + // sub-message parsing, so we need to preserve delta, not limit. + if (UPB_UNLIKELY(len & 0x80)) { + // Size varint >1 byte (length >= 128). + ptr = fastdecode_longsize(ptr, &len); + if (!ptr) { + // Corrupt wire format: size exceeded INT_MAX. + return NULL; + } + } + if (!upb_EpsCopyInputStream_CheckSize(&d->input, ptr, len)) { + // Corrupt wire format: invalid limit. + return NULL; } + int delta = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, len); + ptr = func(&d->input, ptr, ctx); + upb_EpsCopyInputStream_PopLimit(&d->input, ptr, delta); } - - // Push final oneof. - upb_MtDecoder_PushOneof(d, item); return ptr; } -static const char* upb_MtDecoder_ParseModifier(upb_MtDecoder* d, - const char* ptr, char first_ch, - upb_MiniTableField* last_field, - uint64_t* msg_modifiers) { - uint32_t mod; - ptr = upb_MdDecoder_DecodeBase92Varint(&d->base, ptr, first_ch, - kUpb_EncodedValue_MinModifier, - kUpb_EncodedValue_MaxModifier, &mod); - if (last_field) { - upb_MtDecoder_ModifyField(d, *msg_modifiers, mod, last_field); +/* singular, oneof, repeated field handling ***********************************/ + +typedef struct { + upb_Array* arr; + void* end; +} fastdecode_arr; + +typedef enum { + FD_NEXT_ATLIMIT, + FD_NEXT_SAMEFIELD, + FD_NEXT_OTHERFIELD +} fastdecode_next; + +typedef struct { + void* dst; + fastdecode_next next; + uint32_t tag; +} fastdecode_nextret; + +UPB_FORCEINLINE +static void* fastdecode_resizearr(upb_Decoder* d, void* dst, + fastdecode_arr* farr, int valbytes) { + if (UPB_UNLIKELY(dst == farr->end)) { + size_t old_size = farr->arr->capacity; + size_t old_bytes = old_size * valbytes; + size_t new_size = old_size * 2; + size_t new_bytes = new_size * valbytes; + char* old_ptr = _upb_array_ptr(farr->arr); + char* new_ptr = upb_Arena_Realloc(&d->arena, old_ptr, old_bytes, new_bytes); + uint8_t elem_size_lg2 = __builtin_ctz(valbytes); + farr->arr->capacity = new_size; + farr->arr->data = _upb_array_tagptr(new_ptr, elem_size_lg2); + dst = (void*)(new_ptr + (old_size * valbytes)); + farr->end = (void*)(new_ptr + (new_size * valbytes)); + } + return dst; +} + +UPB_FORCEINLINE +static bool fastdecode_tagmatch(uint32_t tag, uint64_t data, int tagbytes) { + if (tagbytes == 1) { + return (uint8_t)tag == (uint8_t)data; + } else { + return (uint16_t)tag == (uint16_t)data; + } +} + +UPB_FORCEINLINE +static void fastdecode_commitarr(void* dst, fastdecode_arr* farr, + int valbytes) { + farr->arr->size = + (size_t)((char*)dst - (char*)_upb_array_ptr(farr->arr)) / valbytes; +} + +UPB_FORCEINLINE +static fastdecode_nextret fastdecode_nextrepeated(upb_Decoder* d, void* dst, + const char** ptr, + fastdecode_arr* farr, + uint64_t data, int tagbytes, + int valbytes) { + fastdecode_nextret ret; + dst = (char*)dst + valbytes; + + if (UPB_LIKELY(!_upb_Decoder_IsDone(d, ptr))) { + ret.tag = _upb_FastDecoder_LoadTag(*ptr); + if (fastdecode_tagmatch(ret.tag, data, tagbytes)) { + ret.next = FD_NEXT_SAMEFIELD; + } else { + fastdecode_commitarr(dst, farr, valbytes); + ret.next = FD_NEXT_OTHERFIELD; + } + } else { + fastdecode_commitarr(dst, farr, valbytes); + ret.next = FD_NEXT_ATLIMIT; + } + + ret.dst = dst; + return ret; +} + +UPB_FORCEINLINE +static void* fastdecode_fieldmem(upb_Message* msg, uint64_t data) { + size_t ofs = data >> 48; + return (char*)msg + ofs; +} + +UPB_FORCEINLINE +static void* fastdecode_getfield(upb_Decoder* d, const char* ptr, + upb_Message* msg, uint64_t* data, + uint64_t* hasbits, fastdecode_arr* farr, + int valbytes, upb_card card) { + switch (card) { + case CARD_s: { + uint8_t hasbit_index = *data >> 24; + // Set hasbit and return pointer to scalar field. + *hasbits |= 1ull << hasbit_index; + return fastdecode_fieldmem(msg, *data); + } + case CARD_o: { + uint16_t case_ofs = *data >> 32; + uint32_t* oneof_case = UPB_PTR_AT(msg, case_ofs, uint32_t); + uint8_t field_number = *data >> 24; + *oneof_case = field_number; + return fastdecode_fieldmem(msg, *data); + } + case CARD_r: { + // Get pointer to upb_Array and allocate/expand if necessary. + uint8_t elem_size_lg2 = __builtin_ctz(valbytes); + upb_Array** arr_p = fastdecode_fieldmem(msg, *data); + char* begin; + *(uint32_t*)msg |= *hasbits; + *hasbits = 0; + if (UPB_LIKELY(!*arr_p)) { + farr->arr = _upb_Array_New(&d->arena, 8, elem_size_lg2); + *arr_p = farr->arr; + } else { + farr->arr = *arr_p; + } + begin = _upb_array_ptr(farr->arr); + farr->end = begin + (farr->arr->capacity * valbytes); + *data = _upb_FastDecoder_LoadTag(ptr); + return begin + (farr->arr->size * valbytes); + } + default: + UPB_UNREACHABLE(); + } +} + +UPB_FORCEINLINE +static bool fastdecode_flippacked(uint64_t* data, int tagbytes) { + *data ^= (0x2 ^ 0x0); // Patch data to match packed wiretype. + return fastdecode_checktag(*data, tagbytes); +} + +#define FASTDECODE_CHECKPACKED(tagbytes, card, func) \ + if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ + if (card == CARD_r && fastdecode_flippacked(&data, tagbytes)) { \ + UPB_MUSTTAIL return func(UPB_PARSE_ARGS); \ + } \ + RETURN_GENERIC("packed check tag mismatch\n"); \ + } + +/* varint fields **************************************************************/ + +UPB_FORCEINLINE +static uint64_t fastdecode_munge(uint64_t val, int valbytes, bool zigzag) { + if (valbytes == 1) { + return val != 0; + } else if (zigzag) { + if (valbytes == 4) { + uint32_t n = val; + return (n >> 1) ^ -(int32_t)(n & 1); + } else if (valbytes == 8) { + return (val >> 1) ^ -(int64_t)(val & 1); + } + UPB_UNREACHABLE(); + } + return val; +} + +UPB_FORCEINLINE +static const char* fastdecode_varint64(const char* ptr, uint64_t* val) { + ptr++; + *val = (uint8_t)ptr[-1]; + if (UPB_UNLIKELY(*val & 0x80)) { + int i; + for (i = 0; i < 8; i++) { + ptr++; + uint64_t byte = (uint8_t)ptr[-1]; + *val += (byte - 1) << (7 + 7 * i); + if (UPB_LIKELY((byte & 0x80) == 0)) goto done; + } + ptr++; + uint64_t byte = (uint8_t)ptr[-1]; + if (byte > 1) { + return NULL; + } + *val += (byte - 1) << 63; + } +done: + UPB_ASSUME(ptr != NULL); + return ptr; +} + +#define FASTDECODE_UNPACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, zigzag, packed) \ + uint64_t val; \ + void* dst; \ + fastdecode_arr farr; \ + \ + FASTDECODE_CHECKPACKED(tagbytes, card, packed); \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, \ + card); \ + if (card == CARD_r) { \ + if (UPB_UNLIKELY(!dst)) { \ + RETURN_GENERIC("need array resize\n"); \ + } \ + } \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, valbytes); \ + } \ + \ + ptr += tagbytes; \ + ptr = fastdecode_varint64(ptr, &val); \ + if (ptr == NULL) _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ + val = fastdecode_munge(val, valbytes, zigzag); \ + memcpy(dst, &val, valbytes); \ + \ + if (card == CARD_r) { \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, valbytes); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + data = ret.tag; \ + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + return ptr; \ + } \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); + +typedef struct { + uint8_t valbytes; + bool zigzag; + void* dst; + fastdecode_arr farr; +} fastdecode_varintdata; + +UPB_FORCEINLINE +static const char* fastdecode_topackedvarint(upb_EpsCopyInputStream* e, + const char* ptr, void* ctx) { + upb_Decoder* d = (upb_Decoder*)e; + fastdecode_varintdata* data = ctx; + void* dst = data->dst; + uint64_t val; + + while (!_upb_Decoder_IsDone(d, &ptr)) { + dst = fastdecode_resizearr(d, dst, &data->farr, data->valbytes); + ptr = fastdecode_varint64(ptr, &val); + if (ptr == NULL) return NULL; + val = fastdecode_munge(val, data->valbytes, data->zigzag); + memcpy(dst, &val, data->valbytes); + dst = (char*)dst + data->valbytes; + } + + fastdecode_commitarr(dst, &data->farr, data->valbytes); + return ptr; +} + +#define FASTDECODE_PACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, zigzag, unpacked) \ + fastdecode_varintdata ctx = {valbytes, zigzag}; \ + \ + FASTDECODE_CHECKPACKED(tagbytes, CARD_r, unpacked); \ + \ + ctx.dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &ctx.farr, \ + valbytes, CARD_r); \ + if (UPB_UNLIKELY(!ctx.dst)) { \ + RETURN_GENERIC("need array resize\n"); \ + } \ + \ + ptr += tagbytes; \ + ptr = fastdecode_delimited(d, ptr, &fastdecode_topackedvarint, &ctx); \ + \ + if (UPB_UNLIKELY(ptr == NULL)) { \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(d, ptr, msg, table, hasbits, 0); + +#define FASTDECODE_VARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, zigzag, unpacked, packed) \ + if (card == CARD_p) { \ + FASTDECODE_PACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, zigzag, unpacked); \ + } else { \ + FASTDECODE_UNPACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, zigzag, packed); \ + } + +#define z_ZZ true +#define b_ZZ false +#define v_ZZ false + +/* Generate all combinations: + * {s,o,r,p} x {b1,v4,z4,v8,z8} x {1bt,2bt} */ + +#define F(card, type, valbytes, tagbytes) \ + UPB_NOINLINE \ + const char* upb_p##card##type##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ + FASTDECODE_VARINT(d, ptr, msg, table, hasbits, data, tagbytes, valbytes, \ + CARD_##card, type##_ZZ, \ + upb_pr##type##valbytes##_##tagbytes##bt, \ + upb_pp##type##valbytes##_##tagbytes##bt); \ + } + +#define TYPES(card, tagbytes) \ + F(card, b, 1, tagbytes) \ + F(card, v, 4, tagbytes) \ + F(card, v, 8, tagbytes) \ + F(card, z, 4, tagbytes) \ + F(card, z, 8, tagbytes) + +#define TAGBYTES(card) \ + TYPES(card, 1) \ + TYPES(card, 2) + +TAGBYTES(s) +TAGBYTES(o) +TAGBYTES(r) +TAGBYTES(p) + +#undef z_ZZ +#undef b_ZZ +#undef v_ZZ +#undef o_ONEOF +#undef s_ONEOF +#undef r_ONEOF +#undef F +#undef TYPES +#undef TAGBYTES +#undef FASTDECODE_UNPACKEDVARINT +#undef FASTDECODE_PACKEDVARINT +#undef FASTDECODE_VARINT + +/* fixed fields ***************************************************************/ + +#define FASTDECODE_UNPACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, packed) \ + void* dst; \ + fastdecode_arr farr; \ + \ + FASTDECODE_CHECKPACKED(tagbytes, card, packed) \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, \ + card); \ + if (card == CARD_r) { \ + if (UPB_UNLIKELY(!dst)) { \ + RETURN_GENERIC("couldn't allocate array in arena\n"); \ + } \ + } \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, valbytes); \ + } \ + \ + ptr += tagbytes; \ + memcpy(dst, ptr, valbytes); \ + ptr += valbytes; \ + \ + if (card == CARD_r) { \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, valbytes); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + data = ret.tag; \ + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + return ptr; \ + } \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); + +#define FASTDECODE_PACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, unpacked) \ + FASTDECODE_CHECKPACKED(tagbytes, CARD_r, unpacked) \ + \ + ptr += tagbytes; \ + int size = (uint8_t)ptr[0]; \ + ptr++; \ + if (size & 0x80) { \ + ptr = fastdecode_longsize(ptr, &size); \ + } \ + \ + if (UPB_UNLIKELY(!upb_EpsCopyInputStream_CheckDataSizeAvailable( \ + &d->input, ptr, size) || \ + (size % valbytes) != 0)) { \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ + } \ + \ + upb_Array** arr_p = fastdecode_fieldmem(msg, data); \ + upb_Array* arr = *arr_p; \ + uint8_t elem_size_lg2 = __builtin_ctz(valbytes); \ + int elems = size / valbytes; \ + \ + if (UPB_LIKELY(!arr)) { \ + *arr_p = arr = _upb_Array_New(&d->arena, elems, elem_size_lg2); \ + if (!arr) { \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ + } \ + } else { \ + _upb_Array_ResizeUninitialized(arr, elems, &d->arena); \ + } \ + \ + char* dst = _upb_array_ptr(arr); \ + memcpy(dst, ptr, size); \ + arr->size = elems; \ + \ + ptr += size; \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); + +#define FASTDECODE_FIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, unpacked, packed) \ + if (card == CARD_p) { \ + FASTDECODE_PACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, unpacked); \ + } else { \ + FASTDECODE_UNPACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, packed); \ + } + +/* Generate all combinations: + * {s,o,r,p} x {f4,f8} x {1bt,2bt} */ + +#define F(card, valbytes, tagbytes) \ + UPB_NOINLINE \ + const char* upb_p##card##f##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ + FASTDECODE_FIXED(d, ptr, msg, table, hasbits, data, tagbytes, valbytes, \ + CARD_##card, upb_ppf##valbytes##_##tagbytes##bt, \ + upb_prf##valbytes##_##tagbytes##bt); \ + } + +#define TYPES(card, tagbytes) \ + F(card, 4, tagbytes) \ + F(card, 8, tagbytes) + +#define TAGBYTES(card) \ + TYPES(card, 1) \ + TYPES(card, 2) + +TAGBYTES(s) +TAGBYTES(o) +TAGBYTES(r) +TAGBYTES(p) + +#undef F +#undef TYPES +#undef TAGBYTES +#undef FASTDECODE_UNPACKEDFIXED +#undef FASTDECODE_PACKEDFIXED + +/* string fields **************************************************************/ + +typedef const char* fastdecode_copystr_func(struct upb_Decoder* d, + const char* ptr, upb_Message* msg, + const upb_MiniTable* table, + uint64_t hasbits, + upb_StringView* dst); + +UPB_NOINLINE +static const char* fastdecode_verifyutf8(upb_Decoder* d, const char* ptr, + upb_Message* msg, intptr_t table, + uint64_t hasbits, uint64_t data) { + upb_StringView* dst = (upb_StringView*)data; + if (!_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) { + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); + } + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); +} + +#define FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, validate_utf8) \ + int size = (uint8_t)ptr[0]; /* Could plumb through hasbits. */ \ + ptr++; \ + if (size & 0x80) { \ + ptr = fastdecode_longsize(ptr, &size); \ + } \ + \ + if (UPB_UNLIKELY(!upb_EpsCopyInputStream_CheckSize(&d->input, ptr, size))) { \ + dst->size = 0; \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ + } \ + \ + const char* s_ptr = ptr; \ + ptr = upb_EpsCopyInputStream_ReadString(&d->input, &s_ptr, size, &d->arena); \ + if (!ptr) _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); \ + dst->data = s_ptr; \ + dst->size = size; \ + \ + if (validate_utf8) { \ + data = (uint64_t)dst; \ + UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ + } else { \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \ + } + +UPB_NOINLINE +static const char* fastdecode_longstring_utf8(struct upb_Decoder* d, + const char* ptr, upb_Message* msg, + intptr_t table, uint64_t hasbits, + uint64_t data) { + upb_StringView* dst = (upb_StringView*)data; + FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, true); +} + +UPB_NOINLINE +static const char* fastdecode_longstring_noutf8( + struct upb_Decoder* d, const char* ptr, upb_Message* msg, intptr_t table, + uint64_t hasbits, uint64_t data) { + upb_StringView* dst = (upb_StringView*)data; + FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, false); +} + +UPB_FORCEINLINE +static void fastdecode_docopy(upb_Decoder* d, const char* ptr, uint32_t size, + int copy, char* data, upb_StringView* dst) { + d->arena.head.ptr += copy; + dst->data = data; + UPB_UNPOISON_MEMORY_REGION(data, copy); + memcpy(data, ptr, copy); + UPB_POISON_MEMORY_REGION(data + size, copy - size); +} + +#define FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes, \ + card, validate_utf8) \ + upb_StringView* dst; \ + fastdecode_arr farr; \ + int64_t size; \ + size_t arena_has; \ + size_t common_has; \ + char* buf; \ + \ + UPB_ASSERT(!upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, 0)); \ + UPB_ASSERT(fastdecode_checktag(data, tagbytes)); \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ + sizeof(upb_StringView), card); \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_StringView)); \ + } \ + \ + size = (uint8_t)ptr[tagbytes]; \ + ptr += tagbytes + 1; \ + dst->size = size; \ + \ + buf = d->arena.head.ptr; \ + arena_has = _upb_ArenaHas(&d->arena); \ + common_has = UPB_MIN(arena_has, \ + upb_EpsCopyInputStream_BytesAvailable(&d->input, ptr)); \ + \ + if (UPB_LIKELY(size <= 15 - tagbytes)) { \ + if (arena_has < 16) goto longstr; \ + d->arena.head.ptr += 16; \ + memcpy(buf, ptr - tagbytes - 1, 16); \ + dst->data = buf + tagbytes + 1; \ + } else if (UPB_LIKELY(size <= 32)) { \ + if (UPB_UNLIKELY(common_has < 32)) goto longstr; \ + fastdecode_docopy(d, ptr, size, 32, buf, dst); \ + } else if (UPB_LIKELY(size <= 64)) { \ + if (UPB_UNLIKELY(common_has < 64)) goto longstr; \ + fastdecode_docopy(d, ptr, size, 64, buf, dst); \ + } else if (UPB_LIKELY(size < 128)) { \ + if (UPB_UNLIKELY(common_has < 128)) goto longstr; \ + fastdecode_docopy(d, ptr, size, 128, buf, dst); \ + } else { \ + goto longstr; \ + } \ + \ + ptr += size; \ + \ + if (card == CARD_r) { \ + if (validate_utf8 && \ + !_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) { \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); \ + } \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_StringView)); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + data = ret.tag; \ + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + return ptr; \ + } \ + } \ + \ + if (card != CARD_r && validate_utf8) { \ + data = (uint64_t)dst; \ + UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \ + \ + longstr: \ + if (card == CARD_r) { \ + fastdecode_commitarr(dst + 1, &farr, sizeof(upb_StringView)); \ + } \ + ptr--; \ + if (validate_utf8) { \ + UPB_MUSTTAIL return fastdecode_longstring_utf8(d, ptr, msg, table, \ + hasbits, (uint64_t)dst); \ + } else { \ + UPB_MUSTTAIL return fastdecode_longstring_noutf8(d, ptr, msg, table, \ + hasbits, (uint64_t)dst); \ + } + +#define FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, card, \ + copyfunc, validate_utf8) \ + upb_StringView* dst; \ + fastdecode_arr farr; \ + int64_t size; \ + \ + if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ + RETURN_GENERIC("string field tag mismatch\n"); \ + } \ + \ + if (UPB_UNLIKELY( \ + !upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, 0))) { \ + UPB_MUSTTAIL return copyfunc(UPB_PARSE_ARGS); \ + } \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ + sizeof(upb_StringView), card); \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_StringView)); \ + } \ + \ + size = (int8_t)ptr[tagbytes]; \ + ptr += tagbytes + 1; \ + \ + if (UPB_UNLIKELY( \ + !upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, size))) { \ + ptr--; \ + if (validate_utf8) { \ + return fastdecode_longstring_utf8(d, ptr, msg, table, hasbits, \ + (uint64_t)dst); \ + } else { \ + return fastdecode_longstring_noutf8(d, ptr, msg, table, hasbits, \ + (uint64_t)dst); \ + } \ + } \ + \ + dst->data = ptr; \ + dst->size = size; \ + ptr = upb_EpsCopyInputStream_ReadStringAliased(&d->input, &dst->data, \ + dst->size); \ + \ + if (card == CARD_r) { \ + if (validate_utf8 && \ + !_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) { \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); \ + } \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_StringView)); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + data = ret.tag; \ + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + return ptr; \ + } \ + } \ + \ + if (card != CARD_r && validate_utf8) { \ + data = (uint64_t)dst; \ + UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); + +/* Generate all combinations: + * {p,c} x {s,o,r} x {s, b} x {1bt,2bt} */ + +#define s_VALIDATE true +#define b_VALIDATE false + +#define F(card, tagbytes, type) \ + UPB_NOINLINE \ + const char* upb_c##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ + FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes, \ + CARD_##card, type##_VALIDATE); \ + } \ + const char* upb_p##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ + FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, \ + CARD_##card, upb_c##card##type##_##tagbytes##bt, \ + type##_VALIDATE); \ + } + +#define UTF8(card, tagbytes) \ + F(card, tagbytes, s) \ + F(card, tagbytes, b) + +#define TAGBYTES(card) \ + UTF8(card, 1) \ + UTF8(card, 2) + +TAGBYTES(s) +TAGBYTES(o) +TAGBYTES(r) + +#undef s_VALIDATE +#undef b_VALIDATE +#undef F +#undef TAGBYTES +#undef FASTDECODE_LONGSTRING +#undef FASTDECODE_COPYSTRING +#undef FASTDECODE_STRING + +/* message fields *************************************************************/ + +UPB_INLINE +upb_Message* decode_newmsg_ceil(upb_Decoder* d, const upb_MiniTable* l, + int msg_ceil_bytes) { + size_t size = l->size + sizeof(upb_Message_Internal); + char* msg_data; + if (UPB_LIKELY(msg_ceil_bytes > 0 && + _upb_ArenaHas(&d->arena) >= msg_ceil_bytes)) { + UPB_ASSERT(size <= (size_t)msg_ceil_bytes); + msg_data = d->arena.head.ptr; + d->arena.head.ptr += size; + UPB_UNPOISON_MEMORY_REGION(msg_data, msg_ceil_bytes); + memset(msg_data, 0, msg_ceil_bytes); + UPB_POISON_MEMORY_REGION(msg_data + size, msg_ceil_bytes - size); } else { - if (!d->table) { - upb_MdDecoder_ErrorJmp(&d->base, - "Extensions cannot have message modifiers"); - } - *msg_modifiers = mod; + msg_data = (char*)upb_Arena_Malloc(&d->arena, size); + memset(msg_data, 0, size); } + return msg_data + sizeof(upb_Message_Internal); +} + +typedef struct { + intptr_t table; + upb_Message* msg; +} fastdecode_submsgdata; +UPB_FORCEINLINE +static const char* fastdecode_tosubmsg(upb_EpsCopyInputStream* e, + const char* ptr, void* ctx) { + upb_Decoder* d = (upb_Decoder*)e; + fastdecode_submsgdata* submsg = ctx; + ptr = fastdecode_dispatch(d, ptr, submsg->msg, submsg->table, 0, 0); + UPB_ASSUME(ptr != NULL); return ptr; } -static void upb_MtDecoder_AllocateSubs(upb_MtDecoder* d, - upb_SubCounts sub_counts) { - uint32_t total_count = sub_counts.submsg_count + sub_counts.subenum_count; - size_t subs_bytes = sizeof(*d->table->subs) * total_count; - upb_MiniTableSub* subs = upb_Arena_Malloc(d->arena, subs_bytes); - upb_MdDecoder_CheckOutOfMemory(&d->base, subs); - uint32_t i = 0; - for (; i < sub_counts.submsg_count; i++) { - subs[i].submsg = &_kUpb_MiniTable_Empty; - } - if (sub_counts.subenum_count) { - upb_MiniTableField* f = d->fields; - upb_MiniTableField* end_f = f + d->table->field_count; - for (; f < end_f; f++) { - if (f->UPB_PRIVATE(descriptortype) == kUpb_FieldType_Enum) { - f->UPB_PRIVATE(submsg_index) += sub_counts.submsg_count; - } - } - for (; i < sub_counts.submsg_count + sub_counts.subenum_count; i++) { - subs[i].subenum = NULL; - } +#define FASTDECODE_SUBMSG(d, ptr, msg, table, hasbits, data, tagbytes, \ + msg_ceil_bytes, card) \ + \ + if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ + RETURN_GENERIC("submessage field tag mismatch\n"); \ + } \ + \ + if (--d->depth == 0) { \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_MaxDepthExceeded); \ + } \ + \ + upb_Message** dst; \ + uint32_t submsg_idx = (data >> 16) & 0xff; \ + const upb_MiniTable* tablep = decode_totablep(table); \ + const upb_MiniTable* subtablep = tablep->subs[submsg_idx].submsg; \ + fastdecode_submsgdata submsg = {decode_totable(subtablep)}; \ + fastdecode_arr farr; \ + \ + if (subtablep->table_mask == (uint8_t)-1) { \ + RETURN_GENERIC("submessage doesn't have fast tables."); \ + } \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ + sizeof(upb_Message*), card); \ + \ + if (card == CARD_s) { \ + *(uint32_t*)msg |= hasbits; \ + hasbits = 0; \ + } \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_Message*)); \ + } \ + \ + submsg.msg = *dst; \ + \ + if (card == CARD_r || UPB_LIKELY(!submsg.msg)) { \ + *dst = submsg.msg = decode_newmsg_ceil(d, subtablep, msg_ceil_bytes); \ + } \ + \ + ptr += tagbytes; \ + ptr = fastdecode_delimited(d, ptr, fastdecode_tosubmsg, &submsg); \ + \ + if (UPB_UNLIKELY(ptr == NULL || d->end_group != DECODE_NOGROUP)) { \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ + } \ + \ + if (card == CARD_r) { \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_Message*)); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + d->depth++; \ + data = ret.tag; \ + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + d->depth++; \ + return ptr; \ + } \ + } \ + \ + d->depth++; \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); + +#define F(card, tagbytes, size_ceil, ceil_arg) \ + const char* upb_p##card##m_##tagbytes##bt_max##size_ceil##b( \ + UPB_PARSE_PARAMS) { \ + FASTDECODE_SUBMSG(d, ptr, msg, table, hasbits, data, tagbytes, ceil_arg, \ + CARD_##card); \ } - d->table->subs = subs; -} -static const char* upb_MtDecoder_Parse(upb_MtDecoder* d, const char* ptr, - size_t len, void* fields, - size_t field_size, uint16_t* field_count, - upb_SubCounts* sub_counts) { - uint64_t msg_modifiers = 0; - uint32_t last_field_number = 0; - upb_MiniTableField* last_field = NULL; - bool need_dense_below = d->table != NULL; +#define SIZES(card, tagbytes) \ + F(card, tagbytes, 64, 64) \ + F(card, tagbytes, 128, 128) \ + F(card, tagbytes, 192, 192) \ + F(card, tagbytes, 256, 256) \ + F(card, tagbytes, max, -1) - d->base.end = UPB_PTRADD(ptr, len); +#define TAGBYTES(card) \ + SIZES(card, 1) \ + SIZES(card, 2) - while (ptr < d->base.end) { - char ch = *ptr++; - if (ch <= kUpb_EncodedValue_MaxField) { - if (!d->table && last_field) { - // For extensions, consume only a single field and then return. - return --ptr; - } - upb_MiniTableField* field = fields; - *field_count += 1; - fields = (char*)fields + field_size; - field->number = ++last_field_number; - last_field = field; - upb_MiniTable_SetField(d, ch, field, msg_modifiers, sub_counts); - } else if (kUpb_EncodedValue_MinModifier <= ch && - ch <= kUpb_EncodedValue_MaxModifier) { - ptr = upb_MtDecoder_ParseModifier(d, ptr, ch, last_field, &msg_modifiers); - if (msg_modifiers & kUpb_MessageModifier_IsExtendable) { - d->table->ext |= kUpb_ExtMode_Extendable; - } - } else if (ch == kUpb_EncodedValue_End) { - if (!d->table) { - upb_MdDecoder_ErrorJmp(&d->base, "Extensions cannot have oneofs."); - } - ptr = upb_MtDecoder_DecodeOneofs(d, ptr); - } else if (kUpb_EncodedValue_MinSkip <= ch && - ch <= kUpb_EncodedValue_MaxSkip) { - if (need_dense_below) { - d->table->dense_below = d->table->field_count; - need_dense_below = false; - } - uint32_t skip; - ptr = upb_MdDecoder_DecodeBase92Varint(&d->base, ptr, ch, - kUpb_EncodedValue_MinSkip, - kUpb_EncodedValue_MaxSkip, &skip); - last_field_number += skip; - last_field_number--; // Next field seen will increment. - } else { - upb_MdDecoder_ErrorJmp(&d->base, "Invalid char: %c", ch); - } - } +TAGBYTES(s) +TAGBYTES(o) +TAGBYTES(r) - if (need_dense_below) { - d->table->dense_below = d->table->field_count; - } +#undef TAGBYTES +#undef SIZES +#undef F +#undef FASTDECODE_SUBMSG - return ptr; -} +#endif /* UPB_FASTTABLE */ -static void upb_MtDecoder_ParseMessage(upb_MtDecoder* d, const char* data, - size_t len) { - // Buffer length is an upper bound on the number of fields. We will return - // what we don't use. - d->fields = upb_Arena_Malloc(d->arena, sizeof(*d->fields) * len); - upb_MdDecoder_CheckOutOfMemory(&d->base, d->fields); +// We encode backwards, to avoid pre-computing lengths (one-pass encode). - upb_SubCounts sub_counts = {0, 0}; - d->table->field_count = 0; - d->table->fields = d->fields; - upb_MtDecoder_Parse(d, data, len, d->fields, sizeof(*d->fields), - &d->table->field_count, &sub_counts); - upb_Arena_ShrinkLast(d->arena, d->fields, sizeof(*d->fields) * len, - sizeof(*d->fields) * d->table->field_count); - d->table->fields = d->fields; - upb_MtDecoder_AllocateSubs(d, sub_counts); -} +#include -int upb_MtDecoder_CompareFields(const void* _a, const void* _b) { - const upb_LayoutItem* a = _a; - const upb_LayoutItem* b = _b; - // Currently we just sort by: - // 1. rep (smallest fields first) - // 2. type (oneof cases first) - // 2. field_index (smallest numbers first) - // The main goal of this is to reduce space lost to padding. - // Later we may have more subtle reasons to prefer a different ordering. - const int rep_bits = upb_Log2Ceiling(kUpb_FieldRep_Max); - const int type_bits = upb_Log2Ceiling(kUpb_LayoutItemType_Max); - const int idx_bits = (sizeof(a->field_index) * 8); - UPB_ASSERT(idx_bits + rep_bits + type_bits < 32); -#define UPB_COMBINE(rep, ty, idx) (((rep << type_bits) | ty) << idx_bits) | idx - uint32_t a_packed = UPB_COMBINE(a->rep, a->type, a->field_index); - uint32_t b_packed = UPB_COMBINE(b->rep, b->type, b->field_index); - assert(a_packed != b_packed); -#undef UPB_COMBINE - return a_packed < b_packed ? -1 : 1; -} -static bool upb_MtDecoder_SortLayoutItems(upb_MtDecoder* d) { - // Add items for all non-oneof fields (oneofs were already added). - int n = d->table->field_count; - for (int i = 0; i < n; i++) { - upb_MiniTableField* f = &d->fields[i]; - if (f->offset >= kOneofBase) continue; - upb_LayoutItem item = {.field_index = i, - .rep = f->mode >> kUpb_FieldRep_Shift, - .type = kUpb_LayoutItemType_Field}; - upb_MtDecoder_PushItem(d, item); - } +// Must be last. + +#define UPB_PB_VARINT_MAX_LEN 10 + +UPB_NOINLINE +static size_t encode_varint64(uint64_t val, char* buf) { + size_t i = 0; + do { + uint8_t byte = val & 0x7fU; + val >>= 7; + if (val) byte |= 0x80U; + buf[i++] = byte; + } while (val); + return i; +} - if (d->vec.size) { - qsort(d->vec.data, d->vec.size, sizeof(*d->vec.data), - upb_MtDecoder_CompareFields); - } +static uint32_t encode_zz32(int32_t n) { + return ((uint32_t)n << 1) ^ (n >> 31); +} +static uint64_t encode_zz64(int64_t n) { + return ((uint64_t)n << 1) ^ (n >> 63); +} - return true; +typedef struct { + upb_EncodeStatus status; + jmp_buf err; + upb_Arena* arena; + char *buf, *ptr, *limit; + int options; + int depth; + _upb_mapsorter sorter; +} upb_encstate; + +static size_t upb_roundup_pow2(size_t bytes) { + size_t ret = 128; + while (ret < bytes) { + ret *= 2; + } + return ret; } -static size_t upb_MiniTable_DivideRoundUp(size_t n, size_t d) { - return (n + d - 1) / d; +UPB_NORETURN static void encode_err(upb_encstate* e, upb_EncodeStatus s) { + UPB_ASSERT(s != kUpb_EncodeStatus_Ok); + e->status = s; + UPB_LONGJMP(e->err, 1); } -static void upb_MtDecoder_AssignHasbits(upb_MtDecoder* d) { - upb_MiniTable* ret = d->table; - int n = ret->field_count; - int last_hasbit = 0; // 0 cannot be used. +UPB_NOINLINE +static void encode_growbuffer(upb_encstate* e, size_t bytes) { + size_t old_size = e->limit - e->buf; + size_t new_size = upb_roundup_pow2(bytes + (e->limit - e->ptr)); + char* new_buf = upb_Arena_Realloc(e->arena, e->buf, old_size, new_size); - // First assign required fields, which must have the lowest hasbits. - for (int i = 0; i < n; i++) { - upb_MiniTableField* field = (upb_MiniTableField*)&ret->fields[i]; - if (field->offset == kRequiredPresence) { - field->presence = ++last_hasbit; - } else if (field->offset == kNoPresence) { - field->presence = 0; - } - } - ret->required_count = last_hasbit; + if (!new_buf) encode_err(e, kUpb_EncodeStatus_OutOfMemory); - if (ret->required_count > 63) { - upb_MdDecoder_ErrorJmp(&d->base, "Too many required fields"); + // We want previous data at the end, realloc() put it at the beginning. + // TODO(salo): This is somewhat inefficient since we are copying twice. + // Maybe create a realloc() that copies to the end of the new buffer? + if (old_size > 0) { + memmove(new_buf + new_size - old_size, e->buf, old_size); } - // Next assign non-required hasbit fields. - for (int i = 0; i < n; i++) { - upb_MiniTableField* field = (upb_MiniTableField*)&ret->fields[i]; - if (field->offset == kHasbitPresence) { - field->presence = ++last_hasbit; - } - } + e->ptr = new_buf + new_size - (e->limit - e->ptr); + e->limit = new_buf + new_size; + e->buf = new_buf; - ret->size = last_hasbit ? upb_MiniTable_DivideRoundUp(last_hasbit + 1, 8) : 0; + e->ptr -= bytes; } -size_t upb_MtDecoder_Place(upb_MtDecoder* d, upb_FieldRep rep) { - size_t size = upb_MtDecoder_SizeOfRep(rep, d->platform); - size_t align = upb_MtDecoder_AlignOfRep(rep, d->platform); - size_t ret = UPB_ALIGN_UP(d->table->size, align); - static const size_t max = UINT16_MAX; - size_t new_size = ret + size; - if (new_size > max) { - upb_MdDecoder_ErrorJmp( - &d->base, "Message size exceeded maximum size of %zu bytes", max); +/* Call to ensure that at least "bytes" bytes are available for writing at + * e->ptr. Returns false if the bytes could not be allocated. */ +UPB_FORCEINLINE +static void encode_reserve(upb_encstate* e, size_t bytes) { + if ((size_t)(e->ptr - e->buf) < bytes) { + encode_growbuffer(e, bytes); + return; } - d->table->size = new_size; - return ret; -} - -static void upb_MtDecoder_AssignOffsets(upb_MtDecoder* d) { - upb_LayoutItem* end = UPB_PTRADD(d->vec.data, d->vec.size); - // Compute offsets. - for (upb_LayoutItem* item = d->vec.data; item < end; item++) { - item->offset = upb_MtDecoder_Place(d, item->rep); - } + e->ptr -= bytes; +} - // Assign oneof case offsets. We must do these first, since assigning - // actual offsets will overwrite the links of the linked list. - for (upb_LayoutItem* item = d->vec.data; item < end; item++) { - if (item->type != kUpb_LayoutItemType_OneofCase) continue; - upb_MiniTableField* f = &d->fields[item->field_index]; - while (true) { - f->presence = ~item->offset; - if (f->offset == kUpb_LayoutItem_IndexSentinel) break; - UPB_ASSERT(f->offset - kOneofBase < d->table->field_count); - f = &d->fields[f->offset - kOneofBase]; - } - } +/* Writes the given bytes to the buffer, handling reserve/advance. */ +static void encode_bytes(upb_encstate* e, const void* data, size_t len) { + if (len == 0) return; /* memcpy() with zero size is UB */ + encode_reserve(e, len); + memcpy(e->ptr, data, len); +} - // Assign offsets. - for (upb_LayoutItem* item = d->vec.data; item < end; item++) { - upb_MiniTableField* f = &d->fields[item->field_index]; - switch (item->type) { - case kUpb_LayoutItemType_OneofField: - while (true) { - uint16_t next_offset = f->offset; - f->offset = item->offset; - if (next_offset == kUpb_LayoutItem_IndexSentinel) break; - f = &d->fields[next_offset - kOneofBase]; - } - break; - case kUpb_LayoutItemType_Field: - f->offset = item->offset; - break; - default: - break; - } - } +static void encode_fixed64(upb_encstate* e, uint64_t val) { + val = _upb_BigEndian_Swap64(val); + encode_bytes(e, &val, sizeof(uint64_t)); +} - // The fasttable parser (supported on 64-bit only) depends on this being a - // multiple of 8 in order to satisfy UPB_MALLOC_ALIGN, which is also 8. - // - // On 32-bit we could potentially make this smaller, but there is no - // compelling reason to optimize this right now. - d->table->size = UPB_ALIGN_UP(d->table->size, 8); +static void encode_fixed32(upb_encstate* e, uint32_t val) { + val = _upb_BigEndian_Swap32(val); + encode_bytes(e, &val, sizeof(uint32_t)); } -static void upb_MtDecoder_ValidateEntryField(upb_MtDecoder* d, - const upb_MiniTableField* f, - uint32_t expected_num) { - const char* name = expected_num == 1 ? "key" : "val"; - if (f->number != expected_num) { - upb_MdDecoder_ErrorJmp(&d->base, - "map %s did not have expected number (%d vs %d)", - name, expected_num, (int)f->number); - } +UPB_NOINLINE +static void encode_longvarint(upb_encstate* e, uint64_t val) { + size_t len; + char* start; - if (upb_IsRepeatedOrMap(f)) { - upb_MdDecoder_ErrorJmp( - &d->base, "map %s cannot be repeated or map, or be in oneof", name); - } + encode_reserve(e, UPB_PB_VARINT_MAX_LEN); + len = encode_varint64(val, e->ptr); + start = e->ptr + UPB_PB_VARINT_MAX_LEN - len; + memmove(start, e->ptr, len); + e->ptr = start; +} - uint32_t not_ok_types; - if (expected_num == 1) { - not_ok_types = (1 << kUpb_FieldType_Float) | (1 << kUpb_FieldType_Double) | - (1 << kUpb_FieldType_Message) | (1 << kUpb_FieldType_Group) | - (1 << kUpb_FieldType_Bytes) | (1 << kUpb_FieldType_Enum); +UPB_FORCEINLINE +static void encode_varint(upb_encstate* e, uint64_t val) { + if (val < 128 && e->ptr != e->buf) { + --e->ptr; + *e->ptr = val; } else { - not_ok_types = 1 << kUpb_FieldType_Group; - } - - if ((1 << upb_MiniTableField_Type(f)) & not_ok_types) { - upb_MdDecoder_ErrorJmp(&d->base, "map %s cannot have type %d", name, - (int)f->UPB_PRIVATE(descriptortype)); + encode_longvarint(e, val); } } -static void upb_MtDecoder_ParseMap(upb_MtDecoder* d, const char* data, - size_t len) { - upb_MtDecoder_ParseMessage(d, data, len); - upb_MtDecoder_AssignHasbits(d); +static void encode_double(upb_encstate* e, double d) { + uint64_t u64; + UPB_ASSERT(sizeof(double) == sizeof(uint64_t)); + memcpy(&u64, &d, sizeof(uint64_t)); + encode_fixed64(e, u64); +} - if (UPB_UNLIKELY(d->table->field_count != 2)) { - upb_MdDecoder_ErrorJmp(&d->base, "%hu fields in map", - d->table->field_count); - UPB_UNREACHABLE(); - } +static void encode_float(upb_encstate* e, float d) { + uint32_t u32; + UPB_ASSERT(sizeof(float) == sizeof(uint32_t)); + memcpy(&u32, &d, sizeof(uint32_t)); + encode_fixed32(e, u32); +} - upb_LayoutItem* end = UPB_PTRADD(d->vec.data, d->vec.size); - for (upb_LayoutItem* item = d->vec.data; item < end; item++) { - if (item->type == kUpb_LayoutItemType_OneofCase) { - upb_MdDecoder_ErrorJmp(&d->base, "Map entry cannot have oneof"); - } - } +static void encode_tag(upb_encstate* e, uint32_t field_number, + uint8_t wire_type) { + encode_varint(e, (field_number << 3) | wire_type); +} - upb_MtDecoder_ValidateEntryField(d, &d->table->fields[0], 1); - upb_MtDecoder_ValidateEntryField(d, &d->table->fields[1], 2); +static void encode_fixedarray(upb_encstate* e, const upb_Array* arr, + size_t elem_size, uint32_t tag) { + size_t bytes = arr->size * elem_size; + const char* data = _upb_array_constptr(arr); + const char* ptr = data + bytes - elem_size; - // Map entries have a pre-determined layout, regardless of types. - // NOTE: sync with mini_table/message_internal.h. - const size_t kv_size = d->platform == kUpb_MiniTablePlatform_32Bit ? 8 : 16; - const size_t hasbit_size = 8; - d->fields[0].offset = hasbit_size; - d->fields[1].offset = hasbit_size + kv_size; - d->table->size = UPB_ALIGN_UP(hasbit_size + kv_size + kv_size, 8); + if (tag || !_upb_IsLittleEndian()) { + while (true) { + if (elem_size == 4) { + uint32_t val; + memcpy(&val, ptr, sizeof(val)); + val = _upb_BigEndian_Swap32(val); + encode_bytes(e, &val, elem_size); + } else { + UPB_ASSERT(elem_size == 8); + uint64_t val; + memcpy(&val, ptr, sizeof(val)); + val = _upb_BigEndian_Swap64(val); + encode_bytes(e, &val, elem_size); + } - // Map entries have a special bit set to signal it's a map entry, used in - // upb_MiniTable_SetSubMessage() below. - d->table->ext |= kUpb_ExtMode_IsMapEntry; + if (tag) encode_varint(e, tag); + if (ptr == data) break; + ptr -= elem_size; + } + } else { + encode_bytes(e, data, bytes); + } } -static void upb_MtDecoder_ParseMessageSet(upb_MtDecoder* d, const char* data, - size_t len) { - if (len > 0) { - upb_MdDecoder_ErrorJmp(&d->base, "Invalid message set encode length: %zu", - len); - } +static void encode_message(upb_encstate* e, const upb_Message* msg, + const upb_MiniTable* m, size_t* size); - upb_MiniTable* ret = d->table; - ret->size = 0; - ret->field_count = 0; - ret->ext = kUpb_ExtMode_IsMessageSet; - ret->dense_below = 0; - ret->table_mask = -1; - ret->required_count = 0; +static void encode_TaggedMessagePtr(upb_encstate* e, + upb_TaggedMessagePtr tagged, + const upb_MiniTable* m, size_t* size) { + if (upb_TaggedMessagePtr_IsEmpty(tagged)) { + m = &_kUpb_MiniTable_Empty; + } + encode_message(e, _upb_TaggedMessagePtr_GetMessage(tagged), m, size); } -static upb_MiniTable* upb_MtDecoder_DoBuildMiniTableWithBuf( - upb_MtDecoder* decoder, const char* data, size_t len, void** buf, - size_t* buf_size) { - upb_MdDecoder_CheckOutOfMemory(&decoder->base, decoder->table); - - decoder->table->size = 0; - decoder->table->field_count = 0; - decoder->table->ext = kUpb_ExtMode_NonExtendable; - decoder->table->dense_below = 0; - decoder->table->table_mask = -1; - decoder->table->required_count = 0; +static void encode_scalar(upb_encstate* e, const void* _field_mem, + const upb_MiniTableSub* subs, + const upb_MiniTableField* f) { + const char* field_mem = _field_mem; + int wire_type; - // Strip off and verify the version tag. - if (!len--) goto done; - const char vers = *data++; +#define CASE(ctype, type, wtype, encodeval) \ + { \ + ctype val = *(ctype*)field_mem; \ + encode_##type(e, encodeval); \ + wire_type = wtype; \ + break; \ + } - switch (vers) { - case kUpb_EncodedVersion_MapV1: - upb_MtDecoder_ParseMap(decoder, data, len); + switch (f->UPB_PRIVATE(descriptortype)) { + case kUpb_FieldType_Double: + CASE(double, double, kUpb_WireType_64Bit, val); + case kUpb_FieldType_Float: + CASE(float, float, kUpb_WireType_32Bit, val); + case kUpb_FieldType_Int64: + case kUpb_FieldType_UInt64: + CASE(uint64_t, varint, kUpb_WireType_Varint, val); + case kUpb_FieldType_UInt32: + CASE(uint32_t, varint, kUpb_WireType_Varint, val); + case kUpb_FieldType_Int32: + case kUpb_FieldType_Enum: + CASE(int32_t, varint, kUpb_WireType_Varint, (int64_t)val); + case kUpb_FieldType_SFixed64: + case kUpb_FieldType_Fixed64: + CASE(uint64_t, fixed64, kUpb_WireType_64Bit, val); + case kUpb_FieldType_Fixed32: + case kUpb_FieldType_SFixed32: + CASE(uint32_t, fixed32, kUpb_WireType_32Bit, val); + case kUpb_FieldType_Bool: + CASE(bool, varint, kUpb_WireType_Varint, val); + case kUpb_FieldType_SInt32: + CASE(int32_t, varint, kUpb_WireType_Varint, encode_zz32(val)); + case kUpb_FieldType_SInt64: + CASE(int64_t, varint, kUpb_WireType_Varint, encode_zz64(val)); + case kUpb_FieldType_String: + case kUpb_FieldType_Bytes: { + upb_StringView view = *(upb_StringView*)field_mem; + encode_bytes(e, view.data, view.size); + encode_varint(e, view.size); + wire_type = kUpb_WireType_Delimited; break; - - case kUpb_EncodedVersion_MessageV1: - upb_MtDecoder_ParseMessage(decoder, data, len); - upb_MtDecoder_AssignHasbits(decoder); - upb_MtDecoder_SortLayoutItems(decoder); - upb_MtDecoder_AssignOffsets(decoder); + } + case kUpb_FieldType_Group: { + size_t size; + upb_TaggedMessagePtr submsg = *(upb_TaggedMessagePtr*)field_mem; + const upb_MiniTable* subm = subs[f->UPB_PRIVATE(submsg_index)].submsg; + if (submsg == 0) { + return; + } + if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded); + encode_tag(e, f->number, kUpb_WireType_EndGroup); + encode_TaggedMessagePtr(e, submsg, subm, &size); + wire_type = kUpb_WireType_StartGroup; + e->depth++; break; - - case kUpb_EncodedVersion_MessageSetV1: - upb_MtDecoder_ParseMessageSet(decoder, data, len); + } + case kUpb_FieldType_Message: { + size_t size; + upb_TaggedMessagePtr submsg = *(upb_TaggedMessagePtr*)field_mem; + const upb_MiniTable* subm = subs[f->UPB_PRIVATE(submsg_index)].submsg; + if (submsg == 0) { + return; + } + if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded); + encode_TaggedMessagePtr(e, submsg, subm, &size); + encode_varint(e, size); + wire_type = kUpb_WireType_Delimited; + e->depth++; break; - + } default: - upb_MdDecoder_ErrorJmp(&decoder->base, "Invalid message version: %c", - vers); + UPB_UNREACHABLE(); } +#undef CASE -done: - *buf = decoder->vec.data; - *buf_size = decoder->vec.capacity * sizeof(*decoder->vec.data); - return decoder->table; + encode_tag(e, f->number, wire_type); } -static upb_MiniTable* upb_MtDecoder_BuildMiniTableWithBuf( - upb_MtDecoder* const decoder, const char* const data, const size_t len, - void** const buf, size_t* const buf_size) { - if (UPB_SETJMP(decoder->base.err) != 0) { - *buf = decoder->vec.data; - *buf_size = decoder->vec.capacity * sizeof(*decoder->vec.data); - return NULL; - } +static void encode_array(upb_encstate* e, const upb_Message* msg, + const upb_MiniTableSub* subs, + const upb_MiniTableField* f) { + const upb_Array* arr = *UPB_PTR_AT(msg, f->offset, upb_Array*); + bool packed = f->mode & kUpb_LabelFlags_IsPacked; + size_t pre_len = e->limit - e->ptr; - return upb_MtDecoder_DoBuildMiniTableWithBuf(decoder, data, len, buf, - buf_size); -} + if (arr == NULL || arr->size == 0) { + return; + } -upb_MiniTable* upb_MiniTable_BuildWithBuf(const char* data, size_t len, - upb_MiniTablePlatform platform, - upb_Arena* arena, void** buf, - size_t* buf_size, - upb_Status* status) { - upb_MtDecoder decoder = { - .base = {.status = status}, - .platform = platform, - .vec = - { - .data = *buf, - .capacity = *buf_size / sizeof(*decoder.vec.data), - .size = 0, - }, - .arena = arena, - .table = upb_Arena_Malloc(arena, sizeof(*decoder.table)), - }; +#define VARINT_CASE(ctype, encode) \ + { \ + const ctype* start = _upb_array_constptr(arr); \ + const ctype* ptr = start + arr->size; \ + uint32_t tag = packed ? 0 : (f->number << 3) | kUpb_WireType_Varint; \ + do { \ + ptr--; \ + encode_varint(e, encode); \ + if (tag) encode_varint(e, tag); \ + } while (ptr != start); \ + } \ + break; - return upb_MtDecoder_BuildMiniTableWithBuf(&decoder, data, len, buf, - buf_size); -} +#define TAG(wire_type) (packed ? 0 : (f->number << 3 | wire_type)) -static const char* upb_MtDecoder_DoBuildMiniTableExtension( - upb_MtDecoder* decoder, const char* data, size_t len, - upb_MiniTableExtension* ext, const upb_MiniTable* extendee, - upb_MiniTableSub sub) { - // If the string is non-empty then it must begin with a version tag. - if (len) { - if (*data != kUpb_EncodedVersion_ExtensionV1) { - upb_MdDecoder_ErrorJmp(&decoder->base, "Invalid ext version: %c", *data); + switch (f->UPB_PRIVATE(descriptortype)) { + case kUpb_FieldType_Double: + encode_fixedarray(e, arr, sizeof(double), TAG(kUpb_WireType_64Bit)); + break; + case kUpb_FieldType_Float: + encode_fixedarray(e, arr, sizeof(float), TAG(kUpb_WireType_32Bit)); + break; + case kUpb_FieldType_SFixed64: + case kUpb_FieldType_Fixed64: + encode_fixedarray(e, arr, sizeof(uint64_t), TAG(kUpb_WireType_64Bit)); + break; + case kUpb_FieldType_Fixed32: + case kUpb_FieldType_SFixed32: + encode_fixedarray(e, arr, sizeof(uint32_t), TAG(kUpb_WireType_32Bit)); + break; + case kUpb_FieldType_Int64: + case kUpb_FieldType_UInt64: + VARINT_CASE(uint64_t, *ptr); + case kUpb_FieldType_UInt32: + VARINT_CASE(uint32_t, *ptr); + case kUpb_FieldType_Int32: + case kUpb_FieldType_Enum: + VARINT_CASE(int32_t, (int64_t)*ptr); + case kUpb_FieldType_Bool: + VARINT_CASE(bool, *ptr); + case kUpb_FieldType_SInt32: + VARINT_CASE(int32_t, encode_zz32(*ptr)); + case kUpb_FieldType_SInt64: + VARINT_CASE(int64_t, encode_zz64(*ptr)); + case kUpb_FieldType_String: + case kUpb_FieldType_Bytes: { + const upb_StringView* start = _upb_array_constptr(arr); + const upb_StringView* ptr = start + arr->size; + do { + ptr--; + encode_bytes(e, ptr->data, ptr->size); + encode_varint(e, ptr->size); + encode_tag(e, f->number, kUpb_WireType_Delimited); + } while (ptr != start); + return; + } + case kUpb_FieldType_Group: { + const upb_TaggedMessagePtr* start = _upb_array_constptr(arr); + const upb_TaggedMessagePtr* ptr = start + arr->size; + const upb_MiniTable* subm = subs[f->UPB_PRIVATE(submsg_index)].submsg; + if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded); + do { + size_t size; + ptr--; + encode_tag(e, f->number, kUpb_WireType_EndGroup); + encode_TaggedMessagePtr(e, *ptr, subm, &size); + encode_tag(e, f->number, kUpb_WireType_StartGroup); + } while (ptr != start); + e->depth++; + return; + } + case kUpb_FieldType_Message: { + const upb_TaggedMessagePtr* start = _upb_array_constptr(arr); + const upb_TaggedMessagePtr* ptr = start + arr->size; + const upb_MiniTable* subm = subs[f->UPB_PRIVATE(submsg_index)].submsg; + if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded); + do { + size_t size; + ptr--; + encode_TaggedMessagePtr(e, *ptr, subm, &size); + encode_varint(e, size); + encode_tag(e, f->number, kUpb_WireType_Delimited); + } while (ptr != start); + e->depth++; + return; } - data++; - len--; } +#undef VARINT_CASE - uint16_t count = 0; - upb_SubCounts sub_counts = {0, 0}; - const char* ret = upb_MtDecoder_Parse(decoder, data, len, ext, sizeof(*ext), - &count, &sub_counts); - if (!ret || count != 1) return NULL; - - upb_MiniTableField* f = &ext->field; - - f->mode |= kUpb_LabelFlags_IsExtension; - f->offset = 0; - f->presence = 0; - - if (extendee->ext & kUpb_ExtMode_IsMessageSet) { - // Extensions of MessageSet must be messages. - if (!upb_IsSubMessage(f)) return NULL; - - // Extensions of MessageSet must be non-repeating. - if ((f->mode & kUpb_FieldMode_Mask) == kUpb_FieldMode_Array) return NULL; + if (packed) { + encode_varint(e, e->limit - e->ptr - pre_len); + encode_tag(e, f->number, kUpb_WireType_Delimited); } - - ext->extendee = extendee; - ext->sub = sub; - - return ret; -} - -static const char* upb_MtDecoder_BuildMiniTableExtension( - upb_MtDecoder* const decoder, const char* const data, const size_t len, - upb_MiniTableExtension* const ext, const upb_MiniTable* const extendee, - const upb_MiniTableSub sub) { - if (UPB_SETJMP(decoder->base.err) != 0) return NULL; - return upb_MtDecoder_DoBuildMiniTableExtension(decoder, data, len, ext, - extendee, sub); } -const char* _upb_MiniTableExtension_Init(const char* data, size_t len, - upb_MiniTableExtension* ext, - const upb_MiniTable* extendee, - upb_MiniTableSub sub, - upb_MiniTablePlatform platform, - upb_Status* status) { - upb_MtDecoder decoder = { - .base = {.status = status}, - .arena = NULL, - .table = NULL, - .platform = platform, - }; - - return upb_MtDecoder_BuildMiniTableExtension(&decoder, data, len, ext, - extendee, sub); +static void encode_mapentry(upb_encstate* e, uint32_t number, + const upb_MiniTable* layout, + const upb_MapEntry* ent) { + const upb_MiniTableField* key_field = &layout->fields[0]; + const upb_MiniTableField* val_field = &layout->fields[1]; + size_t pre_len = e->limit - e->ptr; + size_t size; + encode_scalar(e, &ent->data.v, layout->subs, val_field); + encode_scalar(e, &ent->data.k, layout->subs, key_field); + size = (e->limit - e->ptr) - pre_len; + encode_varint(e, size); + encode_tag(e, number, kUpb_WireType_Delimited); } -upb_MiniTableExtension* _upb_MiniTableExtension_Build( - const char* data, size_t len, const upb_MiniTable* extendee, - upb_MiniTableSub sub, upb_MiniTablePlatform platform, upb_Arena* arena, - upb_Status* status) { - upb_MiniTableExtension* ext = - upb_Arena_Malloc(arena, sizeof(upb_MiniTableExtension)); - if (UPB_UNLIKELY(!ext)) return NULL; +static void encode_map(upb_encstate* e, const upb_Message* msg, + const upb_MiniTableSub* subs, + const upb_MiniTableField* f) { + const upb_Map* map = *UPB_PTR_AT(msg, f->offset, const upb_Map*); + const upb_MiniTable* layout = subs[f->UPB_PRIVATE(submsg_index)].submsg; + UPB_ASSERT(layout->field_count == 2); - const char* ptr = _upb_MiniTableExtension_Init(data, len, ext, extendee, sub, - platform, status); - if (UPB_UNLIKELY(!ptr)) return NULL; + if (map == NULL) return; - return ext; + if (e->options & kUpb_EncodeOption_Deterministic) { + _upb_sortedmap sorted; + _upb_mapsorter_pushmap(&e->sorter, + layout->fields[0].UPB_PRIVATE(descriptortype), map, + &sorted); + upb_MapEntry ent; + while (_upb_sortedmap_next(&e->sorter, map, &sorted, &ent)) { + encode_mapentry(e, f->number, layout, &ent); + } + _upb_mapsorter_popmap(&e->sorter, &sorted); + } else { + intptr_t iter = UPB_STRTABLE_BEGIN; + upb_StringView key; + upb_value val; + while (upb_strtable_next2(&map->table, &key, &val, &iter)) { + upb_MapEntry ent; + _upb_map_fromkey(key, &ent.data.k, map->key_size); + _upb_map_fromvalue(val, &ent.data.v, map->val_size); + encode_mapentry(e, f->number, layout, &ent); + } + } } -upb_MiniTable* _upb_MiniTable_Build(const char* data, size_t len, - upb_MiniTablePlatform platform, - upb_Arena* arena, upb_Status* status) { - void* buf = NULL; - size_t size = 0; - upb_MiniTable* ret = upb_MiniTable_BuildWithBuf(data, len, platform, arena, - &buf, &size, status); - free(buf); - return ret; +static bool encode_shouldencode(upb_encstate* e, const upb_Message* msg, + const upb_MiniTableSub* subs, + const upb_MiniTableField* f) { + if (f->presence == 0) { + /* Proto3 presence or map/array. */ + const void* mem = UPB_PTR_AT(msg, f->offset, void); + switch (_upb_MiniTableField_GetRep(f)) { + case kUpb_FieldRep_1Byte: { + char ch; + memcpy(&ch, mem, 1); + return ch != 0; + } + case kUpb_FieldRep_4Byte: { + uint32_t u32; + memcpy(&u32, mem, 4); + return u32 != 0; + } + case kUpb_FieldRep_8Byte: { + uint64_t u64; + memcpy(&u64, mem, 8); + return u64 != 0; + } + case kUpb_FieldRep_StringView: { + const upb_StringView* str = (const upb_StringView*)mem; + return str->size != 0; + } + default: + UPB_UNREACHABLE(); + } + } else if (f->presence > 0) { + /* Proto2 presence: hasbit. */ + return _upb_hasbit_field(msg, f); + } else { + /* Field is in a oneof. */ + return _upb_getoneofcase_field(msg, f) == f->number; + } } - -// Must be last. - -bool upb_MiniTable_SetSubMessage(upb_MiniTable* table, - upb_MiniTableField* field, - const upb_MiniTable* sub) { - UPB_ASSERT((uintptr_t)table->fields <= (uintptr_t)field && - (uintptr_t)field < - (uintptr_t)(table->fields + table->field_count)); - UPB_ASSERT(sub); - - const bool sub_is_map = sub->ext & kUpb_ExtMode_IsMapEntry; - - switch (field->UPB_PRIVATE(descriptortype)) { - case kUpb_FieldType_Message: - if (sub_is_map) { - const bool table_is_map = table->ext & kUpb_ExtMode_IsMapEntry; - if (UPB_UNLIKELY(table_is_map)) return false; - - field->mode = (field->mode & ~kUpb_FieldMode_Mask) | kUpb_FieldMode_Map; - } +static void encode_field(upb_encstate* e, const upb_Message* msg, + const upb_MiniTableSub* subs, + const upb_MiniTableField* field) { + switch (upb_FieldMode_Get(field)) { + case kUpb_FieldMode_Array: + encode_array(e, msg, subs, field); break; - - case kUpb_FieldType_Group: - if (UPB_UNLIKELY(sub_is_map)) return false; + case kUpb_FieldMode_Map: + encode_map(e, msg, subs, field); + break; + case kUpb_FieldMode_Scalar: + encode_scalar(e, UPB_PTR_AT(msg, field->offset, void), subs, field); break; - default: - return false; + UPB_UNREACHABLE(); } - - upb_MiniTableSub* table_sub = - (void*)&table->subs[field->UPB_PRIVATE(submsg_index)]; - // TODO(haberman): Add this assert back once YouTube is updated to not call - // this function repeatedly. - // UPB_ASSERT(table_sub->submsg == &_kUpb_MiniTable_Empty); - table_sub->submsg = sub; - return true; } -bool upb_MiniTable_SetSubEnum(upb_MiniTable* table, upb_MiniTableField* field, - const upb_MiniTableEnum* sub) { - UPB_ASSERT((uintptr_t)table->fields <= (uintptr_t)field && - (uintptr_t)field < - (uintptr_t)(table->fields + table->field_count)); - UPB_ASSERT(sub); - - upb_MiniTableSub* table_sub = - (void*)&table->subs[field->UPB_PRIVATE(submsg_index)]; - table_sub->subenum = sub; - return true; +static void encode_msgset_item(upb_encstate* e, + const upb_Message_Extension* ext) { + size_t size; + encode_tag(e, kUpb_MsgSet_Item, kUpb_WireType_EndGroup); + encode_message(e, ext->data.ptr, ext->ext->sub.submsg, &size); + encode_varint(e, size); + encode_tag(e, kUpb_MsgSet_Message, kUpb_WireType_Delimited); + encode_varint(e, ext->ext->field.number); + encode_tag(e, kUpb_MsgSet_TypeId, kUpb_WireType_Varint); + encode_tag(e, kUpb_MsgSet_Item, kUpb_WireType_StartGroup); } -uint32_t upb_MiniTable_GetSubList(const upb_MiniTable* mt, - const upb_MiniTableField** subs) { - uint32_t msg_count = 0; - uint32_t enum_count = 0; - - for (int i = 0; i < mt->field_count; i++) { - const upb_MiniTableField* f = &mt->fields[i]; - if (upb_MiniTableField_CType(f) == kUpb_CType_Message) { - *subs = f; - ++subs; - msg_count++; - } +static void encode_ext(upb_encstate* e, const upb_Message_Extension* ext, + bool is_message_set) { + if (UPB_UNLIKELY(is_message_set)) { + encode_msgset_item(e, ext); + } else { + encode_field(e, &ext->data, &ext->ext->sub, &ext->ext->field); } +} - for (int i = 0; i < mt->field_count; i++) { - const upb_MiniTableField* f = &mt->fields[i]; - if (upb_MiniTableField_CType(f) == kUpb_CType_Enum) { - *subs = f; - ++subs; - enum_count++; +static void encode_message(upb_encstate* e, const upb_Message* msg, + const upb_MiniTable* m, size_t* size) { + size_t pre_len = e->limit - e->ptr; + + if ((e->options & kUpb_EncodeOption_CheckRequired) && m->required_count) { + uint64_t msg_head; + memcpy(&msg_head, msg, 8); + msg_head = _upb_BigEndian_Swap64(msg_head); + if (upb_MiniTable_requiredmask(m) & ~msg_head) { + encode_err(e, kUpb_EncodeStatus_MissingRequired); } } - return (msg_count << 16) | enum_count; -} + if ((e->options & kUpb_EncodeOption_SkipUnknown) == 0) { + size_t unknown_size; + const char* unknown = upb_Message_GetUnknown(msg, &unknown_size); -// The list of sub_tables and sub_enums must exactly match the number and order -// of sub-message fields and sub-enum fields given by upb_MiniTable_GetSubList() -// above. -bool upb_MiniTable_Link(upb_MiniTable* mt, const upb_MiniTable** sub_tables, - size_t sub_table_count, - const upb_MiniTableEnum** sub_enums, - size_t sub_enum_count) { - uint32_t msg_count = 0; - uint32_t enum_count = 0; + if (unknown) { + encode_bytes(e, unknown, unknown_size); + } + } - for (int i = 0; i < mt->field_count; i++) { - upb_MiniTableField* f = (upb_MiniTableField*)&mt->fields[i]; - if (upb_MiniTableField_CType(f) == kUpb_CType_Message) { - const upb_MiniTable* sub = sub_tables[msg_count++]; - if (msg_count > sub_table_count) return false; - if (sub != NULL) { - if (!upb_MiniTable_SetSubMessage(mt, f, sub)) return false; + if (m->ext != kUpb_ExtMode_NonExtendable) { + /* Encode all extensions together. Unlike C++, we do not attempt to keep + * these in field number order relative to normal fields or even to each + * other. */ + size_t ext_count; + const upb_Message_Extension* ext = _upb_Message_Getexts(msg, &ext_count); + if (ext_count) { + if (e->options & kUpb_EncodeOption_Deterministic) { + _upb_sortedmap sorted; + _upb_mapsorter_pushexts(&e->sorter, ext, ext_count, &sorted); + while (_upb_sortedmap_nextext(&e->sorter, &sorted, &ext)) { + encode_ext(e, ext, m->ext == kUpb_ExtMode_IsMessageSet); + } + _upb_mapsorter_popmap(&e->sorter, &sorted); + } else { + const upb_Message_Extension* end = ext + ext_count; + for (; ext != end; ext++) { + encode_ext(e, ext, m->ext == kUpb_ExtMode_IsMessageSet); + } } } } - for (int i = 0; i < mt->field_count; i++) { - upb_MiniTableField* f = (upb_MiniTableField*)&mt->fields[i]; - if (upb_MiniTableField_IsClosedEnum(f)) { - const upb_MiniTableEnum* sub = sub_enums[enum_count++]; - if (enum_count > sub_enum_count) return false; - if (sub != NULL) { - if (!upb_MiniTable_SetSubEnum(mt, f, sub)) return false; + if (m->field_count) { + const upb_MiniTableField* f = &m->fields[m->field_count]; + const upb_MiniTableField* first = &m->fields[0]; + while (f != first) { + f--; + if (encode_shouldencode(e, msg, m->subs, f)) { + encode_field(e, msg, m->subs, f); } } } - return true; -} - - -const struct upb_MiniTable _kUpb_MiniTable_Empty = { - .subs = NULL, - .fields = NULL, - .size = 0, - .field_count = 0, - .ext = kUpb_ExtMode_NonExtendable, - .dense_below = 0, - .table_mask = -1, - .required_count = 0, -}; - - - -// Must be last. - -#define EXTREG_KEY_SIZE (sizeof(upb_MiniTable*) + sizeof(uint32_t)) - -struct upb_ExtensionRegistry { - upb_Arena* arena; - upb_strtable exts; // Key is upb_MiniTable* concatenated with fieldnum. -}; - -static void extreg_key(char* buf, const upb_MiniTable* l, uint32_t fieldnum) { - memcpy(buf, &l, sizeof(l)); - memcpy(buf + sizeof(l), &fieldnum, sizeof(fieldnum)); + *size = (e->limit - e->ptr) - pre_len; } -upb_ExtensionRegistry* upb_ExtensionRegistry_New(upb_Arena* arena) { - upb_ExtensionRegistry* r = upb_Arena_Malloc(arena, sizeof(*r)); - if (!r) return NULL; - r->arena = arena; - if (!upb_strtable_init(&r->exts, 8, arena)) return NULL; - return r; -} +static upb_EncodeStatus upb_Encoder_Encode(upb_encstate* const encoder, + const void* const msg, + const upb_MiniTable* const l, + char** const buf, + size_t* const size) { + // Unfortunately we must continue to perform hackery here because there are + // code paths which blindly copy the returned pointer without bothering to + // check for errors until much later (b/235839510). So we still set *buf to + // NULL on error and we still set it to non-NULL on a successful empty result. + if (UPB_SETJMP(encoder->err) == 0) { + encode_message(encoder, msg, l, size); + *size = encoder->limit - encoder->ptr; + if (*size == 0) { + static char ch; + *buf = &ch; + } else { + UPB_ASSERT(encoder->ptr); + *buf = encoder->ptr; + } + } else { + UPB_ASSERT(encoder->status != kUpb_EncodeStatus_Ok); + *buf = NULL; + *size = 0; + } -UPB_API bool upb_ExtensionRegistry_Add(upb_ExtensionRegistry* r, - const upb_MiniTableExtension* e) { - char buf[EXTREG_KEY_SIZE]; - extreg_key(buf, e->extendee, e->field.number); - if (upb_strtable_lookup2(&r->exts, buf, EXTREG_KEY_SIZE, NULL)) return false; - return upb_strtable_insert(&r->exts, buf, EXTREG_KEY_SIZE, - upb_value_constptr(e), r->arena); + _upb_mapsorter_destroy(&encoder->sorter); + return encoder->status; } -bool upb_ExtensionRegistry_AddArray(upb_ExtensionRegistry* r, - const upb_MiniTableExtension** e, - size_t count) { - const upb_MiniTableExtension** start = e; - const upb_MiniTableExtension** end = UPB_PTRADD(e, count); - for (; e < end; e++) { - if (!upb_ExtensionRegistry_Add(r, *e)) goto failure; - } - return true; +upb_EncodeStatus upb_Encode(const void* msg, const upb_MiniTable* l, + int options, upb_Arena* arena, char** buf, + size_t* size) { + upb_encstate e; + unsigned depth = (unsigned)options >> 16; -failure: - // Back out the entries previously added. - for (end = e, e = start; e < end; e++) { - const upb_MiniTableExtension* ext = *e; - char buf[EXTREG_KEY_SIZE]; - extreg_key(buf, ext->extendee, ext->field.number); - upb_strtable_remove2(&r->exts, buf, EXTREG_KEY_SIZE, NULL); - } - return false; -} + e.status = kUpb_EncodeStatus_Ok; + e.arena = arena; + e.buf = NULL; + e.limit = NULL; + e.ptr = NULL; + e.depth = depth ? depth : kUpb_WireFormat_DefaultDepthLimit; + e.options = options; + _upb_mapsorter_init(&e.sorter); -const upb_MiniTableExtension* upb_ExtensionRegistry_Lookup( - const upb_ExtensionRegistry* r, const upb_MiniTable* t, uint32_t num) { - char buf[EXTREG_KEY_SIZE]; - upb_value v; - extreg_key(buf, t, num); - if (upb_strtable_lookup2(&r->exts, buf, EXTREG_KEY_SIZE, &v)) { - return upb_value_getconstptr(v); - } else { - return NULL; - } + return upb_Encoder_Encode(&e, msg, l, buf, size); } -#include - // Must be last. -const upb_MiniTableField* upb_MiniTable_FindFieldByNumber( - const upb_MiniTable* t, uint32_t number) { - const size_t i = ((size_t)number) - 1; // 0 wraps to SIZE_MAX - - // Ideal case: index into dense fields - if (i < t->dense_below) { - UPB_ASSERT(t->fields[i].number == number); - return &t->fields[i]; - } - - // Slow case: binary search - int lo = t->dense_below; - int hi = t->field_count - 1; - while (lo <= hi) { - int mid = (lo + hi) / 2; - uint32_t num = t->fields[mid].number; - if (num < number) { - lo = mid + 1; - continue; - } - if (num > number) { - hi = mid - 1; - continue; - } - return &t->fields[mid]; - } - return NULL; -} - -static bool upb_MiniTable_Is_Oneof(const upb_MiniTableField* f) { - return f->presence < 0; -} - -const upb_MiniTableField* upb_MiniTable_GetOneof(const upb_MiniTable* m, - const upb_MiniTableField* f) { - if (UPB_UNLIKELY(!upb_MiniTable_Is_Oneof(f))) { - return NULL; - } - const upb_MiniTableField* ptr = &m->fields[0]; - const upb_MiniTableField* end = &m->fields[m->field_count]; - while (++ptr < end) { - if (ptr->presence == (*f).presence) { - return ptr; +UPB_NOINLINE _upb_WireReader_ReadLongVarintRet +_upb_WireReader_ReadLongVarint(const char* ptr, uint64_t val) { + _upb_WireReader_ReadLongVarintRet ret = {NULL, 0}; + uint64_t byte; + int i; + for (i = 1; i < 10; i++) { + byte = (uint8_t)ptr[i]; + val += (byte - 1) << (i * 7); + if (!(byte & 0x80)) { + ret.ptr = ptr + i + 1; + ret.val = val; + return ret; } } - return NULL; + return ret; } -bool upb_MiniTable_NextOneofField(const upb_MiniTable* m, - const upb_MiniTableField** f) { - const upb_MiniTableField* ptr = *f; - const upb_MiniTableField* end = &m->fields[m->field_count]; - while (++ptr < end) { - if (ptr->presence == (*f)->presence) { - *f = ptr; - return true; - } +const char* _upb_WireReader_SkipGroup(const char* ptr, uint32_t tag, + int depth_limit, + upb_EpsCopyInputStream* stream) { + if (--depth_limit == 0) return NULL; + uint32_t end_group_tag = (tag & ~7ULL) | kUpb_WireType_EndGroup; + while (!upb_EpsCopyInputStream_IsDone(stream, &ptr)) { + uint32_t tag; + ptr = upb_WireReader_ReadTag(ptr, &tag); + if (!ptr) return NULL; + if (tag == end_group_tag) return ptr; + ptr = _upb_WireReader_SkipValue(ptr, tag, depth_limit, stream); + if (!ptr) return NULL; } - return false; + return ptr; } // This should #undef all macros #defined in def.inc diff --git a/ruby/ext/google/protobuf_c/ruby-upb.h b/ruby/ext/google/protobuf_c/ruby-upb.h index 41be731db1..b9f1b6b3d0 100755 --- a/ruby/ext/google/protobuf_c/ruby-upb.h +++ b/ruby/ext/google/protobuf_c/ruby-upb.h @@ -359,8 +359,8 @@ void upb_Status_VAppendErrorFormat(upb_Status* status, const char* fmt, #endif /* UPB_BASE_STATUS_H_ */ -#ifndef UPB_INTERNAL_ARRAY_INTERNAL_H_ -#define UPB_INTERNAL_ARRAY_INTERNAL_H_ +#ifndef UPB_COLLECTIONS_INTERNAL_ARRAY_H_ +#define UPB_COLLECTIONS_INTERNAL_ARRAY_H_ #include @@ -1366,7 +1366,7 @@ UPB_INLINE void _upb_array_detach(const void* msg, size_t ofs) { #endif -#endif /* UPB_INTERNAL_ARRAY_INTERNAL_H_ */ +#endif /* UPB_COLLECTIONS_INTERNAL_ARRAY_H_ */ #ifndef UPB_COLLECTIONS_MAP_H_ #define UPB_COLLECTIONS_MAP_H_ @@ -1479,8 +1479,8 @@ UPB_API upb_MessageValue upb_MapIterator_Value(const upb_Map* map, size_t iter); // EVERYTHING BELOW THIS LINE IS INTERNAL - DO NOT USE ///////////////////////// -#ifndef UPB_COLLECTIONS_MAP_INTERNAL_H_ -#define UPB_COLLECTIONS_MAP_INTERNAL_H_ +#ifndef UPB_COLLECTIONS_INTERNAL_MAP_H_ +#define UPB_COLLECTIONS_INTERNAL_MAP_H_ #ifndef UPB_HASH_STR_TABLE_H_ @@ -1916,47 +1916,18 @@ upb_Map* _upb_Map_New(upb_Arena* a, size_t key_size, size_t value_size); #endif -#endif /* UPB_COLLECTIONS_MAP_INTERNAL_H_ */ - -#ifndef UPB_BASE_LOG2_H_ -#define UPB_BASE_LOG2_H_ - -// Must be last. - -#ifdef __cplusplus -extern "C" { -#endif - -UPB_INLINE int upb_Log2Ceiling(int x) { - if (x <= 1) return 0; -#ifdef __GNUC__ - return 32 - __builtin_clz(x - 1); -#else - int lg2 = 0; - while ((1 << lg2) < x) lg2++; - return lg2; -#endif -} - -UPB_INLINE int upb_Log2CeilingSize(int x) { return 1 << upb_Log2Ceiling(x); } - -#ifdef __cplusplus -} /* extern "C" */ -#endif - - -#endif /* UPB_BASE_LOG2_H_ */ +#endif /* UPB_COLLECTIONS_INTERNAL_MAP_H_ */ // EVERYTHING BELOW THIS LINE IS INTERNAL - DO NOT USE ///////////////////////// -#ifndef UPB_COLLECTIONS_MAP_SORTER_INTERNAL_H_ -#define UPB_COLLECTIONS_MAP_SORTER_INTERNAL_H_ +#ifndef UPB_COLLECTIONS_INTERNAL_MAP_SORTER_H_ +#define UPB_COLLECTIONS_INTERNAL_MAP_SORTER_H_ #include -#ifndef UPB_MESSAGE_EXTENSION_INTERNAL_H_ -#define UPB_MESSAGE_EXTENSION_INTERNAL_H_ +#ifndef UPB_MESSAGE_INTERNAL_EXTENSION_H_ +#define UPB_MESSAGE_INTERNAL_EXTENSION_H_ // Public APIs for message operations that do not depend on the schema. @@ -2070,7 +2041,7 @@ const upb_Message_Extension* _upb_Message_Getext( #endif -#endif /* UPB_MESSAGE_EXTENSION_INTERNAL_H_ */ +#endif /* UPB_MESSAGE_INTERNAL_EXTENSION_H_ */ #ifndef UPB_MINI_TABLE_INTERNAL_MAP_ENTRY_DATA_H_ #define UPB_MINI_TABLE_INTERNAL_MAP_ENTRY_DATA_H_ @@ -2110,7 +2081,7 @@ typedef struct { // require 8-byte alignment. double d; }; - // LINT.ThenChange(//depot/google3/third_party/upb/upb/message/internal.h:internal_layout) + // LINT.ThenChange(//depot/google3/third_party/upb/upb/message/internal/message.h:internal_layout) upb_MapEntryData data; } upb_MapEntry; @@ -2184,7 +2155,36 @@ bool _upb_mapsorter_pushexts(_upb_mapsorter* s, #endif -#endif /* UPB_COLLECTIONS_MAP_SORTER_INTERNAL_H_ */ +#endif /* UPB_COLLECTIONS_INTERNAL_MAP_SORTER_H_ */ + +#ifndef UPB_BASE_LOG2_H_ +#define UPB_BASE_LOG2_H_ + +// Must be last. + +#ifdef __cplusplus +extern "C" { +#endif + +UPB_INLINE int upb_Log2Ceiling(int x) { + if (x <= 1) return 0; +#ifdef __GNUC__ + return 32 - __builtin_clz(x - 1); +#else + int lg2 = 0; + while ((1 << lg2) < x) lg2++; + return lg2; +#endif +} + +UPB_INLINE int upb_Log2CeilingSize(int x) { return 1 << upb_Log2Ceiling(x); } + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* UPB_BASE_LOG2_H_ */ #ifndef UPB_GENERATED_CODE_SUPPORT_H_ #define UPB_GENERATED_CODE_SUPPORT_H_ @@ -2244,6 +2244,10 @@ UPB_INLINE void _upb_msg_map_set_value(void* msg, const void* val, #define UPB_MESSAGE_ACCESSORS_H_ +#ifndef UPB_MESSAGE_INTERNAL_ACCESSORS_H_ +#define UPB_MESSAGE_INTERNAL_ACCESSORS_H_ + + /* ** Our memory representation for parsing tables and messages themselves. ** Functions in this file are used by generated code and possibly reflection. @@ -2423,10 +2427,6 @@ bool _upb_Message_AddUnknown(upb_Message* msg, const char* data, size_t len, #endif /* UPB_MESSAGE_INTERNAL_H_ */ -#ifndef UPB_MESSAGE_INTERNAL_ACCESSORS_H_ -#define UPB_MESSAGE_INTERNAL_ACCESSORS_H_ - - // Must be last. #if defined(__GNUC__) && !defined(__clang__) @@ -11732,12 +11732,12 @@ UPB_INLINE const char* upb_WireReader_SkipValue( #endif // UPB_WIRE_READER_H_ -#ifndef UPB_REFLECTION_DEF_BUILDER_INTERNAL_H_ -#define UPB_REFLECTION_DEF_BUILDER_INTERNAL_H_ +#ifndef UPB_MINI_DESCRIPTOR_INTERNAL_DECODER_H_ +#define UPB_MINI_DESCRIPTOR_INTERNAL_DECODER_H_ -#ifndef UPB_REFLECTION_DEF_POOL_INTERNAL_H_ -#define UPB_REFLECTION_DEF_POOL_INTERNAL_H_ +#ifndef UPB_MINI_DESCRIPTOR_INTERNAL_BASE92_H_ +#define UPB_MINI_DESCRIPTOR_INTERNAL_BASE92_H_ // Must be last. @@ -11746,117 +11746,377 @@ UPB_INLINE const char* upb_WireReader_SkipValue( extern "C" { #endif -upb_Arena* _upb_DefPool_Arena(const upb_DefPool* s); -size_t _upb_DefPool_BytesLoaded(const upb_DefPool* s); -upb_ExtensionRegistry* _upb_DefPool_ExtReg(const upb_DefPool* s); - -bool _upb_DefPool_InsertExt(upb_DefPool* s, const upb_MiniTableExtension* ext, - const upb_FieldDef* f); -bool _upb_DefPool_InsertSym(upb_DefPool* s, upb_StringView sym, upb_value v, - upb_Status* status); -bool _upb_DefPool_LookupSym(const upb_DefPool* s, const char* sym, size_t size, - upb_value* v); - -void** _upb_DefPool_ScratchData(const upb_DefPool* s); -size_t* _upb_DefPool_ScratchSize(const upb_DefPool* s); -void _upb_DefPool_SetPlatform(upb_DefPool* s, upb_MiniTablePlatform platform); - -// For generated code only: loads a generated descriptor. -typedef struct _upb_DefPool_Init { - struct _upb_DefPool_Init** deps; // Dependencies of this file. - const upb_MiniTableFile* layout; - const char* filename; - upb_StringView descriptor; // Serialized descriptor. -} _upb_DefPool_Init; +UPB_INLINE char _upb_ToBase92(int8_t ch) { + extern const char _kUpb_ToBase92[]; + UPB_ASSERT(0 <= ch && ch < 92); + return _kUpb_ToBase92[ch]; +} -bool _upb_DefPool_LoadDefInit(upb_DefPool* s, const _upb_DefPool_Init* init); +UPB_INLINE char _upb_FromBase92(uint8_t ch) { + extern const int8_t _kUpb_FromBase92[]; + if (' ' > ch || ch > '~') return -1; + return _kUpb_FromBase92[ch - ' ']; +} -// Should only be directly called by tests. This variant lets us suppress -// the use of compiled-in tables, forcing a rebuild of the tables at runtime. -bool _upb_DefPool_LoadDefInitEx(upb_DefPool* s, const _upb_DefPool_Init* init, - bool rebuild_minitable); +UPB_INLINE const char* _upb_Base92_DecodeVarint(const char* ptr, + const char* end, char first_ch, + uint8_t min, uint8_t max, + uint32_t* out_val) { + uint32_t val = 0; + uint32_t shift = 0; + const int bits_per_char = + upb_Log2Ceiling(_upb_FromBase92(max) - _upb_FromBase92(min)); + char ch = first_ch; + while (1) { + uint32_t bits = _upb_FromBase92(ch) - _upb_FromBase92(min); + val |= bits << shift; + if (ptr == end || *ptr < min || max < *ptr) { + *out_val = val; + UPB_ASSUME(ptr != NULL); + return ptr; + } + ch = *ptr++; + shift += bits_per_char; + if (shift >= 32) return NULL; + } +} #ifdef __cplusplus } /* extern "C" */ #endif -#endif /* UPB_REFLECTION_DEF_POOL_INTERNAL_H_ */ +#endif // UPB_MINI_DESCRIPTOR_INTERNAL_BASE92_H_ // Must be last. -// We want to copy the options verbatim into the destination options proto. -// We use serialize+parse as our deep copy. -#define UPB_DEF_SET_OPTIONS(target, desc_type, options_type, proto) \ - if (UPB_DESC(desc_type##_has_options)(proto)) { \ - size_t size; \ - char* pb = UPB_DESC(options_type##_serialize)( \ - UPB_DESC(desc_type##_options)(proto), ctx->tmp_arena, &size); \ - if (!pb) _upb_DefBuilder_OomErr(ctx); \ - target = \ - UPB_DESC(options_type##_parse)(pb, size, _upb_DefBuilder_Arena(ctx)); \ - if (!target) _upb_DefBuilder_OomErr(ctx); \ - } else { \ - target = (const UPB_DESC(options_type)*)kUpbDefOptDefault; \ - } +// upb_MdDecoder: used internally for decoding MiniDescriptors for messages, +// extensions, and enums. +typedef struct { + const char* end; + upb_Status* status; + jmp_buf err; +} upb_MdDecoder; -#ifdef __cplusplus -extern "C" { -#endif +UPB_PRINTF(2, 3) +UPB_NORETURN UPB_INLINE void upb_MdDecoder_ErrorJmp(upb_MdDecoder* d, + const char* fmt, ...) { + if (d->status) { + va_list argp; + upb_Status_SetErrorMessage(d->status, "Error building mini table: "); + va_start(argp, fmt); + upb_Status_VAppendErrorFormat(d->status, fmt, argp); + va_end(argp); + } + UPB_LONGJMP(d->err, 1); +} -struct upb_DefBuilder { - upb_DefPool* symtab; - upb_FileDef* file; // File we are building. - upb_Arena* arena; // Allocate defs here. - upb_Arena* tmp_arena; // For temporary allocations. - upb_Status* status; // Record errors here. - const upb_MiniTableFile* layout; // NULL if we should build layouts. - upb_MiniTablePlatform platform; // Platform we are targeting. - int enum_count; // Count of enums built so far. - int msg_count; // Count of messages built so far. - int ext_count; // Count of extensions built so far. - jmp_buf err; // longjmp() on error. -}; +UPB_INLINE void upb_MdDecoder_CheckOutOfMemory(upb_MdDecoder* d, + const void* ptr) { + if (!ptr) upb_MdDecoder_ErrorJmp(d, "Out of memory"); +} -extern const char* kUpbDefOptDefault; +UPB_INLINE const char* upb_MdDecoder_DecodeBase92Varint( + upb_MdDecoder* d, const char* ptr, char first_ch, uint8_t min, uint8_t max, + uint32_t* out_val) { + ptr = _upb_Base92_DecodeVarint(ptr, d->end, first_ch, min, max, out_val); + if (!ptr) upb_MdDecoder_ErrorJmp(d, "Overlong varint"); + return ptr; +} -// ctx->status has already been set elsewhere so just fail/longjmp() -UPB_NORETURN void _upb_DefBuilder_FailJmp(upb_DefBuilder* ctx); -UPB_NORETURN void _upb_DefBuilder_Errf(upb_DefBuilder* ctx, const char* fmt, - ...) UPB_PRINTF(2, 3); -UPB_NORETURN void _upb_DefBuilder_OomErr(upb_DefBuilder* ctx); +#endif // UPB_MINI_DESCRIPTOR_INTERNAL_DECODER_H_ -const char* _upb_DefBuilder_MakeFullName(upb_DefBuilder* ctx, - const char* prefix, - upb_StringView name); +#ifndef UPB_MINI_DESCRIPTOR_INTERNAL_WIRE_CONSTANTS_H_ +#define UPB_MINI_DESCRIPTOR_INTERNAL_WIRE_CONSTANTS_H_ -// Given a symbol and the base symbol inside which it is defined, -// find the symbol's definition. -const void* _upb_DefBuilder_ResolveAny(upb_DefBuilder* ctx, - const char* from_name_dbg, - const char* base, upb_StringView sym, - upb_deftype_t* type); -const void* _upb_DefBuilder_Resolve(upb_DefBuilder* ctx, - const char* from_name_dbg, const char* base, - upb_StringView sym, upb_deftype_t type); +// Must be last. -char _upb_DefBuilder_ParseEscape(upb_DefBuilder* ctx, const upb_FieldDef* f, - const char** src, const char* end); +typedef enum { + kUpb_EncodedType_Double = 0, + kUpb_EncodedType_Float = 1, + kUpb_EncodedType_Fixed32 = 2, + kUpb_EncodedType_Fixed64 = 3, + kUpb_EncodedType_SFixed32 = 4, + kUpb_EncodedType_SFixed64 = 5, + kUpb_EncodedType_Int32 = 6, + kUpb_EncodedType_UInt32 = 7, + kUpb_EncodedType_SInt32 = 8, + kUpb_EncodedType_Int64 = 9, + kUpb_EncodedType_UInt64 = 10, + kUpb_EncodedType_SInt64 = 11, + kUpb_EncodedType_OpenEnum = 12, + kUpb_EncodedType_Bool = 13, + kUpb_EncodedType_Bytes = 14, + kUpb_EncodedType_String = 15, + kUpb_EncodedType_Group = 16, + kUpb_EncodedType_Message = 17, + kUpb_EncodedType_ClosedEnum = 18, -const char* _upb_DefBuilder_FullToShort(const char* fullname); + kUpb_EncodedType_RepeatedBase = 20, +} upb_EncodedType; -UPB_INLINE void* _upb_DefBuilder_Alloc(upb_DefBuilder* ctx, size_t bytes) { - if (bytes == 0) return NULL; - void* ret = upb_Arena_Malloc(ctx->arena, bytes); - if (!ret) _upb_DefBuilder_OomErr(ctx); - return ret; -} +typedef enum { + kUpb_EncodedFieldModifier_FlipPacked = 1 << 0, + kUpb_EncodedFieldModifier_IsRequired = 1 << 1, + kUpb_EncodedFieldModifier_IsProto3Singular = 1 << 2, +} upb_EncodedFieldModifier; -// Adds a symbol |v| to the symtab, which must be a def pointer previously -// packed with pack_def(). The def's pointer to upb_FileDef* must be set before -// adding, so we know which entries to remove if building this file fails. +enum { + kUpb_EncodedValue_MinField = ' ', + kUpb_EncodedValue_MaxField = 'I', + kUpb_EncodedValue_MinModifier = 'L', + kUpb_EncodedValue_MaxModifier = '[', + kUpb_EncodedValue_End = '^', + kUpb_EncodedValue_MinSkip = '_', + kUpb_EncodedValue_MaxSkip = '~', + kUpb_EncodedValue_OneofSeparator = '~', + kUpb_EncodedValue_FieldSeparator = '|', + kUpb_EncodedValue_MinOneofField = ' ', + kUpb_EncodedValue_MaxOneofField = 'b', + kUpb_EncodedValue_MaxEnumMask = 'A', +}; + +enum { + kUpb_EncodedVersion_EnumV1 = '!', + kUpb_EncodedVersion_ExtensionV1 = '#', + kUpb_EncodedVersion_MapV1 = '%', + kUpb_EncodedVersion_MessageV1 = '$', + kUpb_EncodedVersion_MessageSetV1 = '&', +}; + + +#endif // UPB_MINI_DESCRIPTOR_INTERNAL_WIRE_CONSTANTS_H_ + +#ifndef UPB_MINI_DESCRIPTOR_INTERNAL_MODIFIERS_H_ +#define UPB_MINI_DESCRIPTOR_INTERNAL_MODIFIERS_H_ + +// Must be last. + +typedef enum { + kUpb_FieldModifier_IsRepeated = 1 << 0, + kUpb_FieldModifier_IsPacked = 1 << 1, + kUpb_FieldModifier_IsClosedEnum = 1 << 2, + kUpb_FieldModifier_IsProto3Singular = 1 << 3, + kUpb_FieldModifier_IsRequired = 1 << 4, +} kUpb_FieldModifier; + +typedef enum { + kUpb_MessageModifier_ValidateUtf8 = 1 << 0, + kUpb_MessageModifier_DefaultIsPacked = 1 << 1, + kUpb_MessageModifier_IsExtendable = 1 << 2, +} kUpb_MessageModifier; + + +#endif // UPB_MINI_DESCRIPTOR_INTERNAL_MODIFIERS_H_ + +#ifndef UPB_MINI_DESCRIPTOR_INTERNAL_ENCODE_H_ +#define UPB_MINI_DESCRIPTOR_INTERNAL_ENCODE_H_ + + +// Must be last. + +// If the input buffer has at least this many bytes available, the encoder call +// is guaranteed to succeed (as long as field number order is maintained). +#define kUpb_MtDataEncoder_MinSize 16 + +typedef struct { + char* end; // Limit of the buffer passed as a parameter. + // Aliased to internal-only members in .cc. + char internal[32]; +} upb_MtDataEncoder; + +#ifdef __cplusplus +extern "C" { +#endif + +// Encodes field/oneof information for a given message. The sequence of calls +// should look like: +// +// upb_MtDataEncoder e; +// char buf[256]; +// char* ptr = buf; +// e.end = ptr + sizeof(buf); +// unit64_t msg_mod = ...; // bitwise & of kUpb_MessageModifiers or zero +// ptr = upb_MtDataEncoder_StartMessage(&e, ptr, msg_mod); +// // Fields *must* be in field number order. +// ptr = upb_MtDataEncoder_PutField(&e, ptr, ...); +// ptr = upb_MtDataEncoder_PutField(&e, ptr, ...); +// ptr = upb_MtDataEncoder_PutField(&e, ptr, ...); +// +// // If oneofs are present. Oneofs must be encoded after regular fields. +// ptr = upb_MiniTable_StartOneof(&e, ptr) +// ptr = upb_MiniTable_PutOneofField(&e, ptr, ...); +// ptr = upb_MiniTable_PutOneofField(&e, ptr, ...); +// +// ptr = upb_MiniTable_StartOneof(&e, ptr); +// ptr = upb_MiniTable_PutOneofField(&e, ptr, ...); +// ptr = upb_MiniTable_PutOneofField(&e, ptr, ...); +// +// Oneofs must be encoded after all regular fields. +char* upb_MtDataEncoder_StartMessage(upb_MtDataEncoder* e, char* ptr, + uint64_t msg_mod); +char* upb_MtDataEncoder_PutField(upb_MtDataEncoder* e, char* ptr, + upb_FieldType type, uint32_t field_num, + uint64_t field_mod); +char* upb_MtDataEncoder_StartOneof(upb_MtDataEncoder* e, char* ptr); +char* upb_MtDataEncoder_PutOneofField(upb_MtDataEncoder* e, char* ptr, + uint32_t field_num); + +// Encodes the set of values for a given enum. The values must be given in +// order (after casting to uint32_t), and repeats are not allowed. +char* upb_MtDataEncoder_StartEnum(upb_MtDataEncoder* e, char* ptr); +char* upb_MtDataEncoder_PutEnumValue(upb_MtDataEncoder* e, char* ptr, + uint32_t val); +char* upb_MtDataEncoder_EndEnum(upb_MtDataEncoder* e, char* ptr); + +// Encodes an entire mini descriptor for an extension. +char* upb_MtDataEncoder_EncodeExtension(upb_MtDataEncoder* e, char* ptr, + upb_FieldType type, uint32_t field_num, + uint64_t field_mod); + +// Encodes an entire mini descriptor for a map. +char* upb_MtDataEncoder_EncodeMap(upb_MtDataEncoder* e, char* ptr, + upb_FieldType key_type, + upb_FieldType value_type, uint64_t key_mod, + uint64_t value_mod); + +// Encodes an entire mini descriptor for a message set. +char* upb_MtDataEncoder_EncodeMessageSet(upb_MtDataEncoder* e, char* ptr); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* UPB_MINI_DESCRIPTOR_INTERNAL_ENCODE_H_ */ + +#ifndef UPB_REFLECTION_DEF_BUILDER_INTERNAL_H_ +#define UPB_REFLECTION_DEF_BUILDER_INTERNAL_H_ + + +#ifndef UPB_REFLECTION_DEF_POOL_INTERNAL_H_ +#define UPB_REFLECTION_DEF_POOL_INTERNAL_H_ + + +// Must be last. + +#ifdef __cplusplus +extern "C" { +#endif + +upb_Arena* _upb_DefPool_Arena(const upb_DefPool* s); +size_t _upb_DefPool_BytesLoaded(const upb_DefPool* s); +upb_ExtensionRegistry* _upb_DefPool_ExtReg(const upb_DefPool* s); + +bool _upb_DefPool_InsertExt(upb_DefPool* s, const upb_MiniTableExtension* ext, + const upb_FieldDef* f); +bool _upb_DefPool_InsertSym(upb_DefPool* s, upb_StringView sym, upb_value v, + upb_Status* status); +bool _upb_DefPool_LookupSym(const upb_DefPool* s, const char* sym, size_t size, + upb_value* v); + +void** _upb_DefPool_ScratchData(const upb_DefPool* s); +size_t* _upb_DefPool_ScratchSize(const upb_DefPool* s); +void _upb_DefPool_SetPlatform(upb_DefPool* s, upb_MiniTablePlatform platform); + +// For generated code only: loads a generated descriptor. +typedef struct _upb_DefPool_Init { + struct _upb_DefPool_Init** deps; // Dependencies of this file. + const upb_MiniTableFile* layout; + const char* filename; + upb_StringView descriptor; // Serialized descriptor. +} _upb_DefPool_Init; + +bool _upb_DefPool_LoadDefInit(upb_DefPool* s, const _upb_DefPool_Init* init); + +// Should only be directly called by tests. This variant lets us suppress +// the use of compiled-in tables, forcing a rebuild of the tables at runtime. +bool _upb_DefPool_LoadDefInitEx(upb_DefPool* s, const _upb_DefPool_Init* init, + bool rebuild_minitable); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* UPB_REFLECTION_DEF_POOL_INTERNAL_H_ */ + +// Must be last. + +// We want to copy the options verbatim into the destination options proto. +// We use serialize+parse as our deep copy. +#define UPB_DEF_SET_OPTIONS(target, desc_type, options_type, proto) \ + if (UPB_DESC(desc_type##_has_options)(proto)) { \ + size_t size; \ + char* pb = UPB_DESC(options_type##_serialize)( \ + UPB_DESC(desc_type##_options)(proto), ctx->tmp_arena, &size); \ + if (!pb) _upb_DefBuilder_OomErr(ctx); \ + target = \ + UPB_DESC(options_type##_parse)(pb, size, _upb_DefBuilder_Arena(ctx)); \ + if (!target) _upb_DefBuilder_OomErr(ctx); \ + } else { \ + target = (const UPB_DESC(options_type)*)kUpbDefOptDefault; \ + } + +#ifdef __cplusplus +extern "C" { +#endif + +struct upb_DefBuilder { + upb_DefPool* symtab; + upb_FileDef* file; // File we are building. + upb_Arena* arena; // Allocate defs here. + upb_Arena* tmp_arena; // For temporary allocations. + upb_Status* status; // Record errors here. + const upb_MiniTableFile* layout; // NULL if we should build layouts. + upb_MiniTablePlatform platform; // Platform we are targeting. + int enum_count; // Count of enums built so far. + int msg_count; // Count of messages built so far. + int ext_count; // Count of extensions built so far. + jmp_buf err; // longjmp() on error. +}; + +extern const char* kUpbDefOptDefault; + +// ctx->status has already been set elsewhere so just fail/longjmp() +UPB_NORETURN void _upb_DefBuilder_FailJmp(upb_DefBuilder* ctx); + +UPB_NORETURN void _upb_DefBuilder_Errf(upb_DefBuilder* ctx, const char* fmt, + ...) UPB_PRINTF(2, 3); +UPB_NORETURN void _upb_DefBuilder_OomErr(upb_DefBuilder* ctx); + +const char* _upb_DefBuilder_MakeFullName(upb_DefBuilder* ctx, + const char* prefix, + upb_StringView name); + +// Given a symbol and the base symbol inside which it is defined, +// find the symbol's definition. +const void* _upb_DefBuilder_ResolveAny(upb_DefBuilder* ctx, + const char* from_name_dbg, + const char* base, upb_StringView sym, + upb_deftype_t* type); + +const void* _upb_DefBuilder_Resolve(upb_DefBuilder* ctx, + const char* from_name_dbg, const char* base, + upb_StringView sym, upb_deftype_t type); + +char _upb_DefBuilder_ParseEscape(upb_DefBuilder* ctx, const upb_FieldDef* f, + const char** src, const char* end); + +const char* _upb_DefBuilder_FullToShort(const char* fullname); + +UPB_INLINE void* _upb_DefBuilder_Alloc(upb_DefBuilder* ctx, size_t bytes) { + if (bytes == 0) return NULL; + void* ret = upb_Arena_Malloc(ctx->arena, bytes); + if (!ret) _upb_DefBuilder_OomErr(ctx); + return ret; +} + +// Adds a symbol |v| to the symtab, which must be a def pointer previously +// packed with pack_def(). The def's pointer to upb_FileDef* must be set before +// adding, so we know which entries to remove if building this file fails. UPB_INLINE void _upb_DefBuilder_Add(upb_DefBuilder* ctx, const char* name, upb_value v) { upb_StringView sym = {.data = name, .size = strlen(name)}; @@ -12067,116 +12327,35 @@ upb_MessageDef* _upb_MessageDefs_New( #endif -#endif /* UPB_REFLECTION_MESSAGE_DEF_INTERNAL_H_ */ - -#ifndef UPB_REFLECTION_SERVICE_DEF_INTERNAL_H_ -#define UPB_REFLECTION_SERVICE_DEF_INTERNAL_H_ - - -// Must be last. - -#ifdef __cplusplus -extern "C" { -#endif - -upb_ServiceDef* _upb_ServiceDef_At(const upb_ServiceDef* s, int i); - -// Allocate and initialize an array of |n| service defs. -upb_ServiceDef* _upb_ServiceDefs_New( - upb_DefBuilder* ctx, int n, - const UPB_DESC(ServiceDescriptorProto) * const* protos); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - - -#endif /* UPB_REFLECTION_SERVICE_DEF_INTERNAL_H_ */ - -#ifndef UPB_REFLECTION_DESC_STATE_INTERNAL_H_ -#define UPB_REFLECTION_DESC_STATE_INTERNAL_H_ - - -#ifndef UPB_MINI_DESCRIPTOR_INTERNAL_ENCODE_H_ -#define UPB_MINI_DESCRIPTOR_INTERNAL_ENCODE_H_ - - -// Must be last. +#endif /* UPB_REFLECTION_MESSAGE_DEF_INTERNAL_H_ */ -// If the input buffer has at least this many bytes available, the encoder call -// is guaranteed to succeed (as long as field number order is maintained). -#define kUpb_MtDataEncoder_MinSize 16 +#ifndef UPB_REFLECTION_SERVICE_DEF_INTERNAL_H_ +#define UPB_REFLECTION_SERVICE_DEF_INTERNAL_H_ -typedef struct { - char* end; // Limit of the buffer passed as a parameter. - // Aliased to internal-only members in .cc. - char internal[32]; -} upb_MtDataEncoder; + +// Must be last. #ifdef __cplusplus extern "C" { #endif -// Encodes field/oneof information for a given message. The sequence of calls -// should look like: -// -// upb_MtDataEncoder e; -// char buf[256]; -// char* ptr = buf; -// e.end = ptr + sizeof(buf); -// unit64_t msg_mod = ...; // bitwise & of kUpb_MessageModifiers or zero -// ptr = upb_MtDataEncoder_StartMessage(&e, ptr, msg_mod); -// // Fields *must* be in field number order. -// ptr = upb_MtDataEncoder_PutField(&e, ptr, ...); -// ptr = upb_MtDataEncoder_PutField(&e, ptr, ...); -// ptr = upb_MtDataEncoder_PutField(&e, ptr, ...); -// -// // If oneofs are present. Oneofs must be encoded after regular fields. -// ptr = upb_MiniTable_StartOneof(&e, ptr) -// ptr = upb_MiniTable_PutOneofField(&e, ptr, ...); -// ptr = upb_MiniTable_PutOneofField(&e, ptr, ...); -// -// ptr = upb_MiniTable_StartOneof(&e, ptr); -// ptr = upb_MiniTable_PutOneofField(&e, ptr, ...); -// ptr = upb_MiniTable_PutOneofField(&e, ptr, ...); -// -// Oneofs must be encoded after all regular fields. -char* upb_MtDataEncoder_StartMessage(upb_MtDataEncoder* e, char* ptr, - uint64_t msg_mod); -char* upb_MtDataEncoder_PutField(upb_MtDataEncoder* e, char* ptr, - upb_FieldType type, uint32_t field_num, - uint64_t field_mod); -char* upb_MtDataEncoder_StartOneof(upb_MtDataEncoder* e, char* ptr); -char* upb_MtDataEncoder_PutOneofField(upb_MtDataEncoder* e, char* ptr, - uint32_t field_num); - -// Encodes the set of values for a given enum. The values must be given in -// order (after casting to uint32_t), and repeats are not allowed. -char* upb_MtDataEncoder_StartEnum(upb_MtDataEncoder* e, char* ptr); -char* upb_MtDataEncoder_PutEnumValue(upb_MtDataEncoder* e, char* ptr, - uint32_t val); -char* upb_MtDataEncoder_EndEnum(upb_MtDataEncoder* e, char* ptr); - -// Encodes an entire mini descriptor for an extension. -char* upb_MtDataEncoder_EncodeExtension(upb_MtDataEncoder* e, char* ptr, - upb_FieldType type, uint32_t field_num, - uint64_t field_mod); - -// Encodes an entire mini descriptor for a map. -char* upb_MtDataEncoder_EncodeMap(upb_MtDataEncoder* e, char* ptr, - upb_FieldType key_type, - upb_FieldType value_type, uint64_t key_mod, - uint64_t value_mod); +upb_ServiceDef* _upb_ServiceDef_At(const upb_ServiceDef* s, int i); -// Encodes an entire mini descriptor for a message set. -char* upb_MtDataEncoder_EncodeMessageSet(upb_MtDataEncoder* e, char* ptr); +// Allocate and initialize an array of |n| service defs. +upb_ServiceDef* _upb_ServiceDefs_New( + upb_DefBuilder* ctx, int n, + const UPB_DESC(ServiceDescriptorProto) * const* protos); #ifdef __cplusplus } /* extern "C" */ #endif -#endif /* UPB_MINI_DESCRIPTOR_INTERNAL_ENCODE_H_ */ +#endif /* UPB_REFLECTION_SERVICE_DEF_INTERNAL_H_ */ + +#ifndef UPB_REFLECTION_DESC_STATE_INTERNAL_H_ +#define UPB_REFLECTION_DESC_STATE_INTERNAL_H_ + // Must be last. @@ -12281,28 +12460,6 @@ upb_ExtensionRange* _upb_ExtensionRanges_New( #endif /* UPB_REFLECTION_EXTENSION_RANGE_INTERNAL_H_ */ -#ifndef UPB_MINI_DESCRIPTOR_INTERNAL_MODIFIERS_H_ -#define UPB_MINI_DESCRIPTOR_INTERNAL_MODIFIERS_H_ - -// Must be last. - -typedef enum { - kUpb_FieldModifier_IsRepeated = 1 << 0, - kUpb_FieldModifier_IsPacked = 1 << 1, - kUpb_FieldModifier_IsClosedEnum = 1 << 2, - kUpb_FieldModifier_IsProto3Singular = 1 << 3, - kUpb_FieldModifier_IsRequired = 1 << 4, -} kUpb_FieldModifier; - -typedef enum { - kUpb_MessageModifier_ValidateUtf8 = 1 << 0, - kUpb_MessageModifier_DefaultIsPacked = 1 << 1, - kUpb_MessageModifier_IsExtendable = 1 << 2, -} kUpb_MessageModifier; - - -#endif // UPB_MINI_DESCRIPTOR_INTERNAL_MODIFIERS_H_ - #ifndef UPB_REFLECTION_ONEOF_DEF_INTERNAL_H_ #define UPB_REFLECTION_ONEOF_DEF_INTERNAL_H_ @@ -12556,163 +12713,6 @@ UPB_INLINE uint32_t _upb_FastDecoder_LoadTag(const char* ptr) { #endif /* UPB_WIRE_INTERNAL_DECODE_H_ */ -#ifndef UPB_MINI_DESCRIPTOR_INTERNAL_BASE92_H_ -#define UPB_MINI_DESCRIPTOR_INTERNAL_BASE92_H_ - - -// Must be last. - -#ifdef __cplusplus -extern "C" { -#endif - -UPB_INLINE char _upb_ToBase92(int8_t ch) { - extern const char _kUpb_ToBase92[]; - UPB_ASSERT(0 <= ch && ch < 92); - return _kUpb_ToBase92[ch]; -} - -UPB_INLINE char _upb_FromBase92(uint8_t ch) { - extern const int8_t _kUpb_FromBase92[]; - if (' ' > ch || ch > '~') return -1; - return _kUpb_FromBase92[ch - ' ']; -} - -UPB_INLINE const char* _upb_Base92_DecodeVarint(const char* ptr, - const char* end, char first_ch, - uint8_t min, uint8_t max, - uint32_t* out_val) { - uint32_t val = 0; - uint32_t shift = 0; - const int bits_per_char = - upb_Log2Ceiling(_upb_FromBase92(max) - _upb_FromBase92(min)); - char ch = first_ch; - while (1) { - uint32_t bits = _upb_FromBase92(ch) - _upb_FromBase92(min); - val |= bits << shift; - if (ptr == end || *ptr < min || max < *ptr) { - *out_val = val; - UPB_ASSUME(ptr != NULL); - return ptr; - } - ch = *ptr++; - shift += bits_per_char; - if (shift >= 32) return NULL; - } -} - -#ifdef __cplusplus -} /* extern "C" */ -#endif - - -#endif // UPB_MINI_DESCRIPTOR_INTERNAL_BASE92_H_ - -#ifndef UPB_MINI_DESCRIPTOR_INTERNAL_WIRE_CONSTANTS_H_ -#define UPB_MINI_DESCRIPTOR_INTERNAL_WIRE_CONSTANTS_H_ - - -// Must be last. - -typedef enum { - kUpb_EncodedType_Double = 0, - kUpb_EncodedType_Float = 1, - kUpb_EncodedType_Fixed32 = 2, - kUpb_EncodedType_Fixed64 = 3, - kUpb_EncodedType_SFixed32 = 4, - kUpb_EncodedType_SFixed64 = 5, - kUpb_EncodedType_Int32 = 6, - kUpb_EncodedType_UInt32 = 7, - kUpb_EncodedType_SInt32 = 8, - kUpb_EncodedType_Int64 = 9, - kUpb_EncodedType_UInt64 = 10, - kUpb_EncodedType_SInt64 = 11, - kUpb_EncodedType_OpenEnum = 12, - kUpb_EncodedType_Bool = 13, - kUpb_EncodedType_Bytes = 14, - kUpb_EncodedType_String = 15, - kUpb_EncodedType_Group = 16, - kUpb_EncodedType_Message = 17, - kUpb_EncodedType_ClosedEnum = 18, - - kUpb_EncodedType_RepeatedBase = 20, -} upb_EncodedType; - -typedef enum { - kUpb_EncodedFieldModifier_FlipPacked = 1 << 0, - kUpb_EncodedFieldModifier_IsRequired = 1 << 1, - kUpb_EncodedFieldModifier_IsProto3Singular = 1 << 2, -} upb_EncodedFieldModifier; - -enum { - kUpb_EncodedValue_MinField = ' ', - kUpb_EncodedValue_MaxField = 'I', - kUpb_EncodedValue_MinModifier = 'L', - kUpb_EncodedValue_MaxModifier = '[', - kUpb_EncodedValue_End = '^', - kUpb_EncodedValue_MinSkip = '_', - kUpb_EncodedValue_MaxSkip = '~', - kUpb_EncodedValue_OneofSeparator = '~', - kUpb_EncodedValue_FieldSeparator = '|', - kUpb_EncodedValue_MinOneofField = ' ', - kUpb_EncodedValue_MaxOneofField = 'b', - kUpb_EncodedValue_MaxEnumMask = 'A', -}; - -enum { - kUpb_EncodedVersion_EnumV1 = '!', - kUpb_EncodedVersion_ExtensionV1 = '#', - kUpb_EncodedVersion_MapV1 = '%', - kUpb_EncodedVersion_MessageV1 = '$', - kUpb_EncodedVersion_MessageSetV1 = '&', -}; - - -#endif // UPB_MINI_DESCRIPTOR_INTERNAL_WIRE_CONSTANTS_H_ - -#ifndef UPB_MINI_DESCRIPTOR_INTERNAL_DECODER_H_ -#define UPB_MINI_DESCRIPTOR_INTERNAL_DECODER_H_ - - -// Must be last. - -// upb_MdDecoder: used internally for decoding MiniDescriptors for messages, -// extensions, and enums. -typedef struct { - const char* end; - upb_Status* status; - jmp_buf err; -} upb_MdDecoder; - -UPB_PRINTF(2, 3) -UPB_NORETURN UPB_INLINE void upb_MdDecoder_ErrorJmp(upb_MdDecoder* d, - const char* fmt, ...) { - if (d->status) { - va_list argp; - upb_Status_SetErrorMessage(d->status, "Error building mini table: "); - va_start(argp, fmt); - upb_Status_VAppendErrorFormat(d->status, fmt, argp); - va_end(argp); - } - UPB_LONGJMP(d->err, 1); -} - -UPB_INLINE void upb_MdDecoder_CheckOutOfMemory(upb_MdDecoder* d, - const void* ptr) { - if (!ptr) upb_MdDecoder_ErrorJmp(d, "Out of memory"); -} - -UPB_INLINE const char* upb_MdDecoder_DecodeBase92Varint( - upb_MdDecoder* d, const char* ptr, char first_ch, uint8_t min, uint8_t max, - uint32_t* out_val) { - ptr = _upb_Base92_DecodeVarint(ptr, d->end, first_ch, min, max, out_val); - if (!ptr) upb_MdDecoder_ErrorJmp(d, "Overlong varint"); - return ptr; -} - - -#endif // UPB_MINI_DESCRIPTOR_INTERNAL_DECODER_H_ - // This should #undef all macros #defined in def.inc #undef UPB_SIZE