diff --git a/php/ext/google/protobuf/php-upb.c b/php/ext/google/protobuf/php-upb.c index 6ca8ac1414..fb6a75697f 100644 --- a/php/ext/google/protobuf/php-upb.c +++ b/php/ext/google/protobuf/php-upb.c @@ -7897,450 +7897,123 @@ const struct upb_MiniTable _kUpb_MiniTable_Empty = { }; -#include - // Must be last. -/* The upb core does not generally have a concept of default instances. However - * for descriptor options we make an exception since the max size is known and - * modest (<200 bytes). All types can share a default instance since it is - * initialized to zeroes. - * - * We have to allocate an extra pointer for upb's internal metadata. */ -static const char opt_default_buf[_UPB_MAXOPT_SIZE + sizeof(void*)] = {0}; -const char* kUpbDefOptDefault = &opt_default_buf[sizeof(void*)]; - -const char* _upb_DefBuilder_FullToShort(const char* fullname) { - const char* p; +struct upb_DefPool { + upb_Arena* arena; + upb_strtable syms; // full_name -> packed def ptr + upb_strtable files; // file_name -> (upb_FileDef*) + upb_inttable exts; // (upb_MiniTableExtension*) -> (upb_FieldDef*) + upb_ExtensionRegistry* extreg; + upb_MiniTablePlatform platform; + void* scratch_data; + size_t scratch_size; + size_t bytes_loaded; +}; - if (fullname == NULL) { - return NULL; - } else if ((p = strrchr(fullname, '.')) == NULL) { - /* No '.' in the name, return the full string. */ - return fullname; - } else { - /* Return one past the last '.'. */ - return p + 1; - } +void upb_DefPool_Free(upb_DefPool* s) { + upb_Arena_Free(s->arena); + upb_gfree(s->scratch_data); + upb_gfree(s); } -void _upb_DefBuilder_FailJmp(upb_DefBuilder* ctx) { UPB_LONGJMP(ctx->err, 1); } +upb_DefPool* upb_DefPool_New(void) { + upb_DefPool* s = upb_gmalloc(sizeof(*s)); + if (!s) return NULL; -void _upb_DefBuilder_Errf(upb_DefBuilder* ctx, const char* fmt, ...) { - va_list argp; - va_start(argp, fmt); - upb_Status_VSetErrorFormat(ctx->status, fmt, argp); - va_end(argp); - _upb_DefBuilder_FailJmp(ctx); -} + s->arena = upb_Arena_New(); + s->bytes_loaded = 0; -void _upb_DefBuilder_OomErr(upb_DefBuilder* ctx) { - upb_Status_SetErrorMessage(ctx->status, "out of memory"); - _upb_DefBuilder_FailJmp(ctx); -} + s->scratch_size = 240; + s->scratch_data = upb_gmalloc(s->scratch_size); + if (!s->scratch_data) goto err; -// Verify a relative identifier string. The loop is branchless for speed. -static void _upb_DefBuilder_CheckIdentNotFull(upb_DefBuilder* ctx, - upb_StringView name) { - bool good = name.size > 0; + if (!upb_strtable_init(&s->syms, 32, s->arena)) goto err; + if (!upb_strtable_init(&s->files, 4, s->arena)) goto err; + if (!upb_inttable_init(&s->exts, s->arena)) goto err; - for (size_t i = 0; i < name.size; i++) { - const char c = name.data[i]; - const char d = c | 0x20; // force lowercase - const bool is_alpha = (('a' <= d) & (d <= 'z')) | (c == '_'); - const bool is_numer = ('0' <= c) & (c <= '9') & (i != 0); + s->extreg = upb_ExtensionRegistry_New(s->arena); + if (!s->extreg) goto err; - good &= is_alpha | is_numer; - } + s->platform = kUpb_MiniTablePlatform_Native; - if (!good) _upb_DefBuilder_CheckIdentSlow(ctx, name, false); -} + return s; -const char* _upb_DefBuilder_MakeFullName(upb_DefBuilder* ctx, - const char* prefix, - upb_StringView name) { - _upb_DefBuilder_CheckIdentNotFull(ctx, name); - if (prefix) { - // ret = prefix + '.' + name; - size_t n = strlen(prefix); - char* ret = _upb_DefBuilder_Alloc(ctx, n + name.size + 2); - strcpy(ret, prefix); - ret[n] = '.'; - memcpy(&ret[n + 1], name.data, name.size); - ret[n + 1 + name.size] = '\0'; - return ret; - } else { - char* ret = upb_strdup2(name.data, name.size, ctx->arena); - if (!ret) _upb_DefBuilder_OomErr(ctx); - return ret; - } +err: + upb_DefPool_Free(s); + return NULL; } -static bool remove_component(char* base, size_t* len) { - if (*len == 0) return false; +bool _upb_DefPool_InsertExt(upb_DefPool* s, const upb_MiniTableExtension* ext, + const upb_FieldDef* f) { + return upb_inttable_insert(&s->exts, (uintptr_t)ext, upb_value_constptr(f), + s->arena); +} - for (size_t i = *len - 1; i > 0; i--) { - if (base[i] == '.') { - *len = i; - return true; - } +bool _upb_DefPool_InsertSym(upb_DefPool* s, upb_StringView sym, upb_value v, + upb_Status* status) { + // TODO: table should support an operation "tryinsert" to avoid the double + // lookup. + if (upb_strtable_lookup2(&s->syms, sym.data, sym.size, NULL)) { + upb_Status_SetErrorFormat(status, "duplicate symbol '%s'", sym.data); + return false; + } + if (!upb_strtable_insert(&s->syms, sym.data, sym.size, v, s->arena)) { + upb_Status_SetErrorMessage(status, "out of memory"); + return false; } - - *len = 0; return true; } -const void* _upb_DefBuilder_ResolveAny(upb_DefBuilder* ctx, - const char* from_name_dbg, - const char* base, upb_StringView sym, - upb_deftype_t* type) { - if (sym.size == 0) goto notfound; +static const void* _upb_DefPool_Unpack(const upb_DefPool* s, const char* sym, + size_t size, upb_deftype_t type) { upb_value v; - if (sym.data[0] == '.') { - /* Symbols starting with '.' are absolute, so we do a single lookup. - * Slice to omit the leading '.' */ - if (!_upb_DefPool_LookupSym(ctx->symtab, sym.data + 1, sym.size - 1, &v)) { - goto notfound; - } - } else { - /* Remove components from base until we find an entry or run out. */ - size_t baselen = base ? strlen(base) : 0; - char* tmp = malloc(sym.size + baselen + 1); - while (1) { - char* p = tmp; - if (baselen) { - memcpy(p, base, baselen); - p[baselen] = '.'; - p += baselen + 1; - } - memcpy(p, sym.data, sym.size); - p += sym.size; - if (_upb_DefPool_LookupSym(ctx->symtab, tmp, p - tmp, &v)) { - break; - } - if (!remove_component(tmp, &baselen)) { - free(tmp); - goto notfound; - } - } - free(tmp); - } - - *type = _upb_DefType_Type(v); - return _upb_DefType_Unpack(v, *type); + return upb_strtable_lookup2(&s->syms, sym, size, &v) + ? _upb_DefType_Unpack(v, type) + : NULL; +} -notfound: - _upb_DefBuilder_Errf(ctx, "couldn't resolve name '" UPB_STRINGVIEW_FORMAT "'", - UPB_STRINGVIEW_ARGS(sym)); +bool _upb_DefPool_LookupSym(const upb_DefPool* s, const char* sym, size_t size, + upb_value* v) { + return upb_strtable_lookup2(&s->syms, sym, size, v); } -const void* _upb_DefBuilder_Resolve(upb_DefBuilder* ctx, - const char* from_name_dbg, const char* base, - upb_StringView sym, upb_deftype_t type) { - upb_deftype_t found_type; - const void* ret = - _upb_DefBuilder_ResolveAny(ctx, from_name_dbg, base, sym, &found_type); - if (ret && found_type != type) { - _upb_DefBuilder_Errf(ctx, - "type mismatch when resolving %s: couldn't find " - "name " UPB_STRINGVIEW_FORMAT " with type=%d", - from_name_dbg, UPB_STRINGVIEW_ARGS(sym), (int)type); - } - return ret; +upb_ExtensionRegistry* _upb_DefPool_ExtReg(const upb_DefPool* s) { + return s->extreg; } -// Per ASCII this will lower-case a letter. If the result is a letter, the -// input was definitely a letter. If the output is not a letter, this may -// have transformed the character unpredictably. -static char upb_ascii_lower(char ch) { return ch | 0x20; } +void** _upb_DefPool_ScratchData(const upb_DefPool* s) { + return (void**)&s->scratch_data; +} -// isalpha() etc. from are locale-dependent, which we don't want. -static bool upb_isbetween(uint8_t c, uint8_t low, uint8_t high) { - return low <= c && c <= high; +size_t* _upb_DefPool_ScratchSize(const upb_DefPool* s) { + return (size_t*)&s->scratch_size; } -static bool upb_isletter(char c) { - char lower = upb_ascii_lower(c); - return upb_isbetween(lower, 'a', 'z') || c == '_'; +void _upb_DefPool_SetPlatform(upb_DefPool* s, upb_MiniTablePlatform platform) { + assert(upb_strtable_count(&s->files) == 0); + s->platform = platform; } -static bool upb_isalphanum(char c) { - return upb_isletter(c) || upb_isbetween(c, '0', '9'); +const upb_MessageDef* upb_DefPool_FindMessageByName(const upb_DefPool* s, + const char* sym) { + return _upb_DefPool_Unpack(s, sym, strlen(sym), UPB_DEFTYPE_MSG); } -static bool TryGetChar(const char** src, const char* end, char* ch) { - if (*src == end) return false; - *ch = **src; - *src += 1; - return true; +const upb_MessageDef* upb_DefPool_FindMessageByNameWithSize( + const upb_DefPool* s, const char* sym, size_t len) { + return _upb_DefPool_Unpack(s, sym, len, UPB_DEFTYPE_MSG); } -static int TryGetHexDigit(const char** src, const char* end) { - char ch; - if (!TryGetChar(src, end, &ch)) return -1; - if ('0' <= ch && ch <= '9') { - return ch - '0'; - } - ch = upb_ascii_lower(ch); - if ('a' <= ch && ch <= 'f') { - return ch - 'a' + 0xa; - } - *src -= 1; // Char wasn't actually a hex digit. - return -1; +const upb_EnumDef* upb_DefPool_FindEnumByName(const upb_DefPool* s, + const char* sym) { + return _upb_DefPool_Unpack(s, sym, strlen(sym), UPB_DEFTYPE_ENUM); } -static char upb_DefBuilder_ParseHexEscape(upb_DefBuilder* ctx, - const upb_FieldDef* f, - const char** src, const char* end) { - int hex_digit = TryGetHexDigit(src, end); - if (hex_digit < 0) { - _upb_DefBuilder_Errf( - ctx, "\\x must be followed by at least one hex digit (field='%s')", - upb_FieldDef_FullName(f)); - return 0; - } - unsigned int ret = hex_digit; - while ((hex_digit = TryGetHexDigit(src, end)) >= 0) { - ret = (ret << 4) | hex_digit; - } - if (ret > 0xff) { - _upb_DefBuilder_Errf(ctx, "Value of hex escape in field %s exceeds 8 bits", - upb_FieldDef_FullName(f)); - return 0; - } - return ret; -} - -static char TryGetOctalDigit(const char** src, const char* end) { - char ch; - if (!TryGetChar(src, end, &ch)) return -1; - if ('0' <= ch && ch <= '7') { - return ch - '0'; - } - *src -= 1; // Char wasn't actually an octal digit. - return -1; -} - -static char upb_DefBuilder_ParseOctalEscape(upb_DefBuilder* ctx, - const upb_FieldDef* f, - const char** src, const char* end) { - char ch = 0; - for (int i = 0; i < 3; i++) { - char digit; - if ((digit = TryGetOctalDigit(src, end)) >= 0) { - ch = (ch << 3) | digit; - } - } - return ch; -} - -char _upb_DefBuilder_ParseEscape(upb_DefBuilder* ctx, const upb_FieldDef* f, - const char** src, const char* end) { - char ch; - if (!TryGetChar(src, end, &ch)) { - _upb_DefBuilder_Errf(ctx, "unterminated escape sequence in field %s", - upb_FieldDef_FullName(f)); - return 0; - } - switch (ch) { - case 'a': - return '\a'; - case 'b': - return '\b'; - case 'f': - return '\f'; - case 'n': - return '\n'; - case 'r': - return '\r'; - case 't': - return '\t'; - case 'v': - return '\v'; - case '\\': - return '\\'; - case '\'': - return '\''; - case '\"': - return '\"'; - case '?': - return '\?'; - case 'x': - case 'X': - return upb_DefBuilder_ParseHexEscape(ctx, f, src, end); - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - *src -= 1; - return upb_DefBuilder_ParseOctalEscape(ctx, f, src, end); - } - _upb_DefBuilder_Errf(ctx, "Unknown escape sequence: \\%c", ch); -} - -void _upb_DefBuilder_CheckIdentSlow(upb_DefBuilder* ctx, upb_StringView name, - bool full) { - const char* str = name.data; - const size_t len = name.size; - bool start = true; - for (size_t i = 0; i < len; i++) { - const char c = str[i]; - if (c == '.') { - if (start || !full) { - _upb_DefBuilder_Errf( - ctx, "invalid name: unexpected '.' (" UPB_STRINGVIEW_FORMAT ")", - UPB_STRINGVIEW_ARGS(name)); - } - start = true; - } else if (start) { - if (!upb_isletter(c)) { - _upb_DefBuilder_Errf(ctx, - "invalid name: path components must start with a " - "letter (" UPB_STRINGVIEW_FORMAT ")", - UPB_STRINGVIEW_ARGS(name)); - } - start = false; - } else if (!upb_isalphanum(c)) { - _upb_DefBuilder_Errf( - ctx, - "invalid name: non-alphanumeric character (" UPB_STRINGVIEW_FORMAT - ")", - UPB_STRINGVIEW_ARGS(name)); - } - } - if (start) { - _upb_DefBuilder_Errf(ctx, - "invalid name: empty part (" UPB_STRINGVIEW_FORMAT ")", - UPB_STRINGVIEW_ARGS(name)); - } - - // We should never reach this point. - UPB_ASSERT(false); -} - - - -// Must be last. - -struct upb_DefPool { - upb_Arena* arena; - upb_strtable syms; // full_name -> packed def ptr - upb_strtable files; // file_name -> (upb_FileDef*) - upb_inttable exts; // (upb_MiniTableExtension*) -> (upb_FieldDef*) - upb_ExtensionRegistry* extreg; - upb_MiniTablePlatform platform; - void* scratch_data; - size_t scratch_size; - size_t bytes_loaded; -}; - -void upb_DefPool_Free(upb_DefPool* s) { - upb_Arena_Free(s->arena); - upb_gfree(s->scratch_data); - upb_gfree(s); -} - -upb_DefPool* upb_DefPool_New(void) { - upb_DefPool* s = upb_gmalloc(sizeof(*s)); - if (!s) return NULL; - - s->arena = upb_Arena_New(); - s->bytes_loaded = 0; - - s->scratch_size = 240; - s->scratch_data = upb_gmalloc(s->scratch_size); - if (!s->scratch_data) goto err; - - if (!upb_strtable_init(&s->syms, 32, s->arena)) goto err; - if (!upb_strtable_init(&s->files, 4, s->arena)) goto err; - if (!upb_inttable_init(&s->exts, s->arena)) goto err; - - s->extreg = upb_ExtensionRegistry_New(s->arena); - if (!s->extreg) goto err; - - s->platform = kUpb_MiniTablePlatform_Native; - - return s; - -err: - upb_DefPool_Free(s); - return NULL; -} - -bool _upb_DefPool_InsertExt(upb_DefPool* s, const upb_MiniTableExtension* ext, - const upb_FieldDef* f) { - return upb_inttable_insert(&s->exts, (uintptr_t)ext, upb_value_constptr(f), - s->arena); -} - -bool _upb_DefPool_InsertSym(upb_DefPool* s, upb_StringView sym, upb_value v, - upb_Status* status) { - // TODO: table should support an operation "tryinsert" to avoid the double - // lookup. - if (upb_strtable_lookup2(&s->syms, sym.data, sym.size, NULL)) { - upb_Status_SetErrorFormat(status, "duplicate symbol '%s'", sym.data); - return false; - } - if (!upb_strtable_insert(&s->syms, sym.data, sym.size, v, s->arena)) { - upb_Status_SetErrorMessage(status, "out of memory"); - return false; - } - return true; -} - -static const void* _upb_DefPool_Unpack(const upb_DefPool* s, const char* sym, - size_t size, upb_deftype_t type) { - upb_value v; - return upb_strtable_lookup2(&s->syms, sym, size, &v) - ? _upb_DefType_Unpack(v, type) - : NULL; -} - -bool _upb_DefPool_LookupSym(const upb_DefPool* s, const char* sym, size_t size, - upb_value* v) { - return upb_strtable_lookup2(&s->syms, sym, size, v); -} - -upb_ExtensionRegistry* _upb_DefPool_ExtReg(const upb_DefPool* s) { - return s->extreg; -} - -void** _upb_DefPool_ScratchData(const upb_DefPool* s) { - return (void**)&s->scratch_data; -} - -size_t* _upb_DefPool_ScratchSize(const upb_DefPool* s) { - return (size_t*)&s->scratch_size; -} - -void _upb_DefPool_SetPlatform(upb_DefPool* s, upb_MiniTablePlatform platform) { - assert(upb_strtable_count(&s->files) == 0); - s->platform = platform; -} - -const upb_MessageDef* upb_DefPool_FindMessageByName(const upb_DefPool* s, - const char* sym) { - return _upb_DefPool_Unpack(s, sym, strlen(sym), UPB_DEFTYPE_MSG); -} - -const upb_MessageDef* upb_DefPool_FindMessageByNameWithSize( - const upb_DefPool* s, const char* sym, size_t len) { - return _upb_DefPool_Unpack(s, sym, len, UPB_DEFTYPE_MSG); -} - -const upb_EnumDef* upb_DefPool_FindEnumByName(const upb_DefPool* s, - const char* sym) { - return _upb_DefPool_Unpack(s, sym, strlen(sym), UPB_DEFTYPE_ENUM); -} - -const upb_EnumValueDef* upb_DefPool_FindEnumByNameval(const upb_DefPool* s, - const char* sym) { - return _upb_DefPool_Unpack(s, sym, strlen(sym), UPB_DEFTYPE_ENUMVAL); +const upb_EnumValueDef* upb_DefPool_FindEnumByNameval(const upb_DefPool* s, + const char* sym) { + return _upb_DefPool_Unpack(s, sym, strlen(sym), UPB_DEFTYPE_ENUMVAL); } const upb_FileDef* upb_DefPool_FindFileByName(const upb_DefPool* s, @@ -10430,6 +10103,333 @@ void _upb_FileDef_Create(upb_DefBuilder* ctx, #include +// Must be last. + +/* The upb core does not generally have a concept of default instances. However + * for descriptor options we make an exception since the max size is known and + * modest (<200 bytes). All types can share a default instance since it is + * initialized to zeroes. + * + * We have to allocate an extra pointer for upb's internal metadata. */ +static const char opt_default_buf[_UPB_MAXOPT_SIZE + sizeof(void*)] = {0}; +const char* kUpbDefOptDefault = &opt_default_buf[sizeof(void*)]; + +const char* _upb_DefBuilder_FullToShort(const char* fullname) { + const char* p; + + if (fullname == NULL) { + return NULL; + } else if ((p = strrchr(fullname, '.')) == NULL) { + /* No '.' in the name, return the full string. */ + return fullname; + } else { + /* Return one past the last '.'. */ + return p + 1; + } +} + +void _upb_DefBuilder_FailJmp(upb_DefBuilder* ctx) { UPB_LONGJMP(ctx->err, 1); } + +void _upb_DefBuilder_Errf(upb_DefBuilder* ctx, const char* fmt, ...) { + va_list argp; + va_start(argp, fmt); + upb_Status_VSetErrorFormat(ctx->status, fmt, argp); + va_end(argp); + _upb_DefBuilder_FailJmp(ctx); +} + +void _upb_DefBuilder_OomErr(upb_DefBuilder* ctx) { + upb_Status_SetErrorMessage(ctx->status, "out of memory"); + _upb_DefBuilder_FailJmp(ctx); +} + +// Verify a relative identifier string. The loop is branchless for speed. +static void _upb_DefBuilder_CheckIdentNotFull(upb_DefBuilder* ctx, + upb_StringView name) { + bool good = name.size > 0; + + for (size_t i = 0; i < name.size; i++) { + const char c = name.data[i]; + const char d = c | 0x20; // force lowercase + const bool is_alpha = (('a' <= d) & (d <= 'z')) | (c == '_'); + const bool is_numer = ('0' <= c) & (c <= '9') & (i != 0); + + good &= is_alpha | is_numer; + } + + if (!good) _upb_DefBuilder_CheckIdentSlow(ctx, name, false); +} + +const char* _upb_DefBuilder_MakeFullName(upb_DefBuilder* ctx, + const char* prefix, + upb_StringView name) { + _upb_DefBuilder_CheckIdentNotFull(ctx, name); + if (prefix) { + // ret = prefix + '.' + name; + size_t n = strlen(prefix); + char* ret = _upb_DefBuilder_Alloc(ctx, n + name.size + 2); + strcpy(ret, prefix); + ret[n] = '.'; + memcpy(&ret[n + 1], name.data, name.size); + ret[n + 1 + name.size] = '\0'; + return ret; + } else { + char* ret = upb_strdup2(name.data, name.size, ctx->arena); + if (!ret) _upb_DefBuilder_OomErr(ctx); + return ret; + } +} + +static bool remove_component(char* base, size_t* len) { + if (*len == 0) return false; + + for (size_t i = *len - 1; i > 0; i--) { + if (base[i] == '.') { + *len = i; + return true; + } + } + + *len = 0; + return true; +} + +const void* _upb_DefBuilder_ResolveAny(upb_DefBuilder* ctx, + const char* from_name_dbg, + const char* base, upb_StringView sym, + upb_deftype_t* type) { + if (sym.size == 0) goto notfound; + upb_value v; + if (sym.data[0] == '.') { + /* Symbols starting with '.' are absolute, so we do a single lookup. + * Slice to omit the leading '.' */ + if (!_upb_DefPool_LookupSym(ctx->symtab, sym.data + 1, sym.size - 1, &v)) { + goto notfound; + } + } else { + /* Remove components from base until we find an entry or run out. */ + size_t baselen = base ? strlen(base) : 0; + char* tmp = malloc(sym.size + baselen + 1); + while (1) { + char* p = tmp; + if (baselen) { + memcpy(p, base, baselen); + p[baselen] = '.'; + p += baselen + 1; + } + memcpy(p, sym.data, sym.size); + p += sym.size; + if (_upb_DefPool_LookupSym(ctx->symtab, tmp, p - tmp, &v)) { + break; + } + if (!remove_component(tmp, &baselen)) { + free(tmp); + goto notfound; + } + } + free(tmp); + } + + *type = _upb_DefType_Type(v); + return _upb_DefType_Unpack(v, *type); + +notfound: + _upb_DefBuilder_Errf(ctx, "couldn't resolve name '" UPB_STRINGVIEW_FORMAT "'", + UPB_STRINGVIEW_ARGS(sym)); +} + +const void* _upb_DefBuilder_Resolve(upb_DefBuilder* ctx, + const char* from_name_dbg, const char* base, + upb_StringView sym, upb_deftype_t type) { + upb_deftype_t found_type; + const void* ret = + _upb_DefBuilder_ResolveAny(ctx, from_name_dbg, base, sym, &found_type); + if (ret && found_type != type) { + _upb_DefBuilder_Errf(ctx, + "type mismatch when resolving %s: couldn't find " + "name " UPB_STRINGVIEW_FORMAT " with type=%d", + from_name_dbg, UPB_STRINGVIEW_ARGS(sym), (int)type); + } + return ret; +} + +// Per ASCII this will lower-case a letter. If the result is a letter, the +// input was definitely a letter. If the output is not a letter, this may +// have transformed the character unpredictably. +static char upb_ascii_lower(char ch) { return ch | 0x20; } + +// isalpha() etc. from are locale-dependent, which we don't want. +static bool upb_isbetween(uint8_t c, uint8_t low, uint8_t high) { + return low <= c && c <= high; +} + +static bool upb_isletter(char c) { + char lower = upb_ascii_lower(c); + return upb_isbetween(lower, 'a', 'z') || c == '_'; +} + +static bool upb_isalphanum(char c) { + return upb_isletter(c) || upb_isbetween(c, '0', '9'); +} + +static bool TryGetChar(const char** src, const char* end, char* ch) { + if (*src == end) return false; + *ch = **src; + *src += 1; + return true; +} + +static int TryGetHexDigit(const char** src, const char* end) { + char ch; + if (!TryGetChar(src, end, &ch)) return -1; + if ('0' <= ch && ch <= '9') { + return ch - '0'; + } + ch = upb_ascii_lower(ch); + if ('a' <= ch && ch <= 'f') { + return ch - 'a' + 0xa; + } + *src -= 1; // Char wasn't actually a hex digit. + return -1; +} + +static char upb_DefBuilder_ParseHexEscape(upb_DefBuilder* ctx, + const upb_FieldDef* f, + const char** src, const char* end) { + int hex_digit = TryGetHexDigit(src, end); + if (hex_digit < 0) { + _upb_DefBuilder_Errf( + ctx, "\\x must be followed by at least one hex digit (field='%s')", + upb_FieldDef_FullName(f)); + return 0; + } + unsigned int ret = hex_digit; + while ((hex_digit = TryGetHexDigit(src, end)) >= 0) { + ret = (ret << 4) | hex_digit; + } + if (ret > 0xff) { + _upb_DefBuilder_Errf(ctx, "Value of hex escape in field %s exceeds 8 bits", + upb_FieldDef_FullName(f)); + return 0; + } + return ret; +} + +static char TryGetOctalDigit(const char** src, const char* end) { + char ch; + if (!TryGetChar(src, end, &ch)) return -1; + if ('0' <= ch && ch <= '7') { + return ch - '0'; + } + *src -= 1; // Char wasn't actually an octal digit. + return -1; +} + +static char upb_DefBuilder_ParseOctalEscape(upb_DefBuilder* ctx, + const upb_FieldDef* f, + const char** src, const char* end) { + char ch = 0; + for (int i = 0; i < 3; i++) { + char digit; + if ((digit = TryGetOctalDigit(src, end)) >= 0) { + ch = (ch << 3) | digit; + } + } + return ch; +} + +char _upb_DefBuilder_ParseEscape(upb_DefBuilder* ctx, const upb_FieldDef* f, + const char** src, const char* end) { + char ch; + if (!TryGetChar(src, end, &ch)) { + _upb_DefBuilder_Errf(ctx, "unterminated escape sequence in field %s", + upb_FieldDef_FullName(f)); + return 0; + } + switch (ch) { + case 'a': + return '\a'; + case 'b': + return '\b'; + case 'f': + return '\f'; + case 'n': + return '\n'; + case 'r': + return '\r'; + case 't': + return '\t'; + case 'v': + return '\v'; + case '\\': + return '\\'; + case '\'': + return '\''; + case '\"': + return '\"'; + case '?': + return '\?'; + case 'x': + case 'X': + return upb_DefBuilder_ParseHexEscape(ctx, f, src, end); + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + *src -= 1; + return upb_DefBuilder_ParseOctalEscape(ctx, f, src, end); + } + _upb_DefBuilder_Errf(ctx, "Unknown escape sequence: \\%c", ch); +} + +void _upb_DefBuilder_CheckIdentSlow(upb_DefBuilder* ctx, upb_StringView name, + bool full) { + const char* str = name.data; + const size_t len = name.size; + bool start = true; + for (size_t i = 0; i < len; i++) { + const char c = str[i]; + if (c == '.') { + if (start || !full) { + _upb_DefBuilder_Errf( + ctx, "invalid name: unexpected '.' (" UPB_STRINGVIEW_FORMAT ")", + UPB_STRINGVIEW_ARGS(name)); + } + start = true; + } else if (start) { + if (!upb_isletter(c)) { + _upb_DefBuilder_Errf(ctx, + "invalid name: path components must start with a " + "letter (" UPB_STRINGVIEW_FORMAT ")", + UPB_STRINGVIEW_ARGS(name)); + } + start = false; + } else if (!upb_isalphanum(c)) { + _upb_DefBuilder_Errf( + ctx, + "invalid name: non-alphanumeric character (" UPB_STRINGVIEW_FORMAT + ")", + UPB_STRINGVIEW_ARGS(name)); + } + } + if (start) { + _upb_DefBuilder_Errf(ctx, + "invalid name: empty part (" UPB_STRINGVIEW_FORMAT ")", + UPB_STRINGVIEW_ARGS(name)); + } + + // We should never reach this point. + UPB_ASSERT(false); +} + + +#include + + // Must be last. char* upb_strdup2(const char* s, size_t len, upb_Arena* a) { diff --git a/php/ext/google/protobuf/php-upb.h b/php/ext/google/protobuf/php-upb.h index 831114c1c8..51357fb61e 100644 --- a/php/ext/google/protobuf/php-upb.h +++ b/php/ext/google/protobuf/php-upb.h @@ -12628,29 +12628,6 @@ UPB_INLINE void _upb_DefBuilder_CheckIdentFull(upb_DefBuilder* ctx, #endif /* UPB_REFLECTION_DEF_BUILDER_INTERNAL_H_ */ -#ifndef UPB_REFLECTION_INTERNAL_STRDUP2_H_ -#define UPB_REFLECTION_INTERNAL_STRDUP2_H_ - -#include - - -// Must be last. - -#ifdef __cplusplus -extern "C" { -#endif - -// Variant that works with a length-delimited rather than NULL-delimited string, -// as supported by strtable. -char* upb_strdup2(const char* s, size_t len, upb_Arena* a); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - - -#endif /* UPB_REFLECTION_INTERNAL_STRDUP2_H_ */ - #ifndef UPB_REFLECTION_ENUM_DEF_INTERNAL_H_ #define UPB_REFLECTION_ENUM_DEF_INTERNAL_H_ @@ -12921,6 +12898,29 @@ upb_EnumReservedRange* _upb_EnumReservedRanges_New( #endif /* UPB_REFLECTION_ENUM_RESERVED_RANGE_INTERNAL_H_ */ +#ifndef UPB_REFLECTION_INTERNAL_STRDUP2_H_ +#define UPB_REFLECTION_INTERNAL_STRDUP2_H_ + +#include + + +// Must be last. + +#ifdef __cplusplus +extern "C" { +#endif + +// Variant that works with a length-delimited rather than NULL-delimited string, +// as supported by strtable. +char* upb_strdup2(const char* s, size_t len, upb_Arena* a); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* UPB_REFLECTION_INTERNAL_STRDUP2_H_ */ + #ifndef UPB_REFLECTION_EXTENSION_RANGE_INTERNAL_H_ #define UPB_REFLECTION_EXTENSION_RANGE_INTERNAL_H_ diff --git a/ruby/ext/google/protobuf_c/ruby-upb.c b/ruby/ext/google/protobuf_c/ruby-upb.c index ccaa3f3846..99c0970215 100644 --- a/ruby/ext/google/protobuf_c/ruby-upb.c +++ b/ruby/ext/google/protobuf_c/ruby-upb.c @@ -7415,450 +7415,123 @@ const struct upb_MiniTable _kUpb_MiniTable_Empty = { }; -#include - // Must be last. -/* The upb core does not generally have a concept of default instances. However - * for descriptor options we make an exception since the max size is known and - * modest (<200 bytes). All types can share a default instance since it is - * initialized to zeroes. - * - * We have to allocate an extra pointer for upb's internal metadata. */ -static const char opt_default_buf[_UPB_MAXOPT_SIZE + sizeof(void*)] = {0}; -const char* kUpbDefOptDefault = &opt_default_buf[sizeof(void*)]; - -const char* _upb_DefBuilder_FullToShort(const char* fullname) { - const char* p; +struct upb_DefPool { + upb_Arena* arena; + upb_strtable syms; // full_name -> packed def ptr + upb_strtable files; // file_name -> (upb_FileDef*) + upb_inttable exts; // (upb_MiniTableExtension*) -> (upb_FieldDef*) + upb_ExtensionRegistry* extreg; + upb_MiniTablePlatform platform; + void* scratch_data; + size_t scratch_size; + size_t bytes_loaded; +}; - if (fullname == NULL) { - return NULL; - } else if ((p = strrchr(fullname, '.')) == NULL) { - /* No '.' in the name, return the full string. */ - return fullname; - } else { - /* Return one past the last '.'. */ - return p + 1; - } +void upb_DefPool_Free(upb_DefPool* s) { + upb_Arena_Free(s->arena); + upb_gfree(s->scratch_data); + upb_gfree(s); } -void _upb_DefBuilder_FailJmp(upb_DefBuilder* ctx) { UPB_LONGJMP(ctx->err, 1); } +upb_DefPool* upb_DefPool_New(void) { + upb_DefPool* s = upb_gmalloc(sizeof(*s)); + if (!s) return NULL; -void _upb_DefBuilder_Errf(upb_DefBuilder* ctx, const char* fmt, ...) { - va_list argp; - va_start(argp, fmt); - upb_Status_VSetErrorFormat(ctx->status, fmt, argp); - va_end(argp); - _upb_DefBuilder_FailJmp(ctx); -} + s->arena = upb_Arena_New(); + s->bytes_loaded = 0; -void _upb_DefBuilder_OomErr(upb_DefBuilder* ctx) { - upb_Status_SetErrorMessage(ctx->status, "out of memory"); - _upb_DefBuilder_FailJmp(ctx); -} + s->scratch_size = 240; + s->scratch_data = upb_gmalloc(s->scratch_size); + if (!s->scratch_data) goto err; -// Verify a relative identifier string. The loop is branchless for speed. -static void _upb_DefBuilder_CheckIdentNotFull(upb_DefBuilder* ctx, - upb_StringView name) { - bool good = name.size > 0; + if (!upb_strtable_init(&s->syms, 32, s->arena)) goto err; + if (!upb_strtable_init(&s->files, 4, s->arena)) goto err; + if (!upb_inttable_init(&s->exts, s->arena)) goto err; - for (size_t i = 0; i < name.size; i++) { - const char c = name.data[i]; - const char d = c | 0x20; // force lowercase - const bool is_alpha = (('a' <= d) & (d <= 'z')) | (c == '_'); - const bool is_numer = ('0' <= c) & (c <= '9') & (i != 0); + s->extreg = upb_ExtensionRegistry_New(s->arena); + if (!s->extreg) goto err; - good &= is_alpha | is_numer; - } + s->platform = kUpb_MiniTablePlatform_Native; - if (!good) _upb_DefBuilder_CheckIdentSlow(ctx, name, false); -} + return s; -const char* _upb_DefBuilder_MakeFullName(upb_DefBuilder* ctx, - const char* prefix, - upb_StringView name) { - _upb_DefBuilder_CheckIdentNotFull(ctx, name); - if (prefix) { - // ret = prefix + '.' + name; - size_t n = strlen(prefix); - char* ret = _upb_DefBuilder_Alloc(ctx, n + name.size + 2); - strcpy(ret, prefix); - ret[n] = '.'; - memcpy(&ret[n + 1], name.data, name.size); - ret[n + 1 + name.size] = '\0'; - return ret; - } else { - char* ret = upb_strdup2(name.data, name.size, ctx->arena); - if (!ret) _upb_DefBuilder_OomErr(ctx); - return ret; - } +err: + upb_DefPool_Free(s); + return NULL; } -static bool remove_component(char* base, size_t* len) { - if (*len == 0) return false; +bool _upb_DefPool_InsertExt(upb_DefPool* s, const upb_MiniTableExtension* ext, + const upb_FieldDef* f) { + return upb_inttable_insert(&s->exts, (uintptr_t)ext, upb_value_constptr(f), + s->arena); +} - for (size_t i = *len - 1; i > 0; i--) { - if (base[i] == '.') { - *len = i; - return true; - } +bool _upb_DefPool_InsertSym(upb_DefPool* s, upb_StringView sym, upb_value v, + upb_Status* status) { + // TODO: table should support an operation "tryinsert" to avoid the double + // lookup. + if (upb_strtable_lookup2(&s->syms, sym.data, sym.size, NULL)) { + upb_Status_SetErrorFormat(status, "duplicate symbol '%s'", sym.data); + return false; + } + if (!upb_strtable_insert(&s->syms, sym.data, sym.size, v, s->arena)) { + upb_Status_SetErrorMessage(status, "out of memory"); + return false; } - - *len = 0; return true; } -const void* _upb_DefBuilder_ResolveAny(upb_DefBuilder* ctx, - const char* from_name_dbg, - const char* base, upb_StringView sym, - upb_deftype_t* type) { - if (sym.size == 0) goto notfound; +static const void* _upb_DefPool_Unpack(const upb_DefPool* s, const char* sym, + size_t size, upb_deftype_t type) { upb_value v; - if (sym.data[0] == '.') { - /* Symbols starting with '.' are absolute, so we do a single lookup. - * Slice to omit the leading '.' */ - if (!_upb_DefPool_LookupSym(ctx->symtab, sym.data + 1, sym.size - 1, &v)) { - goto notfound; - } - } else { - /* Remove components from base until we find an entry or run out. */ - size_t baselen = base ? strlen(base) : 0; - char* tmp = malloc(sym.size + baselen + 1); - while (1) { - char* p = tmp; - if (baselen) { - memcpy(p, base, baselen); - p[baselen] = '.'; - p += baselen + 1; - } - memcpy(p, sym.data, sym.size); - p += sym.size; - if (_upb_DefPool_LookupSym(ctx->symtab, tmp, p - tmp, &v)) { - break; - } - if (!remove_component(tmp, &baselen)) { - free(tmp); - goto notfound; - } - } - free(tmp); - } - - *type = _upb_DefType_Type(v); - return _upb_DefType_Unpack(v, *type); + return upb_strtable_lookup2(&s->syms, sym, size, &v) + ? _upb_DefType_Unpack(v, type) + : NULL; +} -notfound: - _upb_DefBuilder_Errf(ctx, "couldn't resolve name '" UPB_STRINGVIEW_FORMAT "'", - UPB_STRINGVIEW_ARGS(sym)); +bool _upb_DefPool_LookupSym(const upb_DefPool* s, const char* sym, size_t size, + upb_value* v) { + return upb_strtable_lookup2(&s->syms, sym, size, v); } -const void* _upb_DefBuilder_Resolve(upb_DefBuilder* ctx, - const char* from_name_dbg, const char* base, - upb_StringView sym, upb_deftype_t type) { - upb_deftype_t found_type; - const void* ret = - _upb_DefBuilder_ResolveAny(ctx, from_name_dbg, base, sym, &found_type); - if (ret && found_type != type) { - _upb_DefBuilder_Errf(ctx, - "type mismatch when resolving %s: couldn't find " - "name " UPB_STRINGVIEW_FORMAT " with type=%d", - from_name_dbg, UPB_STRINGVIEW_ARGS(sym), (int)type); - } - return ret; +upb_ExtensionRegistry* _upb_DefPool_ExtReg(const upb_DefPool* s) { + return s->extreg; } -// Per ASCII this will lower-case a letter. If the result is a letter, the -// input was definitely a letter. If the output is not a letter, this may -// have transformed the character unpredictably. -static char upb_ascii_lower(char ch) { return ch | 0x20; } +void** _upb_DefPool_ScratchData(const upb_DefPool* s) { + return (void**)&s->scratch_data; +} -// isalpha() etc. from are locale-dependent, which we don't want. -static bool upb_isbetween(uint8_t c, uint8_t low, uint8_t high) { - return low <= c && c <= high; +size_t* _upb_DefPool_ScratchSize(const upb_DefPool* s) { + return (size_t*)&s->scratch_size; } -static bool upb_isletter(char c) { - char lower = upb_ascii_lower(c); - return upb_isbetween(lower, 'a', 'z') || c == '_'; +void _upb_DefPool_SetPlatform(upb_DefPool* s, upb_MiniTablePlatform platform) { + assert(upb_strtable_count(&s->files) == 0); + s->platform = platform; } -static bool upb_isalphanum(char c) { - return upb_isletter(c) || upb_isbetween(c, '0', '9'); +const upb_MessageDef* upb_DefPool_FindMessageByName(const upb_DefPool* s, + const char* sym) { + return _upb_DefPool_Unpack(s, sym, strlen(sym), UPB_DEFTYPE_MSG); } -static bool TryGetChar(const char** src, const char* end, char* ch) { - if (*src == end) return false; - *ch = **src; - *src += 1; - return true; +const upb_MessageDef* upb_DefPool_FindMessageByNameWithSize( + const upb_DefPool* s, const char* sym, size_t len) { + return _upb_DefPool_Unpack(s, sym, len, UPB_DEFTYPE_MSG); } -static int TryGetHexDigit(const char** src, const char* end) { - char ch; - if (!TryGetChar(src, end, &ch)) return -1; - if ('0' <= ch && ch <= '9') { - return ch - '0'; - } - ch = upb_ascii_lower(ch); - if ('a' <= ch && ch <= 'f') { - return ch - 'a' + 0xa; - } - *src -= 1; // Char wasn't actually a hex digit. - return -1; +const upb_EnumDef* upb_DefPool_FindEnumByName(const upb_DefPool* s, + const char* sym) { + return _upb_DefPool_Unpack(s, sym, strlen(sym), UPB_DEFTYPE_ENUM); } -static char upb_DefBuilder_ParseHexEscape(upb_DefBuilder* ctx, - const upb_FieldDef* f, - const char** src, const char* end) { - int hex_digit = TryGetHexDigit(src, end); - if (hex_digit < 0) { - _upb_DefBuilder_Errf( - ctx, "\\x must be followed by at least one hex digit (field='%s')", - upb_FieldDef_FullName(f)); - return 0; - } - unsigned int ret = hex_digit; - while ((hex_digit = TryGetHexDigit(src, end)) >= 0) { - ret = (ret << 4) | hex_digit; - } - if (ret > 0xff) { - _upb_DefBuilder_Errf(ctx, "Value of hex escape in field %s exceeds 8 bits", - upb_FieldDef_FullName(f)); - return 0; - } - return ret; -} - -static char TryGetOctalDigit(const char** src, const char* end) { - char ch; - if (!TryGetChar(src, end, &ch)) return -1; - if ('0' <= ch && ch <= '7') { - return ch - '0'; - } - *src -= 1; // Char wasn't actually an octal digit. - return -1; -} - -static char upb_DefBuilder_ParseOctalEscape(upb_DefBuilder* ctx, - const upb_FieldDef* f, - const char** src, const char* end) { - char ch = 0; - for (int i = 0; i < 3; i++) { - char digit; - if ((digit = TryGetOctalDigit(src, end)) >= 0) { - ch = (ch << 3) | digit; - } - } - return ch; -} - -char _upb_DefBuilder_ParseEscape(upb_DefBuilder* ctx, const upb_FieldDef* f, - const char** src, const char* end) { - char ch; - if (!TryGetChar(src, end, &ch)) { - _upb_DefBuilder_Errf(ctx, "unterminated escape sequence in field %s", - upb_FieldDef_FullName(f)); - return 0; - } - switch (ch) { - case 'a': - return '\a'; - case 'b': - return '\b'; - case 'f': - return '\f'; - case 'n': - return '\n'; - case 'r': - return '\r'; - case 't': - return '\t'; - case 'v': - return '\v'; - case '\\': - return '\\'; - case '\'': - return '\''; - case '\"': - return '\"'; - case '?': - return '\?'; - case 'x': - case 'X': - return upb_DefBuilder_ParseHexEscape(ctx, f, src, end); - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - *src -= 1; - return upb_DefBuilder_ParseOctalEscape(ctx, f, src, end); - } - _upb_DefBuilder_Errf(ctx, "Unknown escape sequence: \\%c", ch); -} - -void _upb_DefBuilder_CheckIdentSlow(upb_DefBuilder* ctx, upb_StringView name, - bool full) { - const char* str = name.data; - const size_t len = name.size; - bool start = true; - for (size_t i = 0; i < len; i++) { - const char c = str[i]; - if (c == '.') { - if (start || !full) { - _upb_DefBuilder_Errf( - ctx, "invalid name: unexpected '.' (" UPB_STRINGVIEW_FORMAT ")", - UPB_STRINGVIEW_ARGS(name)); - } - start = true; - } else if (start) { - if (!upb_isletter(c)) { - _upb_DefBuilder_Errf(ctx, - "invalid name: path components must start with a " - "letter (" UPB_STRINGVIEW_FORMAT ")", - UPB_STRINGVIEW_ARGS(name)); - } - start = false; - } else if (!upb_isalphanum(c)) { - _upb_DefBuilder_Errf( - ctx, - "invalid name: non-alphanumeric character (" UPB_STRINGVIEW_FORMAT - ")", - UPB_STRINGVIEW_ARGS(name)); - } - } - if (start) { - _upb_DefBuilder_Errf(ctx, - "invalid name: empty part (" UPB_STRINGVIEW_FORMAT ")", - UPB_STRINGVIEW_ARGS(name)); - } - - // We should never reach this point. - UPB_ASSERT(false); -} - - - -// Must be last. - -struct upb_DefPool { - upb_Arena* arena; - upb_strtable syms; // full_name -> packed def ptr - upb_strtable files; // file_name -> (upb_FileDef*) - upb_inttable exts; // (upb_MiniTableExtension*) -> (upb_FieldDef*) - upb_ExtensionRegistry* extreg; - upb_MiniTablePlatform platform; - void* scratch_data; - size_t scratch_size; - size_t bytes_loaded; -}; - -void upb_DefPool_Free(upb_DefPool* s) { - upb_Arena_Free(s->arena); - upb_gfree(s->scratch_data); - upb_gfree(s); -} - -upb_DefPool* upb_DefPool_New(void) { - upb_DefPool* s = upb_gmalloc(sizeof(*s)); - if (!s) return NULL; - - s->arena = upb_Arena_New(); - s->bytes_loaded = 0; - - s->scratch_size = 240; - s->scratch_data = upb_gmalloc(s->scratch_size); - if (!s->scratch_data) goto err; - - if (!upb_strtable_init(&s->syms, 32, s->arena)) goto err; - if (!upb_strtable_init(&s->files, 4, s->arena)) goto err; - if (!upb_inttable_init(&s->exts, s->arena)) goto err; - - s->extreg = upb_ExtensionRegistry_New(s->arena); - if (!s->extreg) goto err; - - s->platform = kUpb_MiniTablePlatform_Native; - - return s; - -err: - upb_DefPool_Free(s); - return NULL; -} - -bool _upb_DefPool_InsertExt(upb_DefPool* s, const upb_MiniTableExtension* ext, - const upb_FieldDef* f) { - return upb_inttable_insert(&s->exts, (uintptr_t)ext, upb_value_constptr(f), - s->arena); -} - -bool _upb_DefPool_InsertSym(upb_DefPool* s, upb_StringView sym, upb_value v, - upb_Status* status) { - // TODO: table should support an operation "tryinsert" to avoid the double - // lookup. - if (upb_strtable_lookup2(&s->syms, sym.data, sym.size, NULL)) { - upb_Status_SetErrorFormat(status, "duplicate symbol '%s'", sym.data); - return false; - } - if (!upb_strtable_insert(&s->syms, sym.data, sym.size, v, s->arena)) { - upb_Status_SetErrorMessage(status, "out of memory"); - return false; - } - return true; -} - -static const void* _upb_DefPool_Unpack(const upb_DefPool* s, const char* sym, - size_t size, upb_deftype_t type) { - upb_value v; - return upb_strtable_lookup2(&s->syms, sym, size, &v) - ? _upb_DefType_Unpack(v, type) - : NULL; -} - -bool _upb_DefPool_LookupSym(const upb_DefPool* s, const char* sym, size_t size, - upb_value* v) { - return upb_strtable_lookup2(&s->syms, sym, size, v); -} - -upb_ExtensionRegistry* _upb_DefPool_ExtReg(const upb_DefPool* s) { - return s->extreg; -} - -void** _upb_DefPool_ScratchData(const upb_DefPool* s) { - return (void**)&s->scratch_data; -} - -size_t* _upb_DefPool_ScratchSize(const upb_DefPool* s) { - return (size_t*)&s->scratch_size; -} - -void _upb_DefPool_SetPlatform(upb_DefPool* s, upb_MiniTablePlatform platform) { - assert(upb_strtable_count(&s->files) == 0); - s->platform = platform; -} - -const upb_MessageDef* upb_DefPool_FindMessageByName(const upb_DefPool* s, - const char* sym) { - return _upb_DefPool_Unpack(s, sym, strlen(sym), UPB_DEFTYPE_MSG); -} - -const upb_MessageDef* upb_DefPool_FindMessageByNameWithSize( - const upb_DefPool* s, const char* sym, size_t len) { - return _upb_DefPool_Unpack(s, sym, len, UPB_DEFTYPE_MSG); -} - -const upb_EnumDef* upb_DefPool_FindEnumByName(const upb_DefPool* s, - const char* sym) { - return _upb_DefPool_Unpack(s, sym, strlen(sym), UPB_DEFTYPE_ENUM); -} - -const upb_EnumValueDef* upb_DefPool_FindEnumByNameval(const upb_DefPool* s, - const char* sym) { - return _upb_DefPool_Unpack(s, sym, strlen(sym), UPB_DEFTYPE_ENUMVAL); +const upb_EnumValueDef* upb_DefPool_FindEnumByNameval(const upb_DefPool* s, + const char* sym) { + return _upb_DefPool_Unpack(s, sym, strlen(sym), UPB_DEFTYPE_ENUMVAL); } const upb_FileDef* upb_DefPool_FindFileByName(const upb_DefPool* s, @@ -9948,6 +9621,333 @@ void _upb_FileDef_Create(upb_DefBuilder* ctx, #include +// Must be last. + +/* The upb core does not generally have a concept of default instances. However + * for descriptor options we make an exception since the max size is known and + * modest (<200 bytes). All types can share a default instance since it is + * initialized to zeroes. + * + * We have to allocate an extra pointer for upb's internal metadata. */ +static const char opt_default_buf[_UPB_MAXOPT_SIZE + sizeof(void*)] = {0}; +const char* kUpbDefOptDefault = &opt_default_buf[sizeof(void*)]; + +const char* _upb_DefBuilder_FullToShort(const char* fullname) { + const char* p; + + if (fullname == NULL) { + return NULL; + } else if ((p = strrchr(fullname, '.')) == NULL) { + /* No '.' in the name, return the full string. */ + return fullname; + } else { + /* Return one past the last '.'. */ + return p + 1; + } +} + +void _upb_DefBuilder_FailJmp(upb_DefBuilder* ctx) { UPB_LONGJMP(ctx->err, 1); } + +void _upb_DefBuilder_Errf(upb_DefBuilder* ctx, const char* fmt, ...) { + va_list argp; + va_start(argp, fmt); + upb_Status_VSetErrorFormat(ctx->status, fmt, argp); + va_end(argp); + _upb_DefBuilder_FailJmp(ctx); +} + +void _upb_DefBuilder_OomErr(upb_DefBuilder* ctx) { + upb_Status_SetErrorMessage(ctx->status, "out of memory"); + _upb_DefBuilder_FailJmp(ctx); +} + +// Verify a relative identifier string. The loop is branchless for speed. +static void _upb_DefBuilder_CheckIdentNotFull(upb_DefBuilder* ctx, + upb_StringView name) { + bool good = name.size > 0; + + for (size_t i = 0; i < name.size; i++) { + const char c = name.data[i]; + const char d = c | 0x20; // force lowercase + const bool is_alpha = (('a' <= d) & (d <= 'z')) | (c == '_'); + const bool is_numer = ('0' <= c) & (c <= '9') & (i != 0); + + good &= is_alpha | is_numer; + } + + if (!good) _upb_DefBuilder_CheckIdentSlow(ctx, name, false); +} + +const char* _upb_DefBuilder_MakeFullName(upb_DefBuilder* ctx, + const char* prefix, + upb_StringView name) { + _upb_DefBuilder_CheckIdentNotFull(ctx, name); + if (prefix) { + // ret = prefix + '.' + name; + size_t n = strlen(prefix); + char* ret = _upb_DefBuilder_Alloc(ctx, n + name.size + 2); + strcpy(ret, prefix); + ret[n] = '.'; + memcpy(&ret[n + 1], name.data, name.size); + ret[n + 1 + name.size] = '\0'; + return ret; + } else { + char* ret = upb_strdup2(name.data, name.size, ctx->arena); + if (!ret) _upb_DefBuilder_OomErr(ctx); + return ret; + } +} + +static bool remove_component(char* base, size_t* len) { + if (*len == 0) return false; + + for (size_t i = *len - 1; i > 0; i--) { + if (base[i] == '.') { + *len = i; + return true; + } + } + + *len = 0; + return true; +} + +const void* _upb_DefBuilder_ResolveAny(upb_DefBuilder* ctx, + const char* from_name_dbg, + const char* base, upb_StringView sym, + upb_deftype_t* type) { + if (sym.size == 0) goto notfound; + upb_value v; + if (sym.data[0] == '.') { + /* Symbols starting with '.' are absolute, so we do a single lookup. + * Slice to omit the leading '.' */ + if (!_upb_DefPool_LookupSym(ctx->symtab, sym.data + 1, sym.size - 1, &v)) { + goto notfound; + } + } else { + /* Remove components from base until we find an entry or run out. */ + size_t baselen = base ? strlen(base) : 0; + char* tmp = malloc(sym.size + baselen + 1); + while (1) { + char* p = tmp; + if (baselen) { + memcpy(p, base, baselen); + p[baselen] = '.'; + p += baselen + 1; + } + memcpy(p, sym.data, sym.size); + p += sym.size; + if (_upb_DefPool_LookupSym(ctx->symtab, tmp, p - tmp, &v)) { + break; + } + if (!remove_component(tmp, &baselen)) { + free(tmp); + goto notfound; + } + } + free(tmp); + } + + *type = _upb_DefType_Type(v); + return _upb_DefType_Unpack(v, *type); + +notfound: + _upb_DefBuilder_Errf(ctx, "couldn't resolve name '" UPB_STRINGVIEW_FORMAT "'", + UPB_STRINGVIEW_ARGS(sym)); +} + +const void* _upb_DefBuilder_Resolve(upb_DefBuilder* ctx, + const char* from_name_dbg, const char* base, + upb_StringView sym, upb_deftype_t type) { + upb_deftype_t found_type; + const void* ret = + _upb_DefBuilder_ResolveAny(ctx, from_name_dbg, base, sym, &found_type); + if (ret && found_type != type) { + _upb_DefBuilder_Errf(ctx, + "type mismatch when resolving %s: couldn't find " + "name " UPB_STRINGVIEW_FORMAT " with type=%d", + from_name_dbg, UPB_STRINGVIEW_ARGS(sym), (int)type); + } + return ret; +} + +// Per ASCII this will lower-case a letter. If the result is a letter, the +// input was definitely a letter. If the output is not a letter, this may +// have transformed the character unpredictably. +static char upb_ascii_lower(char ch) { return ch | 0x20; } + +// isalpha() etc. from are locale-dependent, which we don't want. +static bool upb_isbetween(uint8_t c, uint8_t low, uint8_t high) { + return low <= c && c <= high; +} + +static bool upb_isletter(char c) { + char lower = upb_ascii_lower(c); + return upb_isbetween(lower, 'a', 'z') || c == '_'; +} + +static bool upb_isalphanum(char c) { + return upb_isletter(c) || upb_isbetween(c, '0', '9'); +} + +static bool TryGetChar(const char** src, const char* end, char* ch) { + if (*src == end) return false; + *ch = **src; + *src += 1; + return true; +} + +static int TryGetHexDigit(const char** src, const char* end) { + char ch; + if (!TryGetChar(src, end, &ch)) return -1; + if ('0' <= ch && ch <= '9') { + return ch - '0'; + } + ch = upb_ascii_lower(ch); + if ('a' <= ch && ch <= 'f') { + return ch - 'a' + 0xa; + } + *src -= 1; // Char wasn't actually a hex digit. + return -1; +} + +static char upb_DefBuilder_ParseHexEscape(upb_DefBuilder* ctx, + const upb_FieldDef* f, + const char** src, const char* end) { + int hex_digit = TryGetHexDigit(src, end); + if (hex_digit < 0) { + _upb_DefBuilder_Errf( + ctx, "\\x must be followed by at least one hex digit (field='%s')", + upb_FieldDef_FullName(f)); + return 0; + } + unsigned int ret = hex_digit; + while ((hex_digit = TryGetHexDigit(src, end)) >= 0) { + ret = (ret << 4) | hex_digit; + } + if (ret > 0xff) { + _upb_DefBuilder_Errf(ctx, "Value of hex escape in field %s exceeds 8 bits", + upb_FieldDef_FullName(f)); + return 0; + } + return ret; +} + +static char TryGetOctalDigit(const char** src, const char* end) { + char ch; + if (!TryGetChar(src, end, &ch)) return -1; + if ('0' <= ch && ch <= '7') { + return ch - '0'; + } + *src -= 1; // Char wasn't actually an octal digit. + return -1; +} + +static char upb_DefBuilder_ParseOctalEscape(upb_DefBuilder* ctx, + const upb_FieldDef* f, + const char** src, const char* end) { + char ch = 0; + for (int i = 0; i < 3; i++) { + char digit; + if ((digit = TryGetOctalDigit(src, end)) >= 0) { + ch = (ch << 3) | digit; + } + } + return ch; +} + +char _upb_DefBuilder_ParseEscape(upb_DefBuilder* ctx, const upb_FieldDef* f, + const char** src, const char* end) { + char ch; + if (!TryGetChar(src, end, &ch)) { + _upb_DefBuilder_Errf(ctx, "unterminated escape sequence in field %s", + upb_FieldDef_FullName(f)); + return 0; + } + switch (ch) { + case 'a': + return '\a'; + case 'b': + return '\b'; + case 'f': + return '\f'; + case 'n': + return '\n'; + case 'r': + return '\r'; + case 't': + return '\t'; + case 'v': + return '\v'; + case '\\': + return '\\'; + case '\'': + return '\''; + case '\"': + return '\"'; + case '?': + return '\?'; + case 'x': + case 'X': + return upb_DefBuilder_ParseHexEscape(ctx, f, src, end); + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + *src -= 1; + return upb_DefBuilder_ParseOctalEscape(ctx, f, src, end); + } + _upb_DefBuilder_Errf(ctx, "Unknown escape sequence: \\%c", ch); +} + +void _upb_DefBuilder_CheckIdentSlow(upb_DefBuilder* ctx, upb_StringView name, + bool full) { + const char* str = name.data; + const size_t len = name.size; + bool start = true; + for (size_t i = 0; i < len; i++) { + const char c = str[i]; + if (c == '.') { + if (start || !full) { + _upb_DefBuilder_Errf( + ctx, "invalid name: unexpected '.' (" UPB_STRINGVIEW_FORMAT ")", + UPB_STRINGVIEW_ARGS(name)); + } + start = true; + } else if (start) { + if (!upb_isletter(c)) { + _upb_DefBuilder_Errf(ctx, + "invalid name: path components must start with a " + "letter (" UPB_STRINGVIEW_FORMAT ")", + UPB_STRINGVIEW_ARGS(name)); + } + start = false; + } else if (!upb_isalphanum(c)) { + _upb_DefBuilder_Errf( + ctx, + "invalid name: non-alphanumeric character (" UPB_STRINGVIEW_FORMAT + ")", + UPB_STRINGVIEW_ARGS(name)); + } + } + if (start) { + _upb_DefBuilder_Errf(ctx, + "invalid name: empty part (" UPB_STRINGVIEW_FORMAT ")", + UPB_STRINGVIEW_ARGS(name)); + } + + // We should never reach this point. + UPB_ASSERT(false); +} + + +#include + + // Must be last. char* upb_strdup2(const char* s, size_t len, upb_Arena* a) { diff --git a/ruby/ext/google/protobuf_c/ruby-upb.h b/ruby/ext/google/protobuf_c/ruby-upb.h index e8fbf5723d..d7adbf113f 100755 --- a/ruby/ext/google/protobuf_c/ruby-upb.h +++ b/ruby/ext/google/protobuf_c/ruby-upb.h @@ -12272,10 +12272,6 @@ char* upb_MtDataEncoder_EncodeMessageSet(upb_MtDataEncoder* e, char* ptr); #endif /* UPB_MINI_DESCRIPTOR_INTERNAL_ENCODE_H_ */ -#ifndef UPB_REFLECTION_DEF_BUILDER_INTERNAL_H_ -#define UPB_REFLECTION_DEF_BUILDER_INTERNAL_H_ - - #ifndef UPB_REFLECTION_DEF_POOL_INTERNAL_H_ #define UPB_REFLECTION_DEF_POOL_INTERNAL_H_ @@ -12323,6 +12319,10 @@ bool _upb_DefPool_LoadDefInitEx(upb_DefPool* s, const _upb_DefPool_Init* init, #endif /* UPB_REFLECTION_DEF_POOL_INTERNAL_H_ */ +#ifndef UPB_REFLECTION_DEF_BUILDER_INTERNAL_H_ +#define UPB_REFLECTION_DEF_BUILDER_INTERNAL_H_ + + // Must be last. // We want to copy the options verbatim into the destination options proto. @@ -12445,29 +12445,6 @@ UPB_INLINE void _upb_DefBuilder_CheckIdentFull(upb_DefBuilder* ctx, #endif /* UPB_REFLECTION_DEF_BUILDER_INTERNAL_H_ */ -#ifndef UPB_REFLECTION_INTERNAL_STRDUP2_H_ -#define UPB_REFLECTION_INTERNAL_STRDUP2_H_ - -#include - - -// Must be last. - -#ifdef __cplusplus -extern "C" { -#endif - -// Variant that works with a length-delimited rather than NULL-delimited string, -// as supported by strtable. -char* upb_strdup2(const char* s, size_t len, upb_Arena* a); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - - -#endif /* UPB_REFLECTION_INTERNAL_STRDUP2_H_ */ - #ifndef UPB_REFLECTION_ENUM_DEF_INTERNAL_H_ #define UPB_REFLECTION_ENUM_DEF_INTERNAL_H_ @@ -12738,6 +12715,29 @@ upb_EnumReservedRange* _upb_EnumReservedRanges_New( #endif /* UPB_REFLECTION_ENUM_RESERVED_RANGE_INTERNAL_H_ */ +#ifndef UPB_REFLECTION_INTERNAL_STRDUP2_H_ +#define UPB_REFLECTION_INTERNAL_STRDUP2_H_ + +#include + + +// Must be last. + +#ifdef __cplusplus +extern "C" { +#endif + +// Variant that works with a length-delimited rather than NULL-delimited string, +// as supported by strtable. +char* upb_strdup2(const char* s, size_t len, upb_Arena* a); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* UPB_REFLECTION_INTERNAL_STRDUP2_H_ */ + #ifndef UPB_REFLECTION_EXTENSION_RANGE_INTERNAL_H_ #define UPB_REFLECTION_EXTENSION_RANGE_INTERNAL_H_