From acd72c6d3f0d7464e2d61668fd73d23d67857021 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 31 Oct 2020 11:04:08 -0700 Subject: [PATCH 1/2] WIP. --- upb/def.c | 119 ++++++++++++++++++++++++++---------------------------- 1 file changed, 58 insertions(+), 61 deletions(-) diff --git a/upb/def.c b/upb/def.c index 603deb2013..4d9713f22f 100644 --- a/upb/def.c +++ b/upb/def.c @@ -864,7 +864,40 @@ void upb_oneof_iter_setdone(upb_oneof_iter *iter) { upb_inttable_iter_setdone(iter); } -/* Dynamic Layout Generation. *************************************************/ +/* Code to build defs from descriptor protos. *********************************/ + +/* There is a question of how much validation to do here. It will be difficult + * to perfectly match the amount of validation performed by proto2. But since + * this code is used to directly build defs from Ruby (for example) we do need + * to validate important constraints like uniqueness of names and numbers. */ + +#define CHK_OOM(ctx, x) if (!(x)) { symtab_oomerr(ctx); } + +typedef struct { + const upb_symtab *symtab; + upb_filedef *file; /* File we are building. */ + upb_alloc *alloc; /* Allocate defs here. */ + upb_alloc *tmp; /* Alloc for addtab and any other tmp data. */ + upb_strtable *addtab; /* full_name -> packed def ptr for new defs */ + const upb_msglayout **layouts; /* NULL if we should build layouts. */ + upb_status *status; /* Record errors here. */ + jmp_buf err; /* longjmp() on error. */ +} symtab_addctx; + +static void symtab_err(symtab_addctx *ctx) { longjmp(ctx->err, 1); } + +static void symtab_errf(symtab_addctx *ctx, const char *fmt, ...) 
{ + va_list argp; + va_start(argp, fmt); + upb_status_vseterrf(d->status, fmt, argp); + va_end(argp); + longjmp(ctx->err, 1); +} + +static void symtab_oomerr(symtab_addctx *ctx) { + upb_status_setoom(ctx->status); + longjmp(ctx->err, 1); +} static size_t div_round_up(size_t n, size_t d) { return (n + d - 1) / d; @@ -931,7 +964,8 @@ static void assign_layout_indices(const upb_msgdef *m, upb_msglayout_field *fiel /* This function is the dynamic equivalent of message_layout.{cc,h} in upbc. * It computes a dynamic layout for all of the fields in |m|. */ -static bool make_layout(const upb_symtab *symtab, const upb_msgdef *m) { +static void make_layout(symtab_addctx *ctx, const upb_symtab *symtab, + const upb_msgdef *m) { upb_msglayout *l = (upb_msglayout*)m->layout; upb_msg_field_iter it; upb_msg_oneof_iter oit; @@ -948,8 +982,7 @@ static bool make_layout(const upb_symtab *symtab, const upb_msgdef *m) { if ((!fields && upb_msgdef_numfields(m)) || (!submsgs && submsg_count)) { - /* OOM. */ - return false; + symtab_oomerr(ctx); } l->field_count = upb_msgdef_numfields(m); @@ -980,7 +1013,7 @@ static bool make_layout(const upb_symtab *symtab, const upb_msgdef *m) { l->field_count = 2; l->size = 2 * sizeof(upb_strview); l->size = UPB_ALIGN_UP(l->size, 8); - return true; + return; } /* Allocate data offsets in three stages: @@ -1092,30 +1125,8 @@ static bool make_layout(const upb_symtab *symtab, const upb_msgdef *m) { /* Sort fields by number. */ qsort(fields, upb_msgdef_numfields(m), sizeof(*fields), field_number_cmp); assign_layout_indices(m, fields); - - return true; } -/* Code to build defs from descriptor protos. *********************************/ - -/* There is a question of how much validation to do here. It will be difficult - * to perfectly match the amount of validation performed by proto2. But since - * this code is used to directly build defs from Ruby (for example) we do need - * to validate important constraints like uniqueness of names and numbers. 
*/ - -#define CHK(x) if (!(x)) { return false; } -#define CHK_OOM(x) if (!(x)) { upb_status_setoom(ctx->status); return false; } - -typedef struct { - const upb_symtab *symtab; - upb_filedef *file; /* File we are building. */ - upb_alloc *alloc; /* Allocate defs here. */ - upb_alloc *tmp; /* Alloc for addtab and any other tmp data. */ - upb_strtable *addtab; /* full_name -> packed def ptr for new defs */ - const upb_msglayout **layouts; /* NULL if we should build layouts. */ - upb_status *status; /* Record errors here. */ -} symtab_addctx; - static char* strviewdup(const symtab_addctx *ctx, upb_strview view) { return upb_strdup2(view.data, view.size, ctx->alloc); } @@ -1145,7 +1156,7 @@ static const char *makefullname(const symtab_addctx *ctx, const char *prefix, } } -static bool finalize_oneofs(symtab_addctx *ctx, upb_msgdef *m) { +static void finalize_oneofs(symtab_addctx *ctx, upb_msgdef *m) { int i; int synthetic_count = 0; upb_oneofdef *mutable_oneofs = (upb_oneofdef*)m->oneofs; @@ -1154,19 +1165,15 @@ static bool finalize_oneofs(symtab_addctx *ctx, upb_msgdef *m) { upb_oneofdef *o = &mutable_oneofs[i]; if (o->synthetic && o->field_count != 1) { - upb_status_seterrf( - ctx->status, "Synthetic oneofs must have one field, not %d: %s", - o->field_count, upb_oneofdef_name(o)); - return false; + symtab_errf(ctx, "Synthetic oneofs must have one field, not %d: %s", + o->field_count, upb_oneofdef_name(o)); } if (o->synthetic) { synthetic_count++; } else if (synthetic_count != 0) { - upb_status_seterrf( - ctx->status, "Synthetic oneofs must be after all other oneofs: %s", - upb_oneofdef_name(o)); - return false; + symtab_errf(ctx, "Synthetic oneofs must be after all other oneofs: %s", + upb_oneofdef_name(o)); } o->fields = upb_malloc(ctx->alloc, sizeof(upb_fielddef*) * o->field_count); @@ -1182,7 +1189,6 @@ static bool finalize_oneofs(symtab_addctx *ctx, upb_msgdef *m) { } m->real_oneof_count = m->oneof_count - synthetic_count; - return true; } size_t getjsonname(const 
char *name, char *buf, size_t len) { @@ -1230,44 +1236,38 @@ static char* makejsonname(const char* name, upb_alloc *alloc) { return json_name; } -static bool symtab_add(const symtab_addctx *ctx, const char *name, +static void symtab_add(const symtab_addctx *ctx, const char *name, upb_value v) { upb_value tmp; if (upb_strtable_lookup(ctx->addtab, name, &tmp) || upb_strtable_lookup(&ctx->symtab->syms, name, &tmp)) { - upb_status_seterrf(ctx->status, "duplicate symbol '%s'", name); - return false; + status_errf(ctx, "duplicate symbol '%s'", name); } CHK_OOM(upb_strtable_insert3(ctx->addtab, name, strlen(name), v, ctx->tmp)); - return true; } /* Given a symbol and the base symbol inside which it is defined, find the * symbol's definition in t. */ -static bool resolvename(const upb_strtable *t, const upb_fielddef *f, - const char *base, upb_strview sym, - upb_deftype_t type, upb_status *status, - const void **def) { +static const void *resolvename(symtab_addctx *ctx, const upb_strtable *t, + const upb_fielddef *f, const char *base, + upb_strview sym, upb_deftype_t type, + const void **def) { if(sym.size == 0) return false; if(sym.data[0] == '.') { /* Symbols starting with '.' are absolute, so we do a single lookup. * Slice to omit the leading '.' */ upb_value v; if (!upb_strtable_lookup2(t, sym.data + 1, sym.size - 1, &v)) { - return false; + return NULL; } - *def = unpack_def(v, type); - - if (!*def) { - upb_status_seterrf(status, - "type mismatch when resolving field %s, name %s", - f->full_name, sym.data); - return false; + const void *ret = unpack_def(v, type); + if (ret) { + symtab_errf(ctx, "type mismatch when resolving field %s, name %s", + f->full_name, sym.data); } - - return true; + return ret; } else { /* Remove components from base until we find an entry or run out. * TODO: This branch is totally broken, but currently not used. 
*/ @@ -1281,12 +1281,9 @@ const void *symtab_resolve(const symtab_addctx *ctx, const upb_fielddef *f, const char *base, upb_strview sym, upb_deftype_t type) { const void *ret; - if (!resolvename(ctx->addtab, f, base, sym, type, ctx->status, &ret) && - !resolvename(&ctx->symtab->syms, f, base, sym, type, ctx->status, &ret)) { - if (upb_ok(ctx->status)) { - upb_status_seterrf(ctx->status, "couldn't resolve name '%s'", sym.data); - } - return false; + if (!resolvename(ctx, ctx->addtab, f, base, sym, type, &ret) && + !resolvename(ctx, &ctx->symtab->syms, f, base, sym, type, &ret)) { + symtab_errf(ctx, "couldn't resolve name '%s'", sym.data); } return ret; } From c9f9668234bdc13df99b917d83c22e19bfb6af33 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sun, 1 Nov 2020 00:06:23 -0700 Subject: [PATCH 2/2] symtab: use longjmp() for errors and avoid intermediate table. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We used to use a separate "add table" during the upb_symtab_addfile() operation to make it easier to back out the file if it contained errors. But this created unnecessary work of re-adding the same symbols to the main symtab once everything was validated. Instead we directly add symbols to the main symbols table. If there is an error in validation, we remove precisely the set of symbols that were already added. This also requires using a separate arena for each file. We can fuse it with the symtab's main arena if the operation is successful. 
LoadDescriptor_Upb 61.2µs ± 4% 53.5µs ± 1% -12.50% (p=0.000 n=12+12) LoadAdsDescriptor_Upb 4.43ms ± 1% 3.06ms ± 0% -31.00% (p=0.000 n=12+12) LoadDescriptor_Proto2 257µs ± 0% 259µs ± 0% +1.00% (p=0.000 n=12+12) LoadAdsDescriptor_Proto2 13.9ms ± 1% 13.9ms ± 1% ~ (p=0.128 n=12+12) --- tests/bindings/lua/test_upb.lua | 14 + upb/def.c | 821 +++++++++++++++----------------- 2 files changed, 396 insertions(+), 439 deletions(-) diff --git a/tests/bindings/lua/test_upb.lua b/tests/bindings/lua/test_upb.lua index 345828d500..3a490c6f6d 100644 --- a/tests/bindings/lua/test_upb.lua +++ b/tests/bindings/lua/test_upb.lua @@ -512,6 +512,20 @@ function test_foo() assert_equal(set.file[1].name, "google/protobuf/descriptor.proto") end +function test_descriptor_error() + local symtab = upb.SymbolTable() + local file = descriptor.FileDescriptorProto() + file.name = "test.proto" + file.message_type[1] = descriptor.DescriptorProto{ + name = "ABC" + } + file.message_type[2] = descriptor.DescriptorProto{ + name = "BC." + } + assert_error(function () symtab:add_file(upb.encode(file)) end) + assert_nil(symtab:lookup_msg("ABC")) +end + function test_gc() local top = test_messages_proto3.TestAllTypesProto3() local n = 100 diff --git a/upb/def.c b/upb/def.c index 56497e5cca..7379a768a0 100644 --- a/upb/def.c +++ b/upb/def.c @@ -3,10 +3,11 @@ #include #include +#include #include #include -#include "google/protobuf/descriptor.upb.h" +#include "google/protobuf/descriptor.upb.h" #include "upb/port_def.inc" typedef struct { @@ -14,15 +15,6 @@ typedef struct { char str[1]; /* Null-terminated string data follows. 
*/ } str_t; -static str_t *newstr(upb_alloc *alloc, const char *data, size_t len) { - str_t *ret = upb_malloc(alloc, sizeof(*ret) + len); - if (!ret) return NULL; - ret->len = len; - if (len) memcpy(ret->str, data, len); - ret->str[len] = '\0'; - return ret; -} - struct upb_fielddef { const upb_filedef *file; const upb_msgdef *msgdef; @@ -157,38 +149,6 @@ static bool upb_isalphanum(char c) { return upb_isletter(c) || upb_isbetween(c, '0', '9'); } -static bool upb_isident(upb_strview name, bool full, upb_status *s) { - const char *str = name.data; - size_t len = name.size; - bool start = true; - size_t i; - for (i = 0; i < len; i++) { - char c = str[i]; - if (c == '.') { - if (start || !full) { - upb_status_seterrf(s, "invalid name: unexpected '.' (%s)", str); - return false; - } - start = true; - } else if (start) { - if (!upb_isletter(c)) { - upb_status_seterrf( - s, "invalid name: path components must start with a letter (%s)", - str); - return false; - } - start = false; - } else { - if (!upb_isalphanum(c)) { - upb_status_seterrf(s, "invalid name: non-alphanumeric character (%s)", - str); - return false; - } - } - } - return !start; -} - static const char *shortdefname(const char *fullname) { const char *p; @@ -248,54 +208,6 @@ static void upb_status_setoom(upb_status *status) { upb_status_seterrmsg(status, "out of memory"); } -static bool assign_msg_indices(upb_msgdef *m, upb_status *s) { - /* Sort fields. upb internally relies on UPB_TYPE_MESSAGE fields having the - * lowest indexes, but we do not publicly guarantee this. 
*/ - upb_msg_field_iter j; - int i; - uint32_t selector; - int n = upb_msgdef_numfields(m); - upb_fielddef **fields; - - if (n == 0) { - m->selector_count = UPB_STATIC_SELECTOR_COUNT; - m->submsg_field_count = 0; - return true; - } - - fields = upb_gmalloc(n * sizeof(*fields)); - if (!fields) { - upb_status_setoom(s); - return false; - } - - m->submsg_field_count = 0; - for(i = 0, upb_msg_field_begin(&j, m); - !upb_msg_field_done(&j); - upb_msg_field_next(&j), i++) { - upb_fielddef *f = upb_msg_iter_field(&j); - UPB_ASSERT(f->msgdef == m); - if (upb_fielddef_issubmsg(f)) { - m->submsg_field_count++; - } - fields[i] = f; - } - - qsort(fields, n, sizeof(*fields), cmp_fields); - - selector = UPB_STATIC_SELECTOR_COUNT + m->submsg_field_count; - for (i = 0; i < n; i++) { - upb_fielddef *f = fields[i]; - f->index_ = i; - f->selector_base = selector + upb_handlers_selectorbaseoffset(f); - selector += upb_handlers_selectorcount(f); - } - m->selector_count = selector; - - upb_gfree(fields); - return true; -} - static void assign_msg_wellknowntype(upb_msgdef *m) { const char *name = upb_msgdef_fullname(m); if (name == NULL) { @@ -865,6 +777,114 @@ void upb_oneof_iter_setdone(upb_oneof_iter *iter) { upb_inttable_iter_setdone(iter); } +/* upb_filedef ****************************************************************/ + +const char *upb_filedef_name(const upb_filedef *f) { + return f->name; +} + +const char *upb_filedef_package(const upb_filedef *f) { + return f->package; +} + +const char *upb_filedef_phpprefix(const upb_filedef *f) { + return f->phpprefix; +} + +const char *upb_filedef_phpnamespace(const upb_filedef *f) { + return f->phpnamespace; +} + +upb_syntax_t upb_filedef_syntax(const upb_filedef *f) { + return f->syntax; +} + +int upb_filedef_msgcount(const upb_filedef *f) { + return f->msg_count; +} + +int upb_filedef_depcount(const upb_filedef *f) { + return f->dep_count; +} + +int upb_filedef_enumcount(const upb_filedef *f) { + return f->enum_count; +} + +const 
upb_filedef *upb_filedef_dep(const upb_filedef *f, int i) { + return i < 0 || i >= f->dep_count ? NULL : f->deps[i]; +} + +const upb_msgdef *upb_filedef_msg(const upb_filedef *f, int i) { + return i < 0 || i >= f->msg_count ? NULL : &f->msgs[i]; +} + +const upb_enumdef *upb_filedef_enum(const upb_filedef *f, int i) { + return i < 0 || i >= f->enum_count ? NULL : &f->enums[i]; +} + +void upb_symtab_free(upb_symtab *s) { + upb_arena_free(s->arena); + upb_gfree(s); +} + +upb_symtab *upb_symtab_new(void) { + upb_symtab *s = upb_gmalloc(sizeof(*s)); + upb_alloc *alloc; + + if (!s) { + return NULL; + } + + s->arena = upb_arena_new(); + s->bytes_loaded = 0; + alloc = upb_arena_alloc(s->arena); + + if (!upb_strtable_init2(&s->syms, UPB_CTYPE_CONSTPTR, 32, alloc) || + !upb_strtable_init2(&s->files, UPB_CTYPE_CONSTPTR, 4, alloc)) { + upb_arena_free(s->arena); + upb_gfree(s); + s = NULL; + } + return s; +} + +const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym) { + upb_value v; + return upb_strtable_lookup(&s->syms, sym, &v) ? + unpack_def(v, UPB_DEFTYPE_MSG) : NULL; +} + +const upb_msgdef *upb_symtab_lookupmsg2(const upb_symtab *s, const char *sym, + size_t len) { + upb_value v; + return upb_strtable_lookup2(&s->syms, sym, len, &v) ? + unpack_def(v, UPB_DEFTYPE_MSG) : NULL; +} + +const upb_enumdef *upb_symtab_lookupenum(const upb_symtab *s, const char *sym) { + upb_value v; + return upb_strtable_lookup(&s->syms, sym, &v) ? + unpack_def(v, UPB_DEFTYPE_ENUM) : NULL; +} + +const upb_filedef *upb_symtab_lookupfile(const upb_symtab *s, const char *name) { + upb_value v; + return upb_strtable_lookup(&s->files, name, &v) ? upb_value_getconstptr(v) + : NULL; +} + +const upb_filedef *upb_symtab_lookupfile2( + const upb_symtab *s, const char *name, size_t len) { + upb_value v; + return upb_strtable_lookup2(&s->files, name, len, &v) ? 
+ upb_value_getconstptr(v) : NULL; +} + +int upb_symtab_filecount(const upb_symtab *s) { + return (int)upb_strtable_count(&s->files); +} + /* Code to build defs from descriptor protos. *********************************/ /* There is a question of how much validation to do here. It will be difficult @@ -872,34 +892,71 @@ void upb_oneof_iter_setdone(upb_oneof_iter *iter) { * this code is used to directly build defs from Ruby (for example) we do need * to validate important constraints like uniqueness of names and numbers. */ -#define CHK_OOM(ctx, x) if (!(x)) { symtab_oomerr(ctx); } +#define CHK_OOM(x) if (!(x)) { symtab_oomerr(ctx); } typedef struct { - const upb_symtab *symtab; + upb_symtab *symtab; upb_filedef *file; /* File we are building. */ - upb_alloc *alloc; /* Allocate defs here. */ - upb_alloc *tmp; /* Alloc for addtab and any other tmp data. */ - upb_strtable *addtab; /* full_name -> packed def ptr for new defs */ + upb_arena *file_arena; /* Allocate defs here. */ + upb_alloc *alloc; /* Alloc of file_arena, for tables. */ const upb_msglayout **layouts; /* NULL if we should build layouts. */ upb_status *status; /* Record errors here. */ jmp_buf err; /* longjmp() on error. */ } symtab_addctx; -static void symtab_err(symtab_addctx *ctx) { longjmp(ctx->err, 1); } - +UPB_NORETURN UPB_NOINLINE static void symtab_errf(symtab_addctx *ctx, const char *fmt, ...) 
{ va_list argp; va_start(argp, fmt); - upb_status_vseterrf(d->status, fmt, argp); + upb_status_vseterrf(ctx->status, fmt, argp); va_end(argp); longjmp(ctx->err, 1); } +UPB_NORETURN UPB_NOINLINE static void symtab_oomerr(symtab_addctx *ctx) { upb_status_setoom(ctx->status); longjmp(ctx->err, 1); } +void *symtab_alloc(symtab_addctx *ctx, size_t bytes) { + void *ret = upb_arena_malloc(ctx->file_arena, bytes); + if (!ret) symtab_oomerr(ctx); + return ret; +} + +static void check_ident(symtab_addctx *ctx, upb_strview name, bool full) { + const char *str = name.data; + size_t len = name.size; + bool start = true; + size_t i; + for (i = 0; i < len; i++) { + char c = str[i]; + if (c == '.') { + if (start || !full) { + symtab_errf(ctx, "invalid name: unexpected '.' (%.*s)", (int)len, str); + } + start = true; + } else if (start) { + if (!upb_isletter(c)) { + symtab_errf( + ctx, + "invalid name: path components must start with a letter (%.*s)", + (int)len, str); + } + start = false; + } else { + if (!upb_isalphanum(c)) { + symtab_errf(ctx, "invalid name: non-alphanumeric character (%.*s)", + (int)len, str); + } + } + } + if (start) { + symtab_errf(ctx, "invalid name: empty part (%.*s)", (int)len, str); + } +} + static size_t div_round_up(size_t n, size_t d) { return (n + d - 1) / d; } @@ -965,8 +1022,7 @@ static void assign_layout_indices(const upb_msgdef *m, upb_msglayout_field *fiel /* This function is the dynamic equivalent of message_layout.{cc,h} in upbc. * It computes a dynamic layout for all of the fields in |m|. 
*/ -static void make_layout(symtab_addctx *ctx, const upb_symtab *symtab, - const upb_msgdef *m) { +static void make_layout(symtab_addctx *ctx, const upb_msgdef *m) { upb_msglayout *l = (upb_msglayout*)m->layout; upb_msg_field_iter it; upb_msg_oneof_iter oit; @@ -974,17 +1030,11 @@ static void make_layout(symtab_addctx *ctx, const upb_symtab *symtab, size_t submsg_count = m->submsg_field_count; const upb_msglayout **submsgs; upb_msglayout_field *fields; - upb_alloc *alloc = upb_arena_alloc(symtab->arena); memset(l, 0, sizeof(*l)); - fields = upb_malloc(alloc, upb_msgdef_numfields(m) * sizeof(*fields)); - submsgs = upb_malloc(alloc, submsg_count * sizeof(*submsgs)); - - if ((!fields && upb_msgdef_numfields(m)) || - (!submsgs && submsg_count)) { - symtab_oomerr(ctx); - } + fields = symtab_alloc(ctx, upb_msgdef_numfields(m) * sizeof(*fields)); + submsgs = symtab_alloc(ctx, submsg_count * sizeof(*submsgs)); l->field_count = upb_msgdef_numfields(m); l->fields = fields; @@ -1128,7 +1178,50 @@ static void make_layout(symtab_addctx *ctx, const upb_symtab *symtab, assign_layout_indices(m, fields); } -static char* strviewdup(const symtab_addctx *ctx, upb_strview view) { +static void assign_msg_indices(symtab_addctx *ctx, upb_msgdef *m) { + /* Sort fields. upb internally relies on UPB_TYPE_MESSAGE fields having the + * lowest indexes, but we do not publicly guarantee this. 
+ */ + upb_msg_field_iter j; + int i; + uint32_t selector; + int n = upb_msgdef_numfields(m); + upb_fielddef **fields; + + if (n == 0) { + m->selector_count = UPB_STATIC_SELECTOR_COUNT; + m->submsg_field_count = 0; + return; + } + + fields = upb_gmalloc(n * sizeof(*fields)); + CHK_OOM(fields); + m->submsg_field_count = 0; + for(i = 0, upb_msg_field_begin(&j, m); + !upb_msg_field_done(&j); + upb_msg_field_next(&j), i++) { + upb_fielddef *f = upb_msg_iter_field(&j); + UPB_ASSERT(f->msgdef == m); + if (upb_fielddef_issubmsg(f)) { + m->submsg_field_count++; + } + fields[i] = f; + } + + qsort(fields, n, sizeof(*fields), cmp_fields); + + selector = UPB_STATIC_SELECTOR_COUNT + m->submsg_field_count; + for (i = 0; i < n; i++) { + upb_fielddef *f = fields[i]; + f->index_ = i; + f->selector_base = selector + upb_handlers_selectorbaseoffset(f); + selector += upb_handlers_selectorcount(f); + } + m->selector_count = selector; + + upb_gfree(fields); +} + +static char *strviewdup(symtab_addctx *ctx, upb_strview view) { return upb_strdup2(view.data, view.size, ctx->alloc); } @@ -1140,13 +1233,12 @@ static bool streql_view(upb_strview view, const char *b) { return streql2(view.data, view.size, b); } -static const char *makefullname(const symtab_addctx *ctx, const char *prefix, +static const char *makefullname(symtab_addctx *ctx, const char *prefix, upb_strview name) { if (prefix) { /* ret = prefix + '.' 
+ name; */ size_t n = strlen(prefix); - char *ret = upb_malloc(ctx->alloc, n + name.size + 2); - CHK_OOM(ret); + char *ret = symtab_alloc(ctx, n + name.size + 2); strcpy(ret, prefix); ret[n] = '.'; memcpy(&ret[n + 1], name.data, name.size); @@ -1177,7 +1269,7 @@ static void finalize_oneofs(symtab_addctx *ctx, upb_msgdef *m) { upb_oneofdef_name(o)); } - o->fields = upb_malloc(ctx->alloc, sizeof(upb_fielddef*) * o->field_count); + o->fields = symtab_alloc(ctx, sizeof(upb_fielddef *) * o->field_count); o->field_count = 0; } @@ -1230,41 +1322,39 @@ size_t getjsonname(const char *name, char *buf, size_t len) { #undef WRITE } -static char* makejsonname(const char* name, upb_alloc *alloc) { +static char* makejsonname(symtab_addctx *ctx, const char* name) { size_t size = getjsonname(name, NULL, 0); - char* json_name = upb_malloc(alloc, size); + char* json_name = symtab_alloc(ctx, size); getjsonname(name, json_name, size); return json_name; } -static void symtab_add(const symtab_addctx *ctx, const char *name, - upb_value v) { - upb_value tmp; - if (upb_strtable_lookup(ctx->addtab, name, &tmp) || - upb_strtable_lookup(&ctx->symtab->syms, name, &tmp)) { - status_errf(ctx, "duplicate symbol '%s'", name); +static void symtab_add(symtab_addctx *ctx, const char *name, upb_value v) { + if (upb_strtable_lookup(&ctx->symtab->syms, name, NULL)) { + symtab_errf(ctx, "duplicate symbol '%s'", name); } - - CHK_OOM(upb_strtable_insert3(ctx->addtab, name, strlen(name), v, ctx->tmp)); + upb_alloc *alloc = upb_arena_alloc(ctx->symtab->arena); + size_t len = strlen(name); + CHK_OOM(upb_strtable_insert3(&ctx->symtab->syms, name, len, v, alloc)); } /* Given a symbol and the base symbol inside which it is defined, find the * symbol's definition in t. 
*/ -static const void *resolvename(symtab_addctx *ctx, const upb_strtable *t, - const upb_fielddef *f, const char *base, - upb_strview sym, upb_deftype_t type, - const void **def) { - if(sym.size == 0) return false; +static const void *symtab_resolve(symtab_addctx *ctx, const upb_fielddef *f, + const char *base, upb_strview sym, + upb_deftype_t type) { + const upb_strtable *t = &ctx->symtab->syms; + if(sym.size == 0) goto notfound; if(sym.data[0] == '.') { /* Symbols starting with '.' are absolute, so we do a single lookup. * Slice to omit the leading '.' */ upb_value v; if (!upb_strtable_lookup2(t, sym.data + 1, sym.size - 1, &v)) { - return NULL; + goto notfound; } const void *ret = unpack_def(v, type); - if (ret) { + if (!ret) { symtab_errf(ctx, "type mismatch when resolving field %s, name %s", f->full_name, sym.data); } @@ -1274,23 +1364,15 @@ static const void *resolvename(symtab_addctx *ctx, const upb_strtable *t, * TODO: This branch is totally broken, but currently not used. */ (void)base; UPB_ASSERT(false); - return false; + goto notfound; } -} -const void *symtab_resolve(const symtab_addctx *ctx, const upb_fielddef *f, - const char *base, upb_strview sym, - upb_deftype_t type) { - const void *ret; - if (!resolvename(ctx, ctx->addtab, f, base, sym, type, &ret) && - !resolvename(ctx, &ctx->symtab->syms, f, base, sym, type, &ret)) { - symtab_errf(ctx, "couldn't resolve name '%s'", sym.data); - } - return ret; +notfound: + symtab_errf(ctx, "couldn't resolve name '%.*s'", (int)sym.size, sym.data); } -static bool create_oneofdef( - const symtab_addctx *ctx, upb_msgdef *m, +static void create_oneofdef( + symtab_addctx *ctx, upb_msgdef *m, const google_protobuf_OneofDescriptorProto *oneof_proto) { upb_oneofdef *o; upb_strview name = google_protobuf_OneofDescriptorProto_name(oneof_proto); @@ -1303,16 +1385,23 @@ static bool create_oneofdef( o->synthetic = false; v = pack_def(o, UPB_DEFTYPE_ONEOF); - CHK_OOM(symtab_add(ctx, o->full_name, v)); + symtab_add(ctx, o->full_name, v); 
CHK_OOM(upb_strtable_insert3(&m->ntof, name.data, name.size, v, ctx->alloc)); CHK_OOM(upb_inttable_init2(&o->itof, UPB_CTYPE_CONSTPTR, ctx->alloc)); CHK_OOM(upb_strtable_init2(&o->ntof, UPB_CTYPE_CONSTPTR, 4, ctx->alloc)); +} - return true; +static str_t *newstr(symtab_addctx *ctx, const char *data, size_t len) { + str_t *ret = symtab_alloc(ctx, sizeof(*ret) + len); + if (!ret) return NULL; + ret->len = len; + if (len) memcpy(ret->str, data, len); + ret->str[len] = '\0'; + return ret; } -static bool parse_default(const symtab_addctx *ctx, const char *str, size_t len, +static void parse_default(symtab_addctx *ctx, const char *str, size_t len, upb_fielddef *f) { char *end; char nullz[64]; @@ -1327,7 +1416,7 @@ static bool parse_default(const symtab_addctx *ctx, const char *str, size_t len, case UPB_TYPE_FLOAT: /* Standard C number parsing functions expect null-terminated strings. */ if (len >= sizeof(nullz) - 1) { - return false; + symtab_errf(ctx, "Default too long: %.*s", (int)len, str); } memcpy(nullz, str, len); nullz[len] = '\0'; @@ -1340,47 +1429,61 @@ static bool parse_default(const symtab_addctx *ctx, const char *str, size_t len, switch (upb_fielddef_type(f)) { case UPB_TYPE_INT32: { long val = strtol(str, &end, 0); - CHK(val <= INT32_MAX && val >= INT32_MIN && errno != ERANGE && !*end); + if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end) { + goto invalid; + } f->defaultval.sint = val; break; } case UPB_TYPE_ENUM: { const upb_enumdef *e = f->sub.enumdef; int32_t val; - CHK(upb_enumdef_ntoi(e, str, len, &val)); + if (!upb_enumdef_ntoi(e, str, len, &val)) { + goto invalid; + } f->defaultval.sint = val; break; } case UPB_TYPE_INT64: { /* XXX: Need to write our own strtoll, since it's not available in c89. 
*/ int64_t val = strtol(str, &end, 0); - CHK(val <= INT64_MAX && val >= INT64_MIN && errno != ERANGE && !*end); + if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || *end) { + goto invalid; + } f->defaultval.sint = val; break; } case UPB_TYPE_UINT32: { unsigned long val = strtoul(str, &end, 0); - CHK(val <= UINT32_MAX && errno != ERANGE && !*end); + if (val > UINT32_MAX || errno == ERANGE || *end) { + goto invalid; + } f->defaultval.uint = val; break; } case UPB_TYPE_UINT64: { /* XXX: Need to write our own strtoull, since it's not available in c89. */ uint64_t val = strtoul(str, &end, 0); - CHK(val <= UINT64_MAX && errno != ERANGE && !*end); + if (val > UINT64_MAX || errno == ERANGE || *end) { + goto invalid; + } f->defaultval.uint = val; break; } case UPB_TYPE_DOUBLE: { double val = strtod(str, &end); - CHK(errno != ERANGE && !*end); + if (errno == ERANGE || *end) { + goto invalid; + } f->defaultval.dbl = val; break; } case UPB_TYPE_FLOAT: { /* XXX: Need to write our own strtof, since it's not available in c89. */ float val = strtod(str, &end); - CHK(errno != ERANGE && !*end); + if (errno == ERANGE || *end) { + goto invalid; + } f->defaultval.flt = val; break; } @@ -1390,25 +1493,31 @@ static bool parse_default(const symtab_addctx *ctx, const char *str, size_t len, } else if (streql2(str, len, "true")) { f->defaultval.boolean = true; } else { - return false; + goto invalid; } break; } case UPB_TYPE_STRING: - f->defaultval.str = newstr(ctx->alloc, str, len); + f->defaultval.str = newstr(ctx, str, len); break; case UPB_TYPE_BYTES: /* XXX: need to interpret the C-escaped value. */ - f->defaultval.str = newstr(ctx->alloc, str, len); + f->defaultval.str = newstr(ctx, str, len); break; case UPB_TYPE_MESSAGE: /* Should not have a default value. 
*/ - return false; + symtab_errf(ctx, "Message should not have a default (%s)", + upb_fielddef_fullname(f)); } - return true; + + return; + +invalid: + symtab_errf(ctx, "Invalid default '%.*s' for field %s", (int)len, str, + upb_fielddef_fullname(f)); } -static void set_default_default(const symtab_addctx *ctx, upb_fielddef *f) { +static void set_default_default(symtab_addctx *ctx, upb_fielddef *f) { switch (upb_fielddef_type(f)) { case UPB_TYPE_INT32: case UPB_TYPE_INT64: @@ -1425,7 +1533,7 @@ static void set_default_default(const symtab_addctx *ctx, upb_fielddef *f) { break; case UPB_TYPE_STRING: case UPB_TYPE_BYTES: - f->defaultval.str = newstr(ctx->alloc, NULL, 0); + f->defaultval.str = newstr(ctx, NULL, 0); break; case UPB_TYPE_BOOL: f->defaultval.boolean = false; @@ -1435,8 +1543,8 @@ static void set_default_default(const symtab_addctx *ctx, upb_fielddef *f) { } } -static bool create_fielddef( - const symtab_addctx *ctx, const char *prefix, upb_msgdef *m, +static void create_fielddef( + symtab_addctx *ctx, const char *prefix, upb_msgdef *m, const google_protobuf_FieldDescriptorProto *field_proto) { upb_alloc *alloc = ctx->alloc; upb_fielddef *f; @@ -1448,12 +1556,11 @@ static bool create_fielddef( uint32_t field_number; if (!google_protobuf_FieldDescriptorProto_has_name(field_proto)) { - upb_status_seterrmsg(ctx->status, "field has no name"); - return false; + symtab_errf(ctx, "field has no name (%s)", upb_msgdef_fullname(m)); } name = google_protobuf_FieldDescriptorProto_name(field_proto); - CHK(upb_isident(name, false, ctx->status)); + check_ident(ctx, name, false); full_name = makefullname(ctx, prefix, name); shortname = shortdefname(full_name); @@ -1461,14 +1568,13 @@ static bool create_fielddef( json_name = strviewdup( ctx, google_protobuf_FieldDescriptorProto_json_name(field_proto)); } else { - json_name = makejsonname(shortname, ctx->alloc); + json_name = makejsonname(ctx, shortname); } field_number = 
google_protobuf_FieldDescriptorProto_number(field_proto); if (field_number == 0 || field_number > UPB_MAX_FIELDNUMBER) { - upb_status_seterrf(ctx->status, "invalid field number (%u)", field_number); - return false; + symtab_errf(ctx, "invalid field number (%u)", field_number); } if (m) { @@ -1481,19 +1587,15 @@ static bool create_fielddef( f->is_extension_ = false; if (upb_strtable_lookup(&m->ntof, shortname, NULL)) { - upb_status_seterrf(ctx->status, "duplicate field name (%s)", shortname); - return false; + symtab_errf(ctx, "duplicate field name (%s)", shortname); } if (upb_strtable_lookup(&m->ntof, json_name, NULL)) { - upb_status_seterrf(ctx->status, "duplicate json_name (%s)", json_name); - return false; + symtab_errf(ctx, "duplicate json_name (%s)", json_name); } if (upb_inttable_lookup(&m->itof, field_number, NULL)) { - upb_status_seterrf(ctx->status, "duplicate field number (%u)", - field_number); - return false; + symtab_errf(ctx, "duplicate field number (%u)", field_number); } field_v = pack_def(f, UPB_DEFTYPE_FIELD); @@ -1527,7 +1629,7 @@ static bool create_fielddef( /* extension field. 
*/ f = (upb_fielddef*)&ctx->file->exts[ctx->file->ext_count++]; f->is_extension_ = true; - CHK_OOM(symtab_add(ctx, full_name, pack_def(f, UPB_DEFTYPE_FIELD))); + symtab_add(ctx, full_name, pack_def(f, UPB_DEFTYPE_FIELD)); } f->full_name = full_name; @@ -1546,9 +1648,7 @@ static bool create_fielddef( f->sub.unresolved = field_proto; if (f->label_ == UPB_LABEL_REQUIRED && f->file->syntax == UPB_SYNTAX_PROTO3) { - upb_status_seterrf(ctx->status, "proto3 fields cannot be required (%s)", - f->full_name); - return false; + symtab_errf(ctx, "proto3 fields cannot be required (%s)", f->full_name); } if (google_protobuf_FieldDescriptorProto_has_oneof_index(field_proto)) { @@ -1558,23 +1658,17 @@ static bool create_fielddef( upb_value v = upb_value_constptr(f); if (upb_fielddef_label(f) != UPB_LABEL_OPTIONAL) { - upb_status_seterrf(ctx->status, - "fields in oneof must have OPTIONAL label (%s)", - f->full_name); - return false; + symtab_errf(ctx, "fields in oneof must have OPTIONAL label (%s)", + f->full_name); } if (!m) { - upb_status_seterrf(ctx->status, - "oneof_index provided for extension field (%s)", - f->full_name); - return false; + symtab_errf(ctx, "oneof_index provided for extension field (%s)", + f->full_name); } if (oneof_index >= m->oneof_count) { - upb_status_seterrf(ctx->status, "oneof_index out of range (%s)", - f->full_name); - return false; + symtab_errf(ctx, "oneof_index out of range (%s)", f->full_name); } oneof = (upb_oneofdef*)&m->oneofs[oneof_index]; @@ -1584,15 +1678,13 @@ static bool create_fielddef( if (f->proto3_optional_) { oneof->synthetic = true; } - CHK(upb_inttable_insert2(&oneof->itof, f->number_, v, alloc)); - CHK(upb_strtable_insert3(&oneof->ntof, name.data, name.size, v, alloc)); + CHK_OOM(upb_inttable_insert2(&oneof->itof, f->number_, v, alloc)); + CHK_OOM(upb_strtable_insert3(&oneof->ntof, name.data, name.size, v, alloc)); } else { f->oneof = NULL; if (f->proto3_optional_) { - upb_status_seterrf(ctx->status, - "field with proto3_optional 
was not in a oneof (%s)", - f->full_name); - return false; + symtab_errf(ctx, "field with proto3_optional was not in a oneof (%s)", + f->full_name); } } @@ -1612,12 +1704,10 @@ static bool create_fielddef( } else { f->lazy_ = false; } - - return true; } -static bool create_enumdef( - const symtab_addctx *ctx, const char *prefix, +static void create_enumdef( + symtab_addctx *ctx, const char *prefix, const google_protobuf_EnumDescriptorProto *enum_proto) { upb_enumdef *e; const google_protobuf_EnumValueDescriptorProto *const *values; @@ -1625,11 +1715,11 @@ static bool create_enumdef( size_t i, n; name = google_protobuf_EnumDescriptorProto_name(enum_proto); - CHK(upb_isident(name, false, ctx->status)); + check_ident(ctx, name, false); e = (upb_enumdef*)&ctx->file->enums[ctx->file->enum_count++]; e->full_name = makefullname(ctx, prefix, name); - CHK_OOM(symtab_add(ctx, e->full_name, pack_def(e, UPB_DEFTYPE_ENUM))); + symtab_add(ctx, e->full_name, pack_def(e, UPB_DEFTYPE_ENUM)); values = google_protobuf_EnumDescriptorProto_value(enum_proto, &n); CHK_OOM(upb_strtable_init2(&e->ntoi, UPB_CTYPE_INT32, n, ctx->alloc)); @@ -1639,10 +1729,8 @@ static bool create_enumdef( e->defaultval = 0; if (n == 0) { - upb_status_seterrf(ctx->status, - "enums must contain at least one value (%s)", - e->full_name); - return false; + symtab_errf(ctx, "enums must contain at least one value (%s)", + e->full_name); } for (i = 0; i < n; i++) { @@ -1653,15 +1741,12 @@ static bool create_enumdef( upb_value v = upb_value_int32(num); if (i == 0 && e->file->syntax == UPB_SYNTAX_PROTO3 && num != 0) { - upb_status_seterrf(ctx->status, - "for proto3, the first enum value must be zero (%s)", - e->full_name); - return false; + symtab_errf(ctx, "for proto3, the first enum value must be zero (%s)", + e->full_name); } if (upb_strtable_lookup(&e->ntoi, name2, NULL)) { - upb_status_seterrf(ctx->status, "duplicate enum label '%s'", name2); - return false; + symtab_errf(ctx, "duplicate enum label '%s'", name2); 
} CHK_OOM(name2) @@ -1675,11 +1760,9 @@ static bool create_enumdef( } upb_inttable_compact2(&e->iton, ctx->alloc); - - return true; } -static bool create_msgdef(symtab_addctx *ctx, const char *prefix, +static void create_msgdef(symtab_addctx *ctx, const char *prefix, const google_protobuf_DescriptorProto *msg_proto) { upb_msgdef *m; const google_protobuf_MessageOptions *options; @@ -1691,11 +1774,11 @@ static bool create_msgdef(symtab_addctx *ctx, const char *prefix, upb_strview name; name = google_protobuf_DescriptorProto_name(msg_proto); - CHK(upb_isident(name, false, ctx->status)); + check_ident(ctx, name, false); m = (upb_msgdef*)&ctx->file->msgs[ctx->file->msg_count++]; m->full_name = makefullname(ctx, prefix, name); - CHK_OOM(symtab_add(ctx, m->full_name, pack_def(m, UPB_DEFTYPE_MSG))); + symtab_add(ctx, m->full_name, pack_def(m, UPB_DEFTYPE_MSG)); oneofs = google_protobuf_DescriptorProto_oneof_decl(msg_proto, &n_oneof); fields = google_protobuf_DescriptorProto_field(msg_proto, &n_field); @@ -1718,23 +1801,23 @@ static bool create_msgdef(symtab_addctx *ctx, const char *prefix, ctx->layouts++; } else { /* Allocate now (to allow cross-linking), populate later. 
*/ - m->layout = upb_malloc(ctx->alloc, sizeof(*m->layout)); + m->layout = symtab_alloc(ctx, sizeof(*m->layout)); } m->oneof_count = 0; - m->oneofs = upb_malloc(ctx->alloc, sizeof(*m->oneofs) * n_oneof); + m->oneofs = symtab_alloc(ctx, sizeof(*m->oneofs) * n_oneof); for (i = 0; i < n_oneof; i++) { - CHK(create_oneofdef(ctx, m, oneofs[i])); + create_oneofdef(ctx, m, oneofs[i]); } m->field_count = 0; - m->fields = upb_malloc(ctx->alloc, sizeof(*m->fields) * n_field); + m->fields = symtab_alloc(ctx, sizeof(*m->fields) * n_field); for (i = 0; i < n_field; i++) { - CHK(create_fielddef(ctx, m->full_name, m, fields[i])); + create_fielddef(ctx, m->full_name, m, fields[i]); } - CHK(assign_msg_indices(m, ctx->status)); - CHK(finalize_oneofs(ctx, m)); + assign_msg_indices(ctx, m); + finalize_oneofs(ctx, m); assign_msg_wellknowntype(m); upb_inttable_compact2(&m->itof, ctx->alloc); @@ -1742,93 +1825,78 @@ static bool create_msgdef(symtab_addctx *ctx, const char *prefix, enums = google_protobuf_DescriptorProto_enum_type(msg_proto, &n); for (i = 0; i < n; i++) { - CHK(create_enumdef(ctx, m->full_name, enums[i])); + create_enumdef(ctx, m->full_name, enums[i]); } msgs = google_protobuf_DescriptorProto_nested_type(msg_proto, &n); for (i = 0; i < n; i++) { - CHK(create_msgdef(ctx, m->full_name, msgs[i])); + create_msgdef(ctx, m->full_name, msgs[i]); } - - return true; } -typedef struct { - int msg_count; - int enum_count; - int ext_count; -} decl_counts; - static void count_types_in_msg(const google_protobuf_DescriptorProto *msg_proto, - decl_counts *counts) { + upb_filedef *file) { const google_protobuf_DescriptorProto *const *msgs; size_t i, n; - counts->msg_count++; + file->msg_count++; msgs = google_protobuf_DescriptorProto_nested_type(msg_proto, &n); for (i = 0; i < n; i++) { - count_types_in_msg(msgs[i], counts); + count_types_in_msg(msgs[i], file); } google_protobuf_DescriptorProto_enum_type(msg_proto, &n); - counts->enum_count += n; + file->enum_count += n; 
google_protobuf_DescriptorProto_extension(msg_proto, &n); - counts->ext_count += n; + file->ext_count += n; } static void count_types_in_file( const google_protobuf_FileDescriptorProto *file_proto, - decl_counts *counts) { + upb_filedef *file) { const google_protobuf_DescriptorProto *const *msgs; size_t i, n; msgs = google_protobuf_FileDescriptorProto_message_type(file_proto, &n); for (i = 0; i < n; i++) { - count_types_in_msg(msgs[i], counts); + count_types_in_msg(msgs[i], file); } google_protobuf_FileDescriptorProto_enum_type(file_proto, &n); - counts->enum_count += n; + file->enum_count += n; google_protobuf_FileDescriptorProto_extension(file_proto, &n); - counts->ext_count += n; + file->ext_count += n; } -static bool resolve_fielddef(const symtab_addctx *ctx, const char *prefix, +static void resolve_fielddef(symtab_addctx *ctx, const char *prefix, upb_fielddef *f) { upb_strview name; const google_protobuf_FieldDescriptorProto *field_proto = f->sub.unresolved; if (f->is_extension_) { if (!google_protobuf_FieldDescriptorProto_has_extendee(field_proto)) { - upb_status_seterrf(ctx->status, - "extension for field '%s' had no extendee", - f->full_name); - return false; + symtab_errf(ctx, "extension for field '%s' had no extendee", + f->full_name); } name = google_protobuf_FieldDescriptorProto_extendee(field_proto); f->msgdef = symtab_resolve(ctx, f, prefix, name, UPB_DEFTYPE_MSG); - CHK(f->msgdef); } if ((upb_fielddef_issubmsg(f) || f->type_ == UPB_DESCRIPTOR_TYPE_ENUM) && !google_protobuf_FieldDescriptorProto_has_type_name(field_proto)) { - upb_status_seterrf(ctx->status, "field '%s' is missing type name", - f->full_name); - return false; + symtab_errf(ctx, "field '%s' is missing type name", f->full_name); } name = google_protobuf_FieldDescriptorProto_type_name(field_proto); if (upb_fielddef_issubmsg(f)) { f->sub.msgdef = symtab_resolve(ctx, f, prefix, name, UPB_DEFTYPE_MSG); - CHK(f->sub.msgdef); } else if (f->type_ == UPB_DESCRIPTOR_TYPE_ENUM) { f->sub.enumdef = 
symtab_resolve(ctx, f, prefix, name, UPB_DEFTYPE_ENUM); - CHK(f->sub.enumdef); } /* Have to delay resolving of the default value until now because of the enum @@ -1838,54 +1906,36 @@ static bool resolve_fielddef(const symtab_addctx *ctx, const char *prefix, google_protobuf_FieldDescriptorProto_default_value(field_proto); if (f->file->syntax == UPB_SYNTAX_PROTO3) { - upb_status_seterrf(ctx->status, - "proto3 fields cannot have explicit defaults (%s)", - f->full_name); - return false; + symtab_errf(ctx, "proto3 fields cannot have explicit defaults (%s)", + f->full_name); } if (upb_fielddef_issubmsg(f)) { - upb_status_seterrf(ctx->status, - "message fields cannot have explicit defaults (%s)", - f->full_name); - return false; + symtab_errf(ctx, "message fields cannot have explicit defaults (%s)", + f->full_name); } - if (!parse_default(ctx, defaultval.data, defaultval.size, f)) { - upb_status_seterrf(ctx->status, - "couldn't parse default '" UPB_STRVIEW_FORMAT - "' for field (%s)", - UPB_STRVIEW_ARGS(defaultval), f->full_name); - return false; - } + parse_default(ctx, defaultval.data, defaultval.size, f); } else { set_default_default(ctx, f); } - - return true; } -static bool build_filedef( +static void build_filedef( symtab_addctx *ctx, upb_filedef *file, const google_protobuf_FileDescriptorProto *file_proto) { - upb_alloc *alloc = ctx->alloc; const google_protobuf_FileOptions *file_options_proto; const google_protobuf_DescriptorProto *const *msgs; const google_protobuf_EnumDescriptorProto *const *enums; const google_protobuf_FieldDescriptorProto *const *exts; const upb_strview* strs; size_t i, n; - decl_counts counts = {0, 0, 0}; - count_types_in_file(file_proto, &counts); + count_types_in_file(file_proto, file); - file->msgs = upb_malloc(alloc, sizeof(*file->msgs) * counts.msg_count); - file->enums = upb_malloc(alloc, sizeof(*file->enums) * counts.enum_count); - file->exts = upb_malloc(alloc, sizeof(*file->exts) * counts.ext_count); - - CHK_OOM(counts.msg_count == 0 
|| file->msgs); - CHK_OOM(counts.enum_count == 0 || file->enums); - CHK_OOM(counts.ext_count == 0 || file->exts); + file->msgs = symtab_alloc(ctx, sizeof(*file->msgs) * file->msg_count); + file->enums = symtab_alloc(ctx, sizeof(*file->enums) * file->enum_count); + file->exts = symtab_alloc(ctx, sizeof(*file->exts) * file->ext_count); /* We increment these as defs are added. */ file->msg_count = 0; @@ -1893,8 +1943,7 @@ static bool build_filedef( file->ext_count = 0; if (!google_protobuf_FileDescriptorProto_has_name(file_proto)) { - upb_status_seterrmsg(ctx->status, "File has no name"); - return false; + symtab_errf(ctx, "File has no name"); } file->name = @@ -1905,7 +1954,7 @@ static bool build_filedef( if (google_protobuf_FileDescriptorProto_has_package(file_proto)) { upb_strview package = google_protobuf_FileDescriptorProto_package(file_proto); - CHK(upb_isident(package, true, ctx->status)); + check_ident(ctx, package, true); file->package = strviewdup(ctx, package); } else { file->package = NULL; @@ -1920,9 +1969,8 @@ static bool build_filedef( } else if (streql_view(syntax, "proto3")) { file->syntax = UPB_SYNTAX_PROTO3; } else { - upb_status_seterrf(ctx->status, "Invalid syntax '" UPB_STRVIEW_FORMAT "'", - UPB_STRVIEW_ARGS(syntax)); - return false; + symtab_errf(ctx, "Invalid syntax '" UPB_STRVIEW_FORMAT "'", + UPB_STRVIEW_ARGS(syntax)); } } else { file->syntax = UPB_SYNTAX_PROTO2; @@ -1944,19 +1992,17 @@ static bool build_filedef( /* Verify dependencies. 
*/ strs = google_protobuf_FileDescriptorProto_dependency(file_proto, &n); - file->deps = upb_malloc(alloc, sizeof(*file->deps) * n) ; - CHK_OOM(n == 0 || file->deps); + file->deps = symtab_alloc(ctx, sizeof(*file->deps) * n); for (i = 0; i < n; i++) { upb_strview dep_name = strs[i]; upb_value v; if (!upb_strtable_lookup2(&ctx->symtab->files, dep_name.data, dep_name.size, &v)) { - upb_status_seterrf(ctx->status, - "Depends on file '" UPB_STRVIEW_FORMAT - "', but it has not been loaded", - UPB_STRVIEW_ARGS(dep_name)); - return false; + symtab_errf(ctx, + "Depends on file '" UPB_STRVIEW_FORMAT + "', but it has not been loaded", + UPB_STRVIEW_ARGS(dep_name)); } file->deps[i] = upb_value_getconstptr(v); } @@ -1964,194 +2010,92 @@ static bool build_filedef( /* Create messages. */ msgs = google_protobuf_FileDescriptorProto_message_type(file_proto, &n); for (i = 0; i < n; i++) { - CHK(create_msgdef(ctx, file->package, msgs[i])); + create_msgdef(ctx, file->package, msgs[i]); } /* Create enums. */ enums = google_protobuf_FileDescriptorProto_enum_type(file_proto, &n); for (i = 0; i < n; i++) { - CHK(create_enumdef(ctx, file->package, enums[i])); + create_enumdef(ctx, file->package, enums[i]); } /* Create extensions. */ exts = google_protobuf_FileDescriptorProto_extension(file_proto, &n); - file->exts = upb_malloc(alloc, sizeof(*file->exts) * n); - CHK_OOM(n == 0 || file->exts); + file->exts = symtab_alloc(ctx, sizeof(*file->exts) * n); for (i = 0; i < n; i++) { - CHK(create_fielddef(ctx, file->package, NULL, exts[i])); + create_fielddef(ctx, file->package, NULL, exts[i]); } /* Now that all names are in the table, build layouts and resolve refs. 
*/ for (i = 0; i < (size_t)file->ext_count; i++) { - CHK(resolve_fielddef(ctx, file->package, (upb_fielddef*)&file->exts[i])); + resolve_fielddef(ctx, file->package, (upb_fielddef*)&file->exts[i]); } for (i = 0; i < (size_t)file->msg_count; i++) { const upb_msgdef *m = &file->msgs[i]; int j; for (j = 0; j < m->field_count; j++) { - CHK(resolve_fielddef(ctx, m->full_name, (upb_fielddef*)&m->fields[j])); + resolve_fielddef(ctx, m->full_name, (upb_fielddef*)&m->fields[j]); } } if (!ctx->layouts) { for (i = 0; i < (size_t)file->msg_count; i++) { const upb_msgdef *m = &file->msgs[i]; - make_layout(ctx->symtab, m); + make_layout(ctx, m); } } +} - return true; - } - -static bool upb_symtab_addtotabs(upb_symtab *s, symtab_addctx *ctx) { - const upb_filedef *file = ctx->file; +static void remove_filedef(upb_symtab *s, upb_filedef *file) { upb_alloc *alloc = upb_arena_alloc(s->arena); - upb_strtable_iter iter; - - CHK_OOM(upb_strtable_insert3(&s->files, file->name, strlen(file->name), - upb_value_constptr(file), alloc)); - - upb_strtable_begin(&iter, ctx->addtab); - for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) { - upb_strview key = upb_strtable_iter_key(&iter); - upb_value value = upb_strtable_iter_value(&iter); - CHK_OOM(upb_strtable_insert3(&s->syms, key.data, key.size, value, alloc)); + int i; + for (i = 0; i < file->msg_count; i++) { + const char *name = file->msgs[i].full_name; + upb_strtable_remove3(&s->syms, name, strlen(name), NULL, alloc); } - - return true; -} - -/* upb_filedef ****************************************************************/ - -const char *upb_filedef_name(const upb_filedef *f) { - return f->name; -} - -const char *upb_filedef_package(const upb_filedef *f) { - return f->package; -} - -const char *upb_filedef_phpprefix(const upb_filedef *f) { - return f->phpprefix; -} - -const char *upb_filedef_phpnamespace(const upb_filedef *f) { - return f->phpnamespace; -} - -upb_syntax_t upb_filedef_syntax(const upb_filedef *f) { - return 
f->syntax; -} - -int upb_filedef_msgcount(const upb_filedef *f) { - return f->msg_count; -} - -int upb_filedef_depcount(const upb_filedef *f) { - return f->dep_count; -} - -int upb_filedef_enumcount(const upb_filedef *f) { - return f->enum_count; -} - -const upb_filedef *upb_filedef_dep(const upb_filedef *f, int i) { - return i < 0 || i >= f->dep_count ? NULL : f->deps[i]; -} - -const upb_msgdef *upb_filedef_msg(const upb_filedef *f, int i) { - return i < 0 || i >= f->msg_count ? NULL : &f->msgs[i]; -} - -const upb_enumdef *upb_filedef_enum(const upb_filedef *f, int i) { - return i < 0 || i >= f->enum_count ? NULL : &f->enums[i]; -} - -void upb_symtab_free(upb_symtab *s) { - upb_arena_free(s->arena); - upb_gfree(s); -} - -upb_symtab *upb_symtab_new(void) { - upb_symtab *s = upb_gmalloc(sizeof(*s)); - upb_alloc *alloc; - - if (!s) { - return NULL; + for (i = 0; i < file->enum_count; i++) { + const char *name = file->enums[i].full_name; + upb_strtable_remove3(&s->syms, name, strlen(name), NULL, alloc); } - - s->arena = upb_arena_new(); - s->bytes_loaded = 0; - alloc = upb_arena_alloc(s->arena); - - if (!upb_strtable_init2(&s->syms, UPB_CTYPE_CONSTPTR, 32, alloc) || - !upb_strtable_init2(&s->files, UPB_CTYPE_CONSTPTR, 4, alloc)) { - upb_arena_free(s->arena); - upb_gfree(s); - s = NULL; + for (i = 0; i < file->ext_count; i++) { + const char *name = file->exts[i].full_name; + upb_strtable_remove3(&s->syms, name, strlen(name), NULL, alloc); } - return s; -} - -const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym) { - upb_value v; - return upb_strtable_lookup(&s->syms, sym, &v) ? - unpack_def(v, UPB_DEFTYPE_MSG) : NULL; -} - -const upb_msgdef *upb_symtab_lookupmsg2(const upb_symtab *s, const char *sym, - size_t len) { - upb_value v; - return upb_strtable_lookup2(&s->syms, sym, len, &v) ? 
- unpack_def(v, UPB_DEFTYPE_MSG) : NULL; -} - -const upb_enumdef *upb_symtab_lookupenum(const upb_symtab *s, const char *sym) { - upb_value v; - return upb_strtable_lookup(&s->syms, sym, &v) ? - unpack_def(v, UPB_DEFTYPE_ENUM) : NULL; -} - -const upb_filedef *upb_symtab_lookupfile(const upb_symtab *s, const char *name) { - upb_value v; - return upb_strtable_lookup(&s->files, name, &v) ? upb_value_getconstptr(v) - : NULL; -} - -const upb_filedef *upb_symtab_lookupfile2( - const upb_symtab *s, const char *name, size_t len) { - upb_value v; - return upb_strtable_lookup2(&s->files, name, len, &v) ? - upb_value_getconstptr(v) : NULL; -} - -int upb_symtab_filecount(const upb_symtab *s) { - return (int)upb_strtable_count(&s->files); } static const upb_filedef *_upb_symtab_addfile( upb_symtab *s, const google_protobuf_FileDescriptorProto *file_proto, const upb_msglayout **layouts, upb_status *status) { - upb_arena *tmparena = upb_arena_new(); - upb_strtable addtab; - upb_alloc *alloc = upb_arena_alloc(s->arena); - upb_filedef *file = upb_malloc(alloc, sizeof(*file)); - bool ok; + upb_arena *file_arena = upb_arena_new(); + upb_filedef *file = upb_arena_malloc(file_arena, sizeof(*file)); + bool ok = true; symtab_addctx ctx; ctx.file = file; ctx.symtab = s; - ctx.alloc = alloc; - ctx.tmp = upb_arena_alloc(tmparena); - ctx.addtab = &addtab; + ctx.file_arena = file_arena; + ctx.alloc = upb_arena_alloc(file_arena); ctx.layouts = layouts; ctx.status = status; - ok = file && upb_strtable_init2(&addtab, UPB_CTYPE_CONSTPTR, 8, ctx.tmp) && - build_filedef(&ctx, file, file_proto) && upb_symtab_addtotabs(s, &ctx); + file->msg_count = 0; + file->enum_count = 0; + file->ext_count = 0; + + if (UPB_UNLIKELY(setjmp(ctx.err))) { + UPB_ASSERT(!upb_ok(status)); + ok = false; + remove_filedef(s, file); + } else { + build_filedef(&ctx, file, file_proto); + upb_strtable_insert3(&s->files, file->name, strlen(file->name), + upb_value_constptr(file), ctx.alloc); + UPB_ASSERT(upb_ok(status)); + 
upb_arena_fuse(s->arena, file_arena); + } - upb_arena_free(tmparena); + upb_arena_free(file_arena); return ok ? file : NULL; } @@ -2213,5 +2157,4 @@ size_t _upb_symtab_bytesloaded(const upb_symtab *s) { return s->bytes_loaded; } -#undef CHK #undef CHK_OOM